diff --git "a/sf_log.txt" "b/sf_log.txt" new file mode 100644--- /dev/null +++ "b/sf_log.txt" @@ -0,0 +1,1147 @@ +[2023-02-28 10:38:56,598][00359] Saving configuration to /content/train_dir/default_experiment/config.json... +[2023-02-28 10:38:56,602][00359] Rollout worker 0 uses device cpu +[2023-02-28 10:38:56,604][00359] Rollout worker 1 uses device cpu +[2023-02-28 10:38:56,605][00359] Rollout worker 2 uses device cpu +[2023-02-28 10:38:56,606][00359] Rollout worker 3 uses device cpu +[2023-02-28 10:38:56,608][00359] Rollout worker 4 uses device cpu +[2023-02-28 10:38:56,609][00359] Rollout worker 5 uses device cpu +[2023-02-28 10:38:56,610][00359] Rollout worker 6 uses device cpu +[2023-02-28 10:38:56,611][00359] Rollout worker 7 uses device cpu +[2023-02-28 10:38:57,119][00359] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-28 10:38:57,121][00359] InferenceWorker_p0-w0: min num requests: 2 +[2023-02-28 10:38:57,151][00359] Starting all processes... +[2023-02-28 10:38:57,153][00359] Starting process learner_proc0 +[2023-02-28 10:38:57,223][00359] Starting all processes... +[2023-02-28 10:38:57,278][00359] Starting process inference_proc0-0 +[2023-02-28 10:38:57,279][00359] Starting process rollout_proc0 +[2023-02-28 10:38:57,294][00359] Starting process rollout_proc2 +[2023-02-28 10:38:57,294][00359] Starting process rollout_proc3 +[2023-02-28 10:38:57,294][00359] Starting process rollout_proc4 +[2023-02-28 10:38:57,294][00359] Starting process rollout_proc5 +[2023-02-28 10:38:57,294][00359] Starting process rollout_proc6 +[2023-02-28 10:38:57,294][00359] Starting process rollout_proc7 +[2023-02-28 10:38:57,290][00359] Starting process rollout_proc1 +[2023-02-28 10:39:07,043][13073] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-28 10:39:07,044][13073] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2023-02-28 10:39:07,184][13090] Worker 3 uses CPU cores [1] +[2023-02-28 10:39:07,198][13089] Worker 2 uses CPU cores [0] +[2023-02-28 10:39:07,223][13094] Worker 7 uses CPU cores [1] +[2023-02-28 10:39:07,327][13092] Worker 6 uses CPU cores [0] +[2023-02-28 10:39:07,558][13088] Worker 0 uses CPU cores [0] +[2023-02-28 10:39:07,678][13095] Worker 1 uses CPU cores [1] +[2023-02-28 10:39:07,708][13091] Worker 4 uses CPU cores [0] +[2023-02-28 10:39:07,738][13093] Worker 5 uses CPU cores [1] +[2023-02-28 10:39:07,974][13087] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-28 10:39:07,974][13087] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2023-02-28 10:39:08,194][13073] Num visible devices: 1 +[2023-02-28 10:39:08,195][13087] Num visible devices: 1 +[2023-02-28 10:39:08,209][13073] Starting seed is not provided +[2023-02-28 10:39:08,209][13073] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-28 10:39:08,209][13073] Initializing actor-critic model on device cuda:0 +[2023-02-28 10:39:08,210][13073] RunningMeanStd input shape: (3, 72, 128) +[2023-02-28 10:39:08,213][13073] RunningMeanStd input shape: (1,) +[2023-02-28 10:39:08,232][13073] ConvEncoder: input_channels=3 +[2023-02-28 10:39:08,573][13073] Conv encoder output size: 512 +[2023-02-28 10:39:08,574][13073] Policy head output size: 512 +[2023-02-28 10:39:08,636][13073] Created Actor Critic model with architecture: +[2023-02-28 10:39:08,637][13073] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2023-02-28 10:39:16,030][13073] Using optimizer +[2023-02-28 10:39:16,032][13073] No checkpoints found +[2023-02-28 10:39:16,032][13073] Did not load from checkpoint, starting from scratch! +[2023-02-28 10:39:16,033][13073] Initialized policy 0 weights for model version 0 +[2023-02-28 10:39:16,037][13073] LearnerWorker_p0 finished initialization! +[2023-02-28 10:39:16,040][13073] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-28 10:39:16,236][13087] RunningMeanStd input shape: (3, 72, 128) +[2023-02-28 10:39:16,237][13087] RunningMeanStd input shape: (1,) +[2023-02-28 10:39:16,253][13087] ConvEncoder: input_channels=3 +[2023-02-28 10:39:16,350][13087] Conv encoder output size: 512 +[2023-02-28 10:39:16,351][13087] Policy head output size: 512 +[2023-02-28 10:39:16,817][00359] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-28 10:39:17,108][00359] Heartbeat connected on Batcher_0 +[2023-02-28 10:39:17,117][00359] Heartbeat connected on LearnerWorker_p0 +[2023-02-28 10:39:17,130][00359] Heartbeat connected on RolloutWorker_w0 +[2023-02-28 10:39:17,136][00359] Heartbeat connected on RolloutWorker_w1 +[2023-02-28 10:39:17,142][00359] Heartbeat connected on RolloutWorker_w2 +[2023-02-28 10:39:17,147][00359] Heartbeat connected on RolloutWorker_w4 +[2023-02-28 10:39:17,148][00359] Heartbeat connected on RolloutWorker_w3 +[2023-02-28 10:39:17,154][00359] Heartbeat connected on RolloutWorker_w5 +[2023-02-28 10:39:17,159][00359] Heartbeat connected on RolloutWorker_w6 +[2023-02-28 10:39:17,162][00359] Heartbeat connected on RolloutWorker_w7 +[2023-02-28 10:39:18,654][00359] Inference worker 0-0 is ready! +[2023-02-28 10:39:18,655][00359] All inference workers are ready! Signal rollout workers to start! +[2023-02-28 10:39:18,662][00359] Heartbeat connected on InferenceWorker_p0-w0 +[2023-02-28 10:39:18,794][13089] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-28 10:39:18,801][13092] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-28 10:39:18,813][13088] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-28 10:39:18,820][13094] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-28 10:39:18,835][13091] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-28 10:39:18,833][13090] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-28 10:39:18,832][13095] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-28 10:39:18,834][13093] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-28 10:39:19,692][13095] Decorrelating experience for 0 frames... +[2023-02-28 10:39:19,694][13093] Decorrelating experience for 0 frames... +[2023-02-28 10:39:20,375][13089] Decorrelating experience for 0 frames... +[2023-02-28 10:39:20,367][13092] Decorrelating experience for 0 frames... +[2023-02-28 10:39:20,379][13088] Decorrelating experience for 0 frames... +[2023-02-28 10:39:20,396][13091] Decorrelating experience for 0 frames... +[2023-02-28 10:39:21,335][13093] Decorrelating experience for 32 frames... +[2023-02-28 10:39:21,339][13095] Decorrelating experience for 32 frames... +[2023-02-28 10:39:21,576][13092] Decorrelating experience for 32 frames... +[2023-02-28 10:39:21,599][13091] Decorrelating experience for 32 frames... +[2023-02-28 10:39:21,817][00359] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-28 10:39:22,060][13090] Decorrelating experience for 0 frames... +[2023-02-28 10:39:22,061][13094] Decorrelating experience for 0 frames... +[2023-02-28 10:39:22,866][13093] Decorrelating experience for 64 frames... +[2023-02-28 10:39:23,364][13089] Decorrelating experience for 32 frames... +[2023-02-28 10:39:23,391][13090] Decorrelating experience for 32 frames... +[2023-02-28 10:39:23,628][13092] Decorrelating experience for 64 frames... +[2023-02-28 10:39:23,673][13091] Decorrelating experience for 64 frames... +[2023-02-28 10:39:24,782][13088] Decorrelating experience for 32 frames... +[2023-02-28 10:39:24,926][13094] Decorrelating experience for 32 frames... +[2023-02-28 10:39:25,017][13089] Decorrelating experience for 64 frames... +[2023-02-28 10:39:25,034][13093] Decorrelating experience for 96 frames... +[2023-02-28 10:39:25,557][13095] Decorrelating experience for 64 frames... +[2023-02-28 10:39:26,248][13090] Decorrelating experience for 64 frames... +[2023-02-28 10:39:26,482][13091] Decorrelating experience for 96 frames... +[2023-02-28 10:39:26,717][13092] Decorrelating experience for 96 frames... +[2023-02-28 10:39:26,754][13088] Decorrelating experience for 64 frames... +[2023-02-28 10:39:26,819][00359] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-28 10:39:26,839][13089] Decorrelating experience for 96 frames... +[2023-02-28 10:39:27,669][13088] Decorrelating experience for 96 frames... +[2023-02-28 10:39:27,821][13094] Decorrelating experience for 64 frames... +[2023-02-28 10:39:28,113][13090] Decorrelating experience for 96 frames... +[2023-02-28 10:39:28,551][13095] Decorrelating experience for 96 frames... +[2023-02-28 10:39:28,553][13094] Decorrelating experience for 96 frames... +[2023-02-28 10:39:31,817][00359] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 46.5. Samples: 698. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-28 10:39:31,820][00359] Avg episode reward: [(0, '1.572')] +[2023-02-28 10:39:32,410][13073] Signal inference workers to stop experience collection... +[2023-02-28 10:39:32,423][13087] InferenceWorker_p0-w0: stopping experience collection +[2023-02-28 10:39:35,065][13073] Signal inference workers to resume experience collection... +[2023-02-28 10:39:35,066][13087] InferenceWorker_p0-w0: resuming experience collection +[2023-02-28 10:39:36,817][00359] Fps is (10 sec: 1229.1, 60 sec: 614.4, 300 sec: 614.4). Total num frames: 12288. Throughput: 0: 159.8. Samples: 3196. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2023-02-28 10:39:36,819][00359] Avg episode reward: [(0, '2.639')] +[2023-02-28 10:39:41,819][00359] Fps is (10 sec: 2457.1, 60 sec: 983.0, 300 sec: 983.0). Total num frames: 24576. Throughput: 0: 227.0. Samples: 5676. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-28 10:39:41,825][00359] Avg episode reward: [(0, '3.622')] +[2023-02-28 10:39:45,997][13087] Updated weights for policy 0, policy_version 10 (0.0013) +[2023-02-28 10:39:46,817][00359] Fps is (10 sec: 2867.2, 60 sec: 1365.3, 300 sec: 1365.3). Total num frames: 40960. Throughput: 0: 338.2. Samples: 10146. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-28 10:39:46,823][00359] Avg episode reward: [(0, '4.050')] +[2023-02-28 10:39:51,817][00359] Fps is (10 sec: 3687.1, 60 sec: 1755.4, 300 sec: 1755.4). Total num frames: 61440. Throughput: 0: 468.3. Samples: 16392. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2023-02-28 10:39:51,822][00359] Avg episode reward: [(0, '4.426')] +[2023-02-28 10:39:55,458][13087] Updated weights for policy 0, policy_version 20 (0.0014) +[2023-02-28 10:39:56,817][00359] Fps is (10 sec: 4505.7, 60 sec: 2150.4, 300 sec: 2150.4). Total num frames: 86016. Throughput: 0: 498.6. Samples: 19942. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:39:56,824][00359] Avg episode reward: [(0, '4.439')] +[2023-02-28 10:40:01,817][00359] Fps is (10 sec: 4095.9, 60 sec: 2275.5, 300 sec: 2275.5). Total num frames: 102400. Throughput: 0: 571.3. Samples: 25710. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-28 10:40:01,820][00359] Avg episode reward: [(0, '4.404')] +[2023-02-28 10:40:01,830][13073] Saving new best policy, reward=4.404! +[2023-02-28 10:40:06,817][00359] Fps is (10 sec: 3276.8, 60 sec: 2375.7, 300 sec: 2375.7). Total num frames: 118784. Throughput: 0: 668.9. Samples: 30102. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:40:06,824][00359] Avg episode reward: [(0, '4.386')] +[2023-02-28 10:40:07,787][13087] Updated weights for policy 0, policy_version 30 (0.0022) +[2023-02-28 10:40:11,817][00359] Fps is (10 sec: 3686.5, 60 sec: 2532.1, 300 sec: 2532.1). Total num frames: 139264. Throughput: 0: 738.4. Samples: 33224. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:40:11,824][00359] Avg episode reward: [(0, '4.416')] +[2023-02-28 10:40:11,836][13073] Saving new best policy, reward=4.416! +[2023-02-28 10:40:16,655][13087] Updated weights for policy 0, policy_version 40 (0.0011) +[2023-02-28 10:40:16,817][00359] Fps is (10 sec: 4505.6, 60 sec: 2730.7, 300 sec: 2730.7). Total num frames: 163840. Throughput: 0: 879.9. Samples: 40294. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:40:16,825][00359] Avg episode reward: [(0, '4.475')] +[2023-02-28 10:40:16,828][13073] Saving new best policy, reward=4.475! +[2023-02-28 10:40:21,818][00359] Fps is (10 sec: 4095.6, 60 sec: 3003.7, 300 sec: 2772.6). Total num frames: 180224. Throughput: 0: 937.8. Samples: 45400. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-28 10:40:21,824][00359] Avg episode reward: [(0, '4.487')] +[2023-02-28 10:40:21,832][13073] Saving new best policy, reward=4.487! +[2023-02-28 10:40:26,817][00359] Fps is (10 sec: 2457.6, 60 sec: 3140.4, 300 sec: 2691.7). Total num frames: 188416. Throughput: 0: 921.1. Samples: 47122. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-28 10:40:26,821][00359] Avg episode reward: [(0, '4.352')] +[2023-02-28 10:40:31,817][00359] Fps is (10 sec: 2048.2, 60 sec: 3345.1, 300 sec: 2676.1). Total num frames: 200704. Throughput: 0: 902.3. Samples: 50750. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-28 10:40:31,819][00359] Avg episode reward: [(0, '4.326')] +[2023-02-28 10:40:31,871][13087] Updated weights for policy 0, policy_version 50 (0.0039) +[2023-02-28 10:40:36,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 2764.8). Total num frames: 221184. Throughput: 0: 886.4. Samples: 56280. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:40:36,825][00359] Avg episode reward: [(0, '4.250')] +[2023-02-28 10:40:41,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3618.2, 300 sec: 2843.1). Total num frames: 241664. Throughput: 0: 887.4. Samples: 59876. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:40:41,827][00359] Avg episode reward: [(0, '4.448')] +[2023-02-28 10:40:41,894][13087] Updated weights for policy 0, policy_version 60 (0.0023) +[2023-02-28 10:40:46,821][00359] Fps is (10 sec: 3684.8, 60 sec: 3617.9, 300 sec: 2867.1). Total num frames: 258048. Throughput: 0: 869.4. Samples: 64838. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:40:46,825][00359] Avg episode reward: [(0, '4.578')] +[2023-02-28 10:40:46,833][13073] Saving new best policy, reward=4.578! +[2023-02-28 10:40:51,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 2888.8). Total num frames: 274432. Throughput: 0: 880.1. Samples: 69708. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:40:51,820][00359] Avg episode reward: [(0, '4.526')] +[2023-02-28 10:40:51,826][13073] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000067_274432.pth... +[2023-02-28 10:40:53,881][13087] Updated weights for policy 0, policy_version 70 (0.0036) +[2023-02-28 10:40:56,817][00359] Fps is (10 sec: 4097.8, 60 sec: 3549.9, 300 sec: 2990.1). Total num frames: 299008. Throughput: 0: 888.1. Samples: 73190. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:40:56,819][00359] Avg episode reward: [(0, '4.545')] +[2023-02-28 10:41:01,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3618.2, 300 sec: 3042.7). Total num frames: 319488. Throughput: 0: 886.8. Samples: 80198. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:41:01,819][00359] Avg episode reward: [(0, '4.585')] +[2023-02-28 10:41:01,836][13073] Saving new best policy, reward=4.585! +[2023-02-28 10:41:03,737][13087] Updated weights for policy 0, policy_version 80 (0.0026) +[2023-02-28 10:41:06,823][00359] Fps is (10 sec: 3684.3, 60 sec: 3617.8, 300 sec: 3053.2). Total num frames: 335872. Throughput: 0: 869.4. Samples: 84528. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:41:06,828][00359] Avg episode reward: [(0, '4.559')] +[2023-02-28 10:41:11,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3063.1). Total num frames: 352256. Throughput: 0: 882.3. Samples: 86826. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:41:11,819][00359] Avg episode reward: [(0, '4.508')] +[2023-02-28 10:41:14,981][13087] Updated weights for policy 0, policy_version 90 (0.0019) +[2023-02-28 10:41:16,817][00359] Fps is (10 sec: 4098.3, 60 sec: 3549.9, 300 sec: 3140.3). Total num frames: 376832. Throughput: 0: 951.7. Samples: 93578. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:41:16,819][00359] Avg episode reward: [(0, '4.420')] +[2023-02-28 10:41:21,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3618.2, 300 sec: 3178.5). Total num frames: 397312. Throughput: 0: 978.6. Samples: 100318. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:41:21,822][00359] Avg episode reward: [(0, '4.425')] +[2023-02-28 10:41:25,007][13087] Updated weights for policy 0, policy_version 100 (0.0020) +[2023-02-28 10:41:26,818][00359] Fps is (10 sec: 3686.0, 60 sec: 3754.6, 300 sec: 3182.3). Total num frames: 413696. Throughput: 0: 948.7. Samples: 102568. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:41:26,822][00359] Avg episode reward: [(0, '4.488')] +[2023-02-28 10:41:31,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3185.8). Total num frames: 430080. Throughput: 0: 940.0. Samples: 107132. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-28 10:41:31,825][00359] Avg episode reward: [(0, '4.673')] +[2023-02-28 10:41:31,833][13073] Saving new best policy, reward=4.673! +[2023-02-28 10:41:36,011][13087] Updated weights for policy 0, policy_version 110 (0.0014) +[2023-02-28 10:41:36,817][00359] Fps is (10 sec: 3686.8, 60 sec: 3822.9, 300 sec: 3218.3). Total num frames: 450560. Throughput: 0: 985.6. Samples: 114062. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:41:36,822][00359] Avg episode reward: [(0, '4.444')] +[2023-02-28 10:41:41,827][00359] Fps is (10 sec: 4501.1, 60 sec: 3890.6, 300 sec: 3276.6). Total num frames: 475136. Throughput: 0: 986.7. Samples: 117602. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:41:41,833][00359] Avg episode reward: [(0, '4.584')] +[2023-02-28 10:41:46,729][13087] Updated weights for policy 0, policy_version 120 (0.0012) +[2023-02-28 10:41:46,819][00359] Fps is (10 sec: 4095.2, 60 sec: 3891.4, 300 sec: 3276.8). Total num frames: 491520. Throughput: 0: 942.4. Samples: 122608. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:41:46,824][00359] Avg episode reward: [(0, '4.586')] +[2023-02-28 10:41:51,817][00359] Fps is (10 sec: 3280.1, 60 sec: 3891.2, 300 sec: 3276.8). Total num frames: 507904. Throughput: 0: 956.1. Samples: 127546. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:41:51,822][00359] Avg episode reward: [(0, '4.790')] +[2023-02-28 10:41:51,833][13073] Saving new best policy, reward=4.790! +[2023-02-28 10:41:56,817][00359] Fps is (10 sec: 3687.1, 60 sec: 3822.9, 300 sec: 3302.4). Total num frames: 528384. Throughput: 0: 982.4. Samples: 131032. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-28 10:41:56,819][00359] Avg episode reward: [(0, '4.996')] +[2023-02-28 10:41:56,831][13073] Saving new best policy, reward=4.996! +[2023-02-28 10:41:57,127][13087] Updated weights for policy 0, policy_version 130 (0.0016) +[2023-02-28 10:42:01,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3351.3). Total num frames: 552960. Throughput: 0: 986.9. Samples: 137990. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:42:01,820][00359] Avg episode reward: [(0, '4.796')] +[2023-02-28 10:42:06,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3823.3, 300 sec: 3325.0). Total num frames: 565248. Throughput: 0: 937.0. Samples: 142484. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:42:06,824][00359] Avg episode reward: [(0, '4.789')] +[2023-02-28 10:42:08,593][13087] Updated weights for policy 0, policy_version 140 (0.0016) +[2023-02-28 10:42:11,817][00359] Fps is (10 sec: 2867.2, 60 sec: 3822.9, 300 sec: 3323.6). Total num frames: 581632. Throughput: 0: 937.6. Samples: 144758. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:42:11,819][00359] Avg episode reward: [(0, '4.393')] +[2023-02-28 10:42:16,818][00359] Fps is (10 sec: 4095.5, 60 sec: 3822.9, 300 sec: 3367.8). Total num frames: 606208. Throughput: 0: 980.3. Samples: 151248. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-28 10:42:16,821][00359] Avg episode reward: [(0, '4.359')] +[2023-02-28 10:42:18,079][13087] Updated weights for policy 0, policy_version 150 (0.0011) +[2023-02-28 10:42:21,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3387.5). Total num frames: 626688. Throughput: 0: 975.8. Samples: 157972. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:42:21,821][00359] Avg episode reward: [(0, '4.448')] +[2023-02-28 10:42:26,817][00359] Fps is (10 sec: 3686.9, 60 sec: 3823.0, 300 sec: 3384.6). Total num frames: 643072. Throughput: 0: 945.4. Samples: 160136. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:42:26,821][00359] Avg episode reward: [(0, '4.562')] +[2023-02-28 10:42:30,389][13087] Updated weights for policy 0, policy_version 160 (0.0015) +[2023-02-28 10:42:31,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3381.8). Total num frames: 659456. Throughput: 0: 933.1. Samples: 164596. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:42:31,825][00359] Avg episode reward: [(0, '4.518')] +[2023-02-28 10:42:36,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3420.2). Total num frames: 684032. Throughput: 0: 976.6. Samples: 171494. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-28 10:42:36,823][00359] Avg episode reward: [(0, '4.506')] +[2023-02-28 10:42:39,261][13087] Updated weights for policy 0, policy_version 170 (0.0033) +[2023-02-28 10:42:41,817][00359] Fps is (10 sec: 4505.4, 60 sec: 3823.5, 300 sec: 3436.6). Total num frames: 704512. Throughput: 0: 979.0. Samples: 175088. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:42:41,821][00359] Avg episode reward: [(0, '4.761')] +[2023-02-28 10:42:46,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3823.1, 300 sec: 3432.8). Total num frames: 720896. Throughput: 0: 937.0. Samples: 180156. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:42:46,824][00359] Avg episode reward: [(0, '4.695')] +[2023-02-28 10:42:51,435][13087] Updated weights for policy 0, policy_version 180 (0.0037) +[2023-02-28 10:42:51,817][00359] Fps is (10 sec: 3277.0, 60 sec: 3822.9, 300 sec: 3429.2). Total num frames: 737280. Throughput: 0: 946.8. Samples: 185090. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:42:51,826][00359] Avg episode reward: [(0, '4.677')] +[2023-02-28 10:42:51,836][13073] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000180_737280.pth... +[2023-02-28 10:42:56,817][00359] Fps is (10 sec: 4095.9, 60 sec: 3891.2, 300 sec: 3463.0). Total num frames: 761856. Throughput: 0: 974.7. Samples: 188618. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:42:56,819][00359] Avg episode reward: [(0, '4.834')] +[2023-02-28 10:43:00,222][13087] Updated weights for policy 0, policy_version 190 (0.0015) +[2023-02-28 10:43:01,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3477.0). Total num frames: 782336. Throughput: 0: 990.1. Samples: 195800. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-28 10:43:01,824][00359] Avg episode reward: [(0, '4.919')] +[2023-02-28 10:43:06,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3472.7). Total num frames: 798720. Throughput: 0: 941.4. Samples: 200334. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-28 10:43:06,820][00359] Avg episode reward: [(0, '5.064')] +[2023-02-28 10:43:06,822][13073] Saving new best policy, reward=5.064! +[2023-02-28 10:43:11,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3468.5). Total num frames: 815104. Throughput: 0: 939.3. Samples: 202404. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-28 10:43:11,823][00359] Avg episode reward: [(0, '4.995')] +[2023-02-28 10:43:12,529][13087] Updated weights for policy 0, policy_version 200 (0.0030) +[2023-02-28 10:43:16,817][00359] Fps is (10 sec: 3686.5, 60 sec: 3823.0, 300 sec: 3481.6). Total num frames: 835584. Throughput: 0: 986.6. Samples: 208994. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:43:16,824][00359] Avg episode reward: [(0, '4.937')] +[2023-02-28 10:43:21,140][13087] Updated weights for policy 0, policy_version 210 (0.0013) +[2023-02-28 10:43:21,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3510.9). Total num frames: 860160. Throughput: 0: 986.2. Samples: 215874. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-28 10:43:21,826][00359] Avg episode reward: [(0, '5.259')] +[2023-02-28 10:43:21,842][13073] Saving new best policy, reward=5.259! +[2023-02-28 10:43:26,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3489.8). Total num frames: 872448. Throughput: 0: 953.6. Samples: 218000. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:43:26,820][00359] Avg episode reward: [(0, '5.502')] +[2023-02-28 10:43:26,931][13073] Saving new best policy, reward=5.502! +[2023-02-28 10:43:31,817][00359] Fps is (10 sec: 2867.2, 60 sec: 3822.9, 300 sec: 3485.6). Total num frames: 888832. Throughput: 0: 940.4. Samples: 222476. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:43:31,819][00359] Avg episode reward: [(0, '5.588')] +[2023-02-28 10:43:31,844][13073] Saving new best policy, reward=5.588! +[2023-02-28 10:43:33,776][13087] Updated weights for policy 0, policy_version 220 (0.0015) +[2023-02-28 10:43:36,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3513.1). Total num frames: 913408. Throughput: 0: 984.5. Samples: 229394. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:43:36,823][00359] Avg episode reward: [(0, '5.733')] +[2023-02-28 10:43:36,827][13073] Saving new best policy, reward=5.733! +[2023-02-28 10:43:41,823][00359] Fps is (10 sec: 4502.7, 60 sec: 3822.6, 300 sec: 3524.0). Total num frames: 933888. Throughput: 0: 983.6. Samples: 232884. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:43:41,829][00359] Avg episode reward: [(0, '5.948')] +[2023-02-28 10:43:41,845][13073] Saving new best policy, reward=5.948! +[2023-02-28 10:43:43,559][13087] Updated weights for policy 0, policy_version 230 (0.0013) +[2023-02-28 10:43:46,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3519.5). Total num frames: 950272. Throughput: 0: 931.2. Samples: 237702. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:43:46,820][00359] Avg episode reward: [(0, '5.580')] +[2023-02-28 10:43:51,817][00359] Fps is (10 sec: 3278.9, 60 sec: 3822.9, 300 sec: 3515.1). Total num frames: 966656. Throughput: 0: 941.6. Samples: 242704. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-28 10:43:51,820][00359] Avg episode reward: [(0, '5.659')] +[2023-02-28 10:43:54,676][13087] Updated weights for policy 0, policy_version 240 (0.0013) +[2023-02-28 10:43:56,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3823.0, 300 sec: 3540.1). Total num frames: 991232. Throughput: 0: 973.6. Samples: 246216. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-28 10:43:56,819][00359] Avg episode reward: [(0, '5.948')] +[2023-02-28 10:44:01,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3549.9). Total num frames: 1011712. Throughput: 0: 985.7. Samples: 253352. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:44:01,821][00359] Avg episode reward: [(0, '6.124')] +[2023-02-28 10:44:01,836][13073] Saving new best policy, reward=6.124! +[2023-02-28 10:44:05,293][13087] Updated weights for policy 0, policy_version 250 (0.0012) +[2023-02-28 10:44:06,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3545.2). Total num frames: 1028096. Throughput: 0: 929.5. Samples: 257700. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:44:06,827][00359] Avg episode reward: [(0, '6.003')] +[2023-02-28 10:44:11,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3540.6). Total num frames: 1044480. Throughput: 0: 933.2. Samples: 259994. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:44:11,825][00359] Avg episode reward: [(0, '6.030')] +[2023-02-28 10:44:15,873][13087] Updated weights for policy 0, policy_version 260 (0.0024) +[2023-02-28 10:44:16,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3623.9). Total num frames: 1069056. Throughput: 0: 980.7. Samples: 266608. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:44:16,822][00359] Avg episode reward: [(0, '6.200')] +[2023-02-28 10:44:16,833][13073] Saving new best policy, reward=6.200! +[2023-02-28 10:44:21,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3693.4). Total num frames: 1089536. Throughput: 0: 975.5. Samples: 273290. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:44:21,829][00359] Avg episode reward: [(0, '6.813')] +[2023-02-28 10:44:21,844][13073] Saving new best policy, reward=6.813! +[2023-02-28 10:44:26,820][00359] Fps is (10 sec: 3275.6, 60 sec: 3822.7, 300 sec: 3735.0). Total num frames: 1101824. Throughput: 0: 945.7. Samples: 275440. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:44:26,823][00359] Avg episode reward: [(0, '6.859')] +[2023-02-28 10:44:26,830][13073] Saving new best policy, reward=6.859! +[2023-02-28 10:44:27,381][13087] Updated weights for policy 0, policy_version 270 (0.0032) +[2023-02-28 10:44:31,823][00359] Fps is (10 sec: 2456.0, 60 sec: 3754.3, 300 sec: 3734.9). Total num frames: 1114112. Throughput: 0: 918.8. Samples: 279052. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:44:31,826][00359] Avg episode reward: [(0, '6.872')] +[2023-02-28 10:44:31,838][13073] Saving new best policy, reward=6.872! +[2023-02-28 10:44:36,817][00359] Fps is (10 sec: 2458.5, 60 sec: 3549.9, 300 sec: 3735.0). Total num frames: 1126400. Throughput: 0: 893.4. Samples: 282906. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:44:36,819][00359] Avg episode reward: [(0, '6.235')] +[2023-02-28 10:44:40,798][13087] Updated weights for policy 0, policy_version 280 (0.0046) +[2023-02-28 10:44:41,817][00359] Fps is (10 sec: 3688.8, 60 sec: 3618.5, 300 sec: 3762.8). Total num frames: 1150976. Throughput: 0: 883.6. Samples: 285976. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:44:41,819][00359] Avg episode reward: [(0, '7.133')] +[2023-02-28 10:44:41,835][13073] Saving new best policy, reward=7.133! +[2023-02-28 10:44:46,819][00359] Fps is (10 sec: 4095.1, 60 sec: 3618.0, 300 sec: 3748.9). Total num frames: 1167360. Throughput: 0: 866.6. Samples: 292352. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:44:46,824][00359] Avg episode reward: [(0, '7.113')] +[2023-02-28 10:44:51,819][00359] Fps is (10 sec: 3276.0, 60 sec: 3618.0, 300 sec: 3721.1). Total num frames: 1183744. Throughput: 0: 870.0. Samples: 296850. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:44:51,822][00359] Avg episode reward: [(0, '7.193')] +[2023-02-28 10:44:51,843][13073] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000289_1183744.pth... +[2023-02-28 10:44:51,972][13073] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000067_274432.pth +[2023-02-28 10:44:51,988][13073] Saving new best policy, reward=7.193! +[2023-02-28 10:44:52,486][13087] Updated weights for policy 0, policy_version 290 (0.0033) +[2023-02-28 10:44:56,817][00359] Fps is (10 sec: 3687.2, 60 sec: 3549.9, 300 sec: 3735.0). Total num frames: 1204224. Throughput: 0: 869.2. Samples: 299106. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:44:56,819][00359] Avg episode reward: [(0, '7.472')] +[2023-02-28 10:44:56,822][13073] Saving new best policy, reward=7.472! +[2023-02-28 10:45:01,817][00359] Fps is (10 sec: 4097.0, 60 sec: 3549.9, 300 sec: 3748.9). Total num frames: 1224704. Throughput: 0: 877.1. Samples: 306078. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:45:01,824][00359] Avg episode reward: [(0, '8.992')] +[2023-02-28 10:45:01,838][13073] Saving new best policy, reward=8.992! +[2023-02-28 10:45:02,168][13087] Updated weights for policy 0, policy_version 300 (0.0016) +[2023-02-28 10:45:06,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3748.9). Total num frames: 1245184. Throughput: 0: 863.7. Samples: 312156. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-28 10:45:06,826][00359] Avg episode reward: [(0, '9.732')] +[2023-02-28 10:45:06,830][13073] Saving new best policy, reward=9.732! +[2023-02-28 10:45:11,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3707.2). Total num frames: 1257472. Throughput: 0: 865.0. Samples: 314360. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:45:11,828][00359] Avg episode reward: [(0, '9.530')] +[2023-02-28 10:45:14,617][13087] Updated weights for policy 0, policy_version 310 (0.0023) +[2023-02-28 10:45:16,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3721.1). Total num frames: 1277952. Throughput: 0: 893.8. Samples: 319266. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:45:16,819][00359] Avg episode reward: [(0, '8.823')] +[2023-02-28 10:45:21,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3549.9, 300 sec: 3776.6). Total num frames: 1302528. Throughput: 0: 966.6. Samples: 326404. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:45:21,819][00359] Avg episode reward: [(0, '9.281')] +[2023-02-28 10:45:23,284][13087] Updated weights for policy 0, policy_version 320 (0.0015) +[2023-02-28 10:45:26,819][00359] Fps is (10 sec: 4504.7, 60 sec: 3686.5, 300 sec: 3804.4). Total num frames: 1323008. Throughput: 0: 977.5. Samples: 329964. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:45:26,824][00359] Avg episode reward: [(0, '9.878')] +[2023-02-28 10:45:26,828][13073] Saving new best policy, reward=9.878! +[2023-02-28 10:45:31,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3686.8, 300 sec: 3776.7). Total num frames: 1335296. Throughput: 0: 934.4. Samples: 334400. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:45:31,822][00359] Avg episode reward: [(0, '10.216')] +[2023-02-28 10:45:31,912][13073] Saving new best policy, reward=10.216! +[2023-02-28 10:45:35,722][13087] Updated weights for policy 0, policy_version 330 (0.0015) +[2023-02-28 10:45:36,817][00359] Fps is (10 sec: 3277.5, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 1355776. Throughput: 0: 950.4. Samples: 339614. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-28 10:45:36,824][00359] Avg episode reward: [(0, '11.318')] +[2023-02-28 10:45:36,829][13073] Saving new best policy, reward=11.318! +[2023-02-28 10:45:41,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3804.5). Total num frames: 1380352. Throughput: 0: 978.6. Samples: 343142. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:45:41,824][00359] Avg episode reward: [(0, '11.072')] +[2023-02-28 10:45:44,294][13087] Updated weights for policy 0, policy_version 340 (0.0012) +[2023-02-28 10:45:46,818][00359] Fps is (10 sec: 4505.0, 60 sec: 3891.3, 300 sec: 3818.3). Total num frames: 1400832. Throughput: 0: 972.8. Samples: 349856. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:45:46,824][00359] Avg episode reward: [(0, '11.846')] +[2023-02-28 10:45:46,827][13073] Saving new best policy, reward=11.846! +[2023-02-28 10:45:51,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3823.1, 300 sec: 3776.7). Total num frames: 1413120. Throughput: 0: 936.3. Samples: 354288. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:45:51,821][00359] Avg episode reward: [(0, '11.943')] +[2023-02-28 10:45:51,842][13073] Saving new best policy, reward=11.943! +[2023-02-28 10:45:56,817][13087] Updated weights for policy 0, policy_version 350 (0.0018) +[2023-02-28 10:45:56,827][00359] Fps is (10 sec: 3273.9, 60 sec: 3822.3, 300 sec: 3776.5). Total num frames: 1433600. Throughput: 0: 936.0. Samples: 356490. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:45:56,832][00359] Avg episode reward: [(0, '12.309')] +[2023-02-28 10:45:56,843][13073] Saving new best policy, reward=12.309! +[2023-02-28 10:46:01,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3790.6). Total num frames: 1454080. Throughput: 0: 980.0. Samples: 363366. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-28 10:46:01,819][00359] Avg episode reward: [(0, '12.794')] +[2023-02-28 10:46:01,836][13073] Saving new best policy, reward=12.794! +[2023-02-28 10:46:06,199][13087] Updated weights for policy 0, policy_version 360 (0.0018) +[2023-02-28 10:46:06,819][00359] Fps is (10 sec: 4099.2, 60 sec: 3822.8, 300 sec: 3804.4). Total num frames: 1474560. Throughput: 0: 958.2. Samples: 369524. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:46:06,822][00359] Avg episode reward: [(0, '12.707')] +[2023-02-28 10:46:11,819][00359] Fps is (10 sec: 3685.8, 60 sec: 3891.1, 300 sec: 3776.6). Total num frames: 1490944. Throughput: 0: 930.3. Samples: 371826. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-28 10:46:11,825][00359] Avg episode reward: [(0, '12.332')] +[2023-02-28 10:46:16,817][00359] Fps is (10 sec: 3277.6, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 1507328. Throughput: 0: 941.5. Samples: 376768. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:46:16,820][00359] Avg episode reward: [(0, '12.254')] +[2023-02-28 10:46:18,077][13087] Updated weights for policy 0, policy_version 370 (0.0018) +[2023-02-28 10:46:21,817][00359] Fps is (10 sec: 4096.7, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 1531904. Throughput: 0: 984.5. Samples: 383918. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:46:21,820][00359] Avg episode reward: [(0, '13.823')] +[2023-02-28 10:46:21,833][13073] Saving new best policy, reward=13.823! +[2023-02-28 10:46:26,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3823.1, 300 sec: 3804.4). Total num frames: 1552384. Throughput: 0: 980.9. Samples: 387284. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:46:26,823][00359] Avg episode reward: [(0, '14.861')] +[2023-02-28 10:46:26,825][13073] Saving new best policy, reward=14.861! +[2023-02-28 10:46:28,325][13087] Updated weights for policy 0, policy_version 380 (0.0013) +[2023-02-28 10:46:31,817][00359] Fps is (10 sec: 3276.7, 60 sec: 3822.9, 300 sec: 3776.6). Total num frames: 1564672. Throughput: 0: 928.4. Samples: 391634. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:46:31,822][00359] Avg episode reward: [(0, '14.701')] +[2023-02-28 10:46:36,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3762.9). Total num frames: 1585152. Throughput: 0: 946.5. Samples: 396880. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-28 10:46:36,820][00359] Avg episode reward: [(0, '15.957')] +[2023-02-28 10:46:36,827][13073] Saving new best policy, reward=15.957! +[2023-02-28 10:46:39,592][13087] Updated weights for policy 0, policy_version 390 (0.0017) +[2023-02-28 10:46:41,817][00359] Fps is (10 sec: 4096.1, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 1605632. Throughput: 0: 973.2. Samples: 400276. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-28 10:46:41,822][00359] Avg episode reward: [(0, '15.422')] +[2023-02-28 10:46:46,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3754.8, 300 sec: 3790.5). Total num frames: 1626112. Throughput: 0: 968.9. Samples: 406968. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:46:46,819][00359] Avg episode reward: [(0, '15.282')] +[2023-02-28 10:46:50,060][13087] Updated weights for policy 0, policy_version 400 (0.0014) +[2023-02-28 10:46:51,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 1642496. Throughput: 0: 930.3. Samples: 411386. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:46:51,820][00359] Avg episode reward: [(0, '16.495')] +[2023-02-28 10:46:51,832][13073] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000401_1642496.pth... +[2023-02-28 10:46:51,957][13073] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000180_737280.pth +[2023-02-28 10:46:51,986][13073] Saving new best policy, reward=16.495! +[2023-02-28 10:46:56,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3755.3, 300 sec: 3748.9). Total num frames: 1658880. Throughput: 0: 926.3. Samples: 413510. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:46:56,825][00359] Avg episode reward: [(0, '16.991')] +[2023-02-28 10:46:56,828][13073] Saving new best policy, reward=16.991! +[2023-02-28 10:47:00,790][13087] Updated weights for policy 0, policy_version 410 (0.0019) +[2023-02-28 10:47:01,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 1683456. Throughput: 0: 971.4. Samples: 420480. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-28 10:47:01,822][00359] Avg episode reward: [(0, '17.725')] +[2023-02-28 10:47:01,835][13073] Saving new best policy, reward=17.725! +[2023-02-28 10:47:06,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3823.1, 300 sec: 3804.4). Total num frames: 1703936. Throughput: 0: 950.4. Samples: 426686. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:47:06,820][00359] Avg episode reward: [(0, '17.923')] +[2023-02-28 10:47:06,821][13073] Saving new best policy, reward=17.923! +[2023-02-28 10:47:11,821][00359] Fps is (10 sec: 3275.3, 60 sec: 3754.5, 300 sec: 3762.7). Total num frames: 1716224. Throughput: 0: 925.1. Samples: 428918. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-28 10:47:11,824][00359] Avg episode reward: [(0, '18.933')] +[2023-02-28 10:47:11,834][13073] Saving new best policy, reward=18.933! +[2023-02-28 10:47:12,111][13087] Updated weights for policy 0, policy_version 420 (0.0020) +[2023-02-28 10:47:16,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 1736704. Throughput: 0: 933.4. Samples: 433638. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:47:16,824][00359] Avg episode reward: [(0, '18.064')] +[2023-02-28 10:47:21,817][00359] Fps is (10 sec: 4097.8, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 1757184. Throughput: 0: 974.1. Samples: 440716. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-28 10:47:21,819][00359] Avg episode reward: [(0, '17.998')] +[2023-02-28 10:47:21,839][13087] Updated weights for policy 0, policy_version 430 (0.0050) +[2023-02-28 10:47:26,821][00359] Fps is (10 sec: 4094.2, 60 sec: 3754.4, 300 sec: 3790.5). Total num frames: 1777664. Throughput: 0: 976.9. Samples: 444240. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:47:26,824][00359] Avg episode reward: [(0, '17.833')] +[2023-02-28 10:47:31,822][00359] Fps is (10 sec: 3684.3, 60 sec: 3822.6, 300 sec: 3762.7). Total num frames: 1794048. Throughput: 0: 931.7. Samples: 448900. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:47:31,830][00359] Avg episode reward: [(0, '18.890')] +[2023-02-28 10:47:33,794][13087] Updated weights for policy 0, policy_version 440 (0.0035) +[2023-02-28 10:47:36,817][00359] Fps is (10 sec: 3688.0, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 1814528. Throughput: 0: 950.0. Samples: 454138. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:47:36,826][00359] Avg episode reward: [(0, '19.300')] +[2023-02-28 10:47:36,829][13073] Saving new best policy, reward=19.300! +[2023-02-28 10:47:41,817][00359] Fps is (10 sec: 4098.3, 60 sec: 3822.9, 300 sec: 3776.6). Total num frames: 1835008. Throughput: 0: 980.1. Samples: 457614. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:47:41,820][00359] Avg episode reward: [(0, '19.309')] +[2023-02-28 10:47:41,829][13073] Saving new best policy, reward=19.309! +[2023-02-28 10:47:42,972][13087] Updated weights for policy 0, policy_version 450 (0.0022) +[2023-02-28 10:47:46,819][00359] Fps is (10 sec: 4095.0, 60 sec: 3822.8, 300 sec: 3790.5). Total num frames: 1855488. Throughput: 0: 974.7. Samples: 464344. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:47:46,822][00359] Avg episode reward: [(0, '20.647')] +[2023-02-28 10:47:46,824][13073] Saving new best policy, reward=20.647! +[2023-02-28 10:47:51,821][00359] Fps is (10 sec: 3684.7, 60 sec: 3822.6, 300 sec: 3762.7). Total num frames: 1871872. Throughput: 0: 932.7. Samples: 468662. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:47:51,826][00359] Avg episode reward: [(0, '20.643')] +[2023-02-28 10:47:55,304][13087] Updated weights for policy 0, policy_version 460 (0.0032) +[2023-02-28 10:47:56,817][00359] Fps is (10 sec: 3277.7, 60 sec: 3822.9, 300 sec: 3748.9). Total num frames: 1888256. Throughput: 0: 933.9. Samples: 470938. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:47:56,823][00359] Avg episode reward: [(0, '20.428')] +[2023-02-28 10:48:01,817][00359] Fps is (10 sec: 4097.8, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 1912832. Throughput: 0: 984.2. Samples: 477928. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:48:01,825][00359] Avg episode reward: [(0, '19.356')] +[2023-02-28 10:48:03,971][13087] Updated weights for policy 0, policy_version 470 (0.0016) +[2023-02-28 10:48:06,823][00359] Fps is (10 sec: 4502.6, 60 sec: 3822.5, 300 sec: 3790.5). Total num frames: 1933312. Throughput: 0: 967.5. Samples: 484258. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:48:06,831][00359] Avg episode reward: [(0, '17.959')] +[2023-02-28 10:48:11,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3891.5, 300 sec: 3776.7). Total num frames: 1949696. Throughput: 0: 938.6. Samples: 486472. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:48:11,825][00359] Avg episode reward: [(0, '16.838')] +[2023-02-28 10:48:16,351][13087] Updated weights for policy 0, policy_version 480 (0.0019) +[2023-02-28 10:48:16,817][00359] Fps is (10 sec: 3279.0, 60 sec: 3822.9, 300 sec: 3748.9). Total num frames: 1966080. Throughput: 0: 941.7. Samples: 491270. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-28 10:48:16,819][00359] Avg episode reward: [(0, '16.060')] +[2023-02-28 10:48:21,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3790.5). Total num frames: 1990656. Throughput: 0: 981.7. Samples: 498316. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-28 10:48:21,824][00359] Avg episode reward: [(0, '16.728')] +[2023-02-28 10:48:25,055][13087] Updated weights for policy 0, policy_version 490 (0.0012) +[2023-02-28 10:48:26,817][00359] Fps is (10 sec: 4505.5, 60 sec: 3891.5, 300 sec: 3804.4). Total num frames: 2011136. Throughput: 0: 983.3. Samples: 501864. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:48:26,824][00359] Avg episode reward: [(0, '16.334')] +[2023-02-28 10:48:31,819][00359] Fps is (10 sec: 3276.2, 60 sec: 3823.2, 300 sec: 3762.7). Total num frames: 2023424. Throughput: 0: 925.6. Samples: 505996. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:48:31,821][00359] Avg episode reward: [(0, '16.200')] +[2023-02-28 10:48:36,817][00359] Fps is (10 sec: 2457.7, 60 sec: 3686.4, 300 sec: 3735.1). Total num frames: 2035712. Throughput: 0: 910.0. Samples: 509606. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:48:36,822][00359] Avg episode reward: [(0, '17.152')] +[2023-02-28 10:48:40,986][13087] Updated weights for policy 0, policy_version 500 (0.0029) +[2023-02-28 10:48:41,817][00359] Fps is (10 sec: 2458.0, 60 sec: 3549.9, 300 sec: 3721.1). Total num frames: 2048000. Throughput: 0: 901.3. Samples: 511496. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:48:41,819][00359] Avg episode reward: [(0, '18.793')] +[2023-02-28 10:48:46,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3618.3, 300 sec: 3748.9). Total num frames: 2072576. Throughput: 0: 891.2. Samples: 518030. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:48:46,821][00359] Avg episode reward: [(0, '19.202')] +[2023-02-28 10:48:49,691][13087] Updated weights for policy 0, policy_version 510 (0.0015) +[2023-02-28 10:48:51,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3686.7, 300 sec: 3735.0). Total num frames: 2093056. Throughput: 0: 887.1. Samples: 524170. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:48:51,820][00359] Avg episode reward: [(0, '19.822')] +[2023-02-28 10:48:51,832][13073] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000511_2093056.pth... +[2023-02-28 10:48:51,981][13073] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000289_1183744.pth +[2023-02-28 10:48:56,819][00359] Fps is (10 sec: 3685.6, 60 sec: 3686.3, 300 sec: 3721.1). Total num frames: 2109440. Throughput: 0: 885.3. Samples: 526314. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:48:56,821][00359] Avg episode reward: [(0, '20.236')] +[2023-02-28 10:49:01,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3721.1). Total num frames: 2125824. Throughput: 0: 893.3. Samples: 531468. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:49:01,819][00359] Avg episode reward: [(0, '21.713')] +[2023-02-28 10:49:01,836][13073] Saving new best policy, reward=21.713! +[2023-02-28 10:49:02,146][13087] Updated weights for policy 0, policy_version 520 (0.0029) +[2023-02-28 10:49:06,817][00359] Fps is (10 sec: 4096.9, 60 sec: 3618.5, 300 sec: 3748.9). Total num frames: 2150400. Throughput: 0: 891.9. Samples: 538452. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-28 10:49:06,819][00359] Avg episode reward: [(0, '20.358')] +[2023-02-28 10:49:11,360][13087] Updated weights for policy 0, policy_version 530 (0.0012) +[2023-02-28 10:49:11,818][00359] Fps is (10 sec: 4504.9, 60 sec: 3686.3, 300 sec: 3735.0). Total num frames: 2170880. Throughput: 0: 890.9. Samples: 541954. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-28 10:49:11,821][00359] Avg episode reward: [(0, '20.662')] +[2023-02-28 10:49:16,817][00359] Fps is (10 sec: 3276.7, 60 sec: 3618.1, 300 sec: 3707.2). Total num frames: 2183168. Throughput: 0: 897.6. Samples: 546386. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:49:16,821][00359] Avg episode reward: [(0, '20.508')] +[2023-02-28 10:49:21,817][00359] Fps is (10 sec: 3277.3, 60 sec: 3549.9, 300 sec: 3735.0). Total num frames: 2203648. Throughput: 0: 943.4. Samples: 552058. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:49:21,819][00359] Avg episode reward: [(0, '20.354')] +[2023-02-28 10:49:22,967][13087] Updated weights for policy 0, policy_version 540 (0.0015) +[2023-02-28 10:49:26,817][00359] Fps is (10 sec: 4505.7, 60 sec: 3618.2, 300 sec: 3776.7). Total num frames: 2228224. Throughput: 0: 980.6. Samples: 555624. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:49:26,819][00359] Avg episode reward: [(0, '19.364')] +[2023-02-28 10:49:31,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3754.8, 300 sec: 3804.4). Total num frames: 2248704. Throughput: 0: 980.4. Samples: 562148. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:49:31,820][00359] Avg episode reward: [(0, '19.589')] +[2023-02-28 10:49:32,831][13087] Updated weights for policy 0, policy_version 550 (0.0019) +[2023-02-28 10:49:36,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 2265088. Throughput: 0: 945.1. Samples: 566700. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:49:36,821][00359] Avg episode reward: [(0, '19.502')] +[2023-02-28 10:49:41,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 2281472. Throughput: 0: 954.4. Samples: 569260. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:49:41,820][00359] Avg episode reward: [(0, '19.033')] +[2023-02-28 10:49:43,593][13087] Updated weights for policy 0, policy_version 560 (0.0016) +[2023-02-28 10:49:46,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3804.5). Total num frames: 2306048. Throughput: 0: 999.4. Samples: 576440. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:49:46,819][00359] Avg episode reward: [(0, '18.391')] +[2023-02-28 10:49:51,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3804.4). Total num frames: 2326528. Throughput: 0: 977.1. Samples: 582420. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:49:51,820][00359] Avg episode reward: [(0, '19.102')] +[2023-02-28 10:49:54,126][13087] Updated weights for policy 0, policy_version 570 (0.0022) +[2023-02-28 10:49:56,818][00359] Fps is (10 sec: 3276.5, 60 sec: 3823.0, 300 sec: 3776.6). Total num frames: 2338816. Throughput: 0: 950.2. Samples: 584714. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:49:56,822][00359] Avg episode reward: [(0, '19.533')] +[2023-02-28 10:50:01,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3790.5). Total num frames: 2363392. Throughput: 0: 970.9. Samples: 590078. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-28 10:50:01,825][00359] Avg episode reward: [(0, '18.901')] +[2023-02-28 10:50:04,437][13087] Updated weights for policy 0, policy_version 580 (0.0024) +[2023-02-28 10:50:06,817][00359] Fps is (10 sec: 4506.0, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 2383872. Throughput: 0: 1004.0. Samples: 597238. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:50:06,823][00359] Avg episode reward: [(0, '20.927')] +[2023-02-28 10:50:11,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3891.3, 300 sec: 3818.3). Total num frames: 2404352. Throughput: 0: 997.0. Samples: 600490. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:50:11,821][00359] Avg episode reward: [(0, '20.060')] +[2023-02-28 10:50:15,372][13087] Updated weights for policy 0, policy_version 590 (0.0011) +[2023-02-28 10:50:16,817][00359] Fps is (10 sec: 3276.6, 60 sec: 3891.2, 300 sec: 3776.6). Total num frames: 2416640. Throughput: 0: 951.3. Samples: 604956. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:50:16,823][00359] Avg episode reward: [(0, '21.672')] +[2023-02-28 10:50:21,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3790.6). Total num frames: 2441216. Throughput: 0: 980.4. Samples: 610820. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:50:21,820][00359] Avg episode reward: [(0, '21.126')] +[2023-02-28 10:50:25,097][13087] Updated weights for policy 0, policy_version 600 (0.0027) +[2023-02-28 10:50:26,817][00359] Fps is (10 sec: 4505.9, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 2461696. Throughput: 0: 1002.4. Samples: 614370. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-28 10:50:26,820][00359] Avg episode reward: [(0, '21.598')] +[2023-02-28 10:50:31,817][00359] Fps is (10 sec: 4095.7, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 2482176. Throughput: 0: 982.9. Samples: 620672. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:50:31,820][00359] Avg episode reward: [(0, '21.683')] +[2023-02-28 10:50:36,771][13087] Updated weights for policy 0, policy_version 610 (0.0035) +[2023-02-28 10:50:36,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3790.5). Total num frames: 2498560. Throughput: 0: 949.6. Samples: 625154. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:50:36,819][00359] Avg episode reward: [(0, '21.805')] +[2023-02-28 10:50:36,821][13073] Saving new best policy, reward=21.805! +[2023-02-28 10:50:41,817][00359] Fps is (10 sec: 3277.0, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 2514944. Throughput: 0: 954.6. Samples: 627668. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:50:41,819][00359] Avg episode reward: [(0, '21.935')] +[2023-02-28 10:50:41,833][13073] Saving new best policy, reward=21.935! +[2023-02-28 10:50:46,321][13087] Updated weights for policy 0, policy_version 620 (0.0019) +[2023-02-28 10:50:46,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 2539520. Throughput: 0: 992.5. Samples: 634740. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:50:46,819][00359] Avg episode reward: [(0, '21.525')] +[2023-02-28 10:50:51,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3818.4). Total num frames: 2560000. Throughput: 0: 962.2. Samples: 640538. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:50:51,823][00359] Avg episode reward: [(0, '21.567')] +[2023-02-28 10:50:51,836][13073] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000625_2560000.pth... +[2023-02-28 10:50:52,034][13073] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000401_1642496.pth +[2023-02-28 10:50:56,819][00359] Fps is (10 sec: 3276.0, 60 sec: 3891.1, 300 sec: 3790.5). Total num frames: 2572288. Throughput: 0: 937.7. Samples: 642690. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:50:56,824][00359] Avg episode reward: [(0, '22.404')] +[2023-02-28 10:50:56,827][13073] Saving new best policy, reward=22.404! +[2023-02-28 10:50:58,646][13087] Updated weights for policy 0, policy_version 630 (0.0035) +[2023-02-28 10:51:01,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3790.6). Total num frames: 2592768. Throughput: 0: 955.7. Samples: 647964. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-28 10:51:01,818][00359] Avg episode reward: [(0, '23.729')] +[2023-02-28 10:51:01,833][13073] Saving new best policy, reward=23.729! +[2023-02-28 10:51:06,817][00359] Fps is (10 sec: 4506.7, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 2617344. Throughput: 0: 981.2. Samples: 654974. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:51:06,819][00359] Avg episode reward: [(0, '22.447')] +[2023-02-28 10:51:07,496][13087] Updated weights for policy 0, policy_version 640 (0.0016) +[2023-02-28 10:51:11,817][00359] Fps is (10 sec: 4095.8, 60 sec: 3822.9, 300 sec: 3818.3). Total num frames: 2633728. Throughput: 0: 971.5. Samples: 658088. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:51:11,820][00359] Avg episode reward: [(0, '24.366')] +[2023-02-28 10:51:11,828][13073] Saving new best policy, reward=24.366! +[2023-02-28 10:51:16,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3790.5). Total num frames: 2650112. Throughput: 0: 929.3. Samples: 662488. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-28 10:51:16,822][00359] Avg episode reward: [(0, '24.464')] +[2023-02-28 10:51:16,826][13073] Saving new best policy, reward=24.464! +[2023-02-28 10:51:20,002][13087] Updated weights for policy 0, policy_version 650 (0.0032) +[2023-02-28 10:51:21,817][00359] Fps is (10 sec: 3686.5, 60 sec: 3822.9, 300 sec: 3790.5). Total num frames: 2670592. Throughput: 0: 957.9. Samples: 668258. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:51:21,820][00359] Avg episode reward: [(0, '25.693')] +[2023-02-28 10:51:21,829][13073] Saving new best policy, reward=25.693! +[2023-02-28 10:51:26,822][00359] Fps is (10 sec: 4503.2, 60 sec: 3890.8, 300 sec: 3832.1). Total num frames: 2695168. Throughput: 0: 978.7. Samples: 671714. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:51:26,832][00359] Avg episode reward: [(0, '23.682')] +[2023-02-28 10:51:28,589][13087] Updated weights for policy 0, policy_version 660 (0.0020) +[2023-02-28 10:51:31,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3823.0, 300 sec: 3818.3). Total num frames: 2711552. Throughput: 0: 963.3. Samples: 678090. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:51:31,824][00359] Avg episode reward: [(0, '23.970')] +[2023-02-28 10:51:36,817][00359] Fps is (10 sec: 2868.8, 60 sec: 3754.7, 300 sec: 3790.5). Total num frames: 2723840. Throughput: 0: 932.9. Samples: 682520. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:51:36,819][00359] Avg episode reward: [(0, '23.975')] +[2023-02-28 10:51:40,809][13087] Updated weights for policy 0, policy_version 670 (0.0012) +[2023-02-28 10:51:41,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3804.4). Total num frames: 2748416. Throughput: 0: 947.2. Samples: 685314. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-28 10:51:41,825][00359] Avg episode reward: [(0, '23.503')] +[2023-02-28 10:51:46,817][00359] Fps is (10 sec: 4915.2, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 2772992. Throughput: 0: 989.9. Samples: 692508. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:51:46,819][00359] Avg episode reward: [(0, '22.806')] +[2023-02-28 10:51:49,932][13087] Updated weights for policy 0, policy_version 680 (0.0020) +[2023-02-28 10:51:51,817][00359] Fps is (10 sec: 4095.8, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 2789376. Throughput: 0: 960.1. Samples: 698178. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:51:51,820][00359] Avg episode reward: [(0, '21.260')] +[2023-02-28 10:51:56,817][00359] Fps is (10 sec: 3276.7, 60 sec: 3891.3, 300 sec: 3804.4). Total num frames: 2805760. Throughput: 0: 941.7. Samples: 700466. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:51:56,824][00359] Avg episode reward: [(0, '21.467')] +[2023-02-28 10:52:01,561][13087] Updated weights for policy 0, policy_version 690 (0.0029) +[2023-02-28 10:52:01,817][00359] Fps is (10 sec: 3686.6, 60 sec: 3891.2, 300 sec: 3804.4). Total num frames: 2826240. Throughput: 0: 969.3. Samples: 706108. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:52:01,824][00359] Avg episode reward: [(0, '20.273')] +[2023-02-28 10:52:06,817][00359] Fps is (10 sec: 4505.8, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 2850816. Throughput: 0: 1002.6. Samples: 713376. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:52:06,825][00359] Avg episode reward: [(0, '19.283')] +[2023-02-28 10:52:10,870][13087] Updated weights for policy 0, policy_version 700 (0.0012) +[2023-02-28 10:52:11,817][00359] Fps is (10 sec: 4095.8, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 2867200. Throughput: 0: 992.7. Samples: 716382. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:52:11,822][00359] Avg episode reward: [(0, '19.019')] +[2023-02-28 10:52:16,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 2883584. Throughput: 0: 951.3. Samples: 720900. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:52:16,824][00359] Avg episode reward: [(0, '19.624')] +[2023-02-28 10:52:21,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3818.4). Total num frames: 2904064. Throughput: 0: 988.3. Samples: 726996. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:52:21,824][00359] Avg episode reward: [(0, '19.475')] +[2023-02-28 10:52:22,184][13087] Updated weights for policy 0, policy_version 710 (0.0016) +[2023-02-28 10:52:26,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3891.5, 300 sec: 3846.1). Total num frames: 2928640. Throughput: 0: 1004.7. Samples: 730524. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:52:26,822][00359] Avg episode reward: [(0, '20.881')] +[2023-02-28 10:52:31,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 2945024. Throughput: 0: 981.1. Samples: 736660. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:52:31,819][00359] Avg episode reward: [(0, '21.020')] +[2023-02-28 10:52:32,408][13087] Updated weights for policy 0, policy_version 720 (0.0022) +[2023-02-28 10:52:36,818][00359] Fps is (10 sec: 2866.8, 60 sec: 3891.1, 300 sec: 3804.4). Total num frames: 2957312. Throughput: 0: 936.6. Samples: 740328. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:52:36,822][00359] Avg episode reward: [(0, '20.503')] +[2023-02-28 10:52:41,817][00359] Fps is (10 sec: 2457.7, 60 sec: 3686.4, 300 sec: 3776.7). Total num frames: 2969600. Throughput: 0: 926.1. Samples: 742138. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:52:41,822][00359] Avg episode reward: [(0, '21.789')] +[2023-02-28 10:52:46,611][13087] Updated weights for policy 0, policy_version 730 (0.0029) +[2023-02-28 10:52:46,817][00359] Fps is (10 sec: 3277.2, 60 sec: 3618.1, 300 sec: 3790.6). Total num frames: 2990080. Throughput: 0: 906.1. Samples: 746882. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:52:46,824][00359] Avg episode reward: [(0, '23.301')] +[2023-02-28 10:52:51,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3818.3). Total num frames: 3014656. Throughput: 0: 902.8. Samples: 754004. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:52:51,819][00359] Avg episode reward: [(0, '23.458')] +[2023-02-28 10:52:51,834][13073] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000736_3014656.pth... +[2023-02-28 10:52:51,997][13073] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000511_2093056.pth +[2023-02-28 10:52:56,820][00359] Fps is (10 sec: 3685.2, 60 sec: 3686.2, 300 sec: 3776.6). Total num frames: 3026944. Throughput: 0: 884.9. Samples: 756204. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:52:56,823][00359] Avg episode reward: [(0, '22.535')] +[2023-02-28 10:52:57,263][13087] Updated weights for policy 0, policy_version 740 (0.0013) +[2023-02-28 10:53:01,817][00359] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3762.9). Total num frames: 3043328. Throughput: 0: 886.6. Samples: 760798. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:53:01,820][00359] Avg episode reward: [(0, '23.516')] +[2023-02-28 10:53:06,817][00359] Fps is (10 sec: 4097.4, 60 sec: 3618.1, 300 sec: 3790.5). Total num frames: 3067904. Throughput: 0: 901.2. Samples: 767548. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:53:06,819][00359] Avg episode reward: [(0, '23.657')] +[2023-02-28 10:53:07,539][13087] Updated weights for policy 0, policy_version 750 (0.0027) +[2023-02-28 10:53:11,818][00359] Fps is (10 sec: 4505.0, 60 sec: 3686.4, 300 sec: 3804.4). Total num frames: 3088384. Throughput: 0: 901.5. Samples: 771094. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:53:11,822][00359] Avg episode reward: [(0, '22.494')] +[2023-02-28 10:53:16,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3776.7). Total num frames: 3104768. Throughput: 0: 886.2. Samples: 776540. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-28 10:53:16,819][00359] Avg episode reward: [(0, '22.620')] +[2023-02-28 10:53:18,770][13087] Updated weights for policy 0, policy_version 760 (0.0024) +[2023-02-28 10:53:21,817][00359] Fps is (10 sec: 3277.2, 60 sec: 3618.2, 300 sec: 3762.8). Total num frames: 3121152. Throughput: 0: 904.8. Samples: 781042. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:53:21,819][00359] Avg episode reward: [(0, '23.340')] +[2023-02-28 10:53:26,817][00359] Fps is (10 sec: 4095.9, 60 sec: 3618.1, 300 sec: 3804.4). Total num frames: 3145728. Throughput: 0: 943.1. Samples: 784580. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:53:26,820][00359] Avg episode reward: [(0, '23.681')] +[2023-02-28 10:53:28,448][13087] Updated weights for policy 0, policy_version 770 (0.0025) +[2023-02-28 10:53:31,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3832.2). Total num frames: 3166208. Throughput: 0: 998.3. Samples: 791806. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:53:31,820][00359] Avg episode reward: [(0, '22.746')] +[2023-02-28 10:53:36,820][00359] Fps is (10 sec: 3685.2, 60 sec: 3754.5, 300 sec: 3846.0). Total num frames: 3182592. Throughput: 0: 948.3. Samples: 796680. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:53:36,823][00359] Avg episode reward: [(0, '23.583')] +[2023-02-28 10:53:40,087][13087] Updated weights for policy 0, policy_version 780 (0.0026) +[2023-02-28 10:53:41,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3818.3). Total num frames: 3198976. Throughput: 0: 950.6. Samples: 798978. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:53:41,823][00359] Avg episode reward: [(0, '24.760')] +[2023-02-28 10:53:46,819][00359] Fps is (10 sec: 4096.7, 60 sec: 3891.1, 300 sec: 3832.2). Total num frames: 3223552. Throughput: 0: 992.1. Samples: 805446. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-28 10:53:46,825][00359] Avg episode reward: [(0, '24.643')] +[2023-02-28 10:53:49,205][13087] Updated weights for policy 0, policy_version 790 (0.0028) +[2023-02-28 10:53:51,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 3244032. Throughput: 0: 1000.2. Samples: 812558. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:53:51,822][00359] Avg episode reward: [(0, '24.643')] +[2023-02-28 10:53:56,817][00359] Fps is (10 sec: 3686.9, 60 sec: 3891.4, 300 sec: 3846.1). Total num frames: 3260416. Throughput: 0: 972.0. Samples: 814834. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:53:56,822][00359] Avg episode reward: [(0, '24.438')] +[2023-02-28 10:54:01,124][13087] Updated weights for policy 0, policy_version 800 (0.0014) +[2023-02-28 10:54:01,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 3276800. Throughput: 0: 952.4. Samples: 819400. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:54:01,820][00359] Avg episode reward: [(0, '26.023')] +[2023-02-28 10:54:01,833][13073] Saving new best policy, reward=26.023! +[2023-02-28 10:54:06,817][00359] Fps is (10 sec: 4096.1, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 3301376. Throughput: 0: 1003.3. Samples: 826190. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:54:06,820][00359] Avg episode reward: [(0, '24.366')] +[2023-02-28 10:54:09,801][13087] Updated weights for policy 0, policy_version 810 (0.0014) +[2023-02-28 10:54:11,817][00359] Fps is (10 sec: 4915.2, 60 sec: 3959.6, 300 sec: 3873.8). Total num frames: 3325952. Throughput: 0: 1005.1. Samples: 829810. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:54:11,819][00359] Avg episode reward: [(0, '23.716')] +[2023-02-28 10:54:16,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 3338240. Throughput: 0: 961.9. Samples: 835092. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-28 10:54:16,823][00359] Avg episode reward: [(0, '23.865')] +[2023-02-28 10:54:21,821][00359] Fps is (10 sec: 2865.9, 60 sec: 3890.9, 300 sec: 3818.2). Total num frames: 3354624. Throughput: 0: 961.8. Samples: 839960. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:54:21,829][00359] Avg episode reward: [(0, '23.673')] +[2023-02-28 10:54:21,859][13087] Updated weights for policy 0, policy_version 820 (0.0036) +[2023-02-28 10:54:26,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 3379200. Throughput: 0: 989.8. Samples: 843520. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-28 10:54:26,825][00359] Avg episode reward: [(0, '22.602')] +[2023-02-28 10:54:30,451][13087] Updated weights for policy 0, policy_version 830 (0.0018) +[2023-02-28 10:54:31,817][00359] Fps is (10 sec: 4917.4, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 3403776. Throughput: 0: 1006.2. Samples: 850722. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:54:31,820][00359] Avg episode reward: [(0, '23.023')] +[2023-02-28 10:54:36,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3891.4, 300 sec: 3846.1). Total num frames: 3416064. Throughput: 0: 955.2. Samples: 855544. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:54:36,823][00359] Avg episode reward: [(0, '24.137')] +[2023-02-28 10:54:41,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 3832.2). Total num frames: 3436544. Throughput: 0: 954.2. Samples: 857774. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:54:41,820][00359] Avg episode reward: [(0, '24.581')] +[2023-02-28 10:54:42,787][13087] Updated weights for policy 0, policy_version 840 (0.0020) +[2023-02-28 10:54:46,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3891.3, 300 sec: 3832.2). Total num frames: 3457024. Throughput: 0: 997.4. Samples: 864282. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:54:46,819][00359] Avg episode reward: [(0, '23.648')] +[2023-02-28 10:54:51,355][13087] Updated weights for policy 0, policy_version 850 (0.0020) +[2023-02-28 10:54:51,817][00359] Fps is (10 sec: 4505.5, 60 sec: 3959.5, 300 sec: 3873.9). Total num frames: 3481600. Throughput: 0: 1002.5. Samples: 871302. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-28 10:54:51,820][00359] Avg episode reward: [(0, '24.607')] +[2023-02-28 10:54:51,836][13073] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000850_3481600.pth... +[2023-02-28 10:54:51,982][13073] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000625_2560000.pth +[2023-02-28 10:54:56,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 3493888. Throughput: 0: 970.4. Samples: 873478. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:54:56,819][00359] Avg episode reward: [(0, '25.302')] +[2023-02-28 10:55:01,817][00359] Fps is (10 sec: 2867.3, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 3510272. Throughput: 0: 954.8. Samples: 878060. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:55:01,819][00359] Avg episode reward: [(0, '25.143')] +[2023-02-28 10:55:03,529][13087] Updated weights for policy 0, policy_version 860 (0.0023) +[2023-02-28 10:55:06,817][00359] Fps is (10 sec: 4505.5, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 3538944. Throughput: 0: 1002.9. Samples: 885088. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:55:06,823][00359] Avg episode reward: [(0, '23.350')] +[2023-02-28 10:55:11,817][00359] Fps is (10 sec: 4915.2, 60 sec: 3891.2, 300 sec: 3873.9). Total num frames: 3559424. Throughput: 0: 1003.3. Samples: 888670. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:55:11,819][00359] Avg episode reward: [(0, '24.681')] +[2023-02-28 10:55:12,567][13087] Updated weights for policy 0, policy_version 870 (0.0018) +[2023-02-28 10:55:16,817][00359] Fps is (10 sec: 3276.9, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 3571712. Throughput: 0: 958.3. Samples: 893846. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:55:16,822][00359] Avg episode reward: [(0, '24.808')] +[2023-02-28 10:55:21,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3959.8, 300 sec: 3832.2). Total num frames: 3592192. Throughput: 0: 959.0. Samples: 898700. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-28 10:55:21,822][00359] Avg episode reward: [(0, '24.211')] +[2023-02-28 10:55:24,240][13087] Updated weights for policy 0, policy_version 880 (0.0025) +[2023-02-28 10:55:26,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 3616768. Throughput: 0: 988.5. Samples: 902258. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:55:26,822][00359] Avg episode reward: [(0, '24.015')] +[2023-02-28 10:55:31,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 3637248. Throughput: 0: 1004.0. Samples: 909464. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:55:31,819][00359] Avg episode reward: [(0, '25.143')] +[2023-02-28 10:55:33,749][13087] Updated weights for policy 0, policy_version 890 (0.0014) +[2023-02-28 10:55:36,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 3653632. Throughput: 0: 954.3. Samples: 914246. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:55:36,819][00359] Avg episode reward: [(0, '27.737')] +[2023-02-28 10:55:36,822][13073] Saving new best policy, reward=27.737! +[2023-02-28 10:55:41,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 3670016. Throughput: 0: 954.9. Samples: 916450. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:55:41,826][00359] Avg episode reward: [(0, '28.109')] +[2023-02-28 10:55:41,836][13073] Saving new best policy, reward=28.109! +[2023-02-28 10:55:45,135][13087] Updated weights for policy 0, policy_version 900 (0.0012) +[2023-02-28 10:55:46,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 3694592. Throughput: 0: 995.6. Samples: 922860. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:55:46,828][00359] Avg episode reward: [(0, '28.449')] +[2023-02-28 10:55:46,831][13073] Saving new best policy, reward=28.449! +[2023-02-28 10:55:51,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3873.9). Total num frames: 3715072. Throughput: 0: 991.5. Samples: 929704. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:55:51,819][00359] Avg episode reward: [(0, '29.074')] +[2023-02-28 10:55:51,837][13073] Saving new best policy, reward=29.074! +[2023-02-28 10:55:55,344][13087] Updated weights for policy 0, policy_version 910 (0.0013) +[2023-02-28 10:55:56,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 3731456. Throughput: 0: 960.0. Samples: 931870. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:55:56,822][00359] Avg episode reward: [(0, '28.978')] +[2023-02-28 10:56:01,817][00359] Fps is (10 sec: 2867.2, 60 sec: 3891.2, 300 sec: 3818.3). Total num frames: 3743744. Throughput: 0: 946.8. Samples: 936450. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:56:01,825][00359] Avg episode reward: [(0, '29.005')] +[2023-02-28 10:56:06,034][13087] Updated weights for policy 0, policy_version 920 (0.0025) +[2023-02-28 10:56:06,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 3768320. Throughput: 0: 992.6. Samples: 943366. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:56:06,819][00359] Avg episode reward: [(0, '27.164')] +[2023-02-28 10:56:11,817][00359] Fps is (10 sec: 4915.2, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 3792896. Throughput: 0: 993.2. Samples: 946954. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:56:11,823][00359] Avg episode reward: [(0, '26.230')] +[2023-02-28 10:56:16,709][13087] Updated weights for policy 0, policy_version 930 (0.0032) +[2023-02-28 10:56:16,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 3809280. Throughput: 0: 950.9. Samples: 952254. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:56:16,822][00359] Avg episode reward: [(0, '26.660')] +[2023-02-28 10:56:21,817][00359] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3832.3). Total num frames: 3825664. Throughput: 0: 950.6. Samples: 957022. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-28 10:56:21,820][00359] Avg episode reward: [(0, '25.987')] +[2023-02-28 10:56:26,811][13087] Updated weights for policy 0, policy_version 940 (0.0031) +[2023-02-28 10:56:26,817][00359] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 3850240. Throughput: 0: 981.2. Samples: 960604. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:56:26,825][00359] Avg episode reward: [(0, '25.342')] +[2023-02-28 10:56:31,817][00359] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 3870720. Throughput: 0: 1000.8. Samples: 967894. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:56:31,819][00359] Avg episode reward: [(0, '25.178')] +[2023-02-28 10:56:36,821][00359] Fps is (10 sec: 3684.7, 60 sec: 3890.9, 300 sec: 3859.9). Total num frames: 3887104. Throughput: 0: 947.6. Samples: 972352. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:56:36,832][00359] Avg episode reward: [(0, '25.620')] +[2023-02-28 10:56:38,283][13087] Updated weights for policy 0, policy_version 950 (0.0014) +[2023-02-28 10:56:41,819][00359] Fps is (10 sec: 2866.7, 60 sec: 3822.8, 300 sec: 3818.3). Total num frames: 3899392. Throughput: 0: 938.9. Samples: 974122. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-28 10:56:41,821][00359] Avg episode reward: [(0, '25.282')] +[2023-02-28 10:56:46,817][00359] Fps is (10 sec: 2458.7, 60 sec: 3618.1, 300 sec: 3804.4). Total num frames: 3911680. Throughput: 0: 923.2. Samples: 977996. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:56:46,820][00359] Avg episode reward: [(0, '25.405')] +[2023-02-28 10:56:51,112][13087] Updated weights for policy 0, policy_version 960 (0.0036) +[2023-02-28 10:56:51,817][00359] Fps is (10 sec: 3277.4, 60 sec: 3618.1, 300 sec: 3818.3). Total num frames: 3932160. Throughput: 0: 907.9. Samples: 984220. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-28 10:56:51,821][00359] Avg episode reward: [(0, '24.392')] +[2023-02-28 10:56:51,839][13073] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000960_3932160.pth... +[2023-02-28 10:56:51,976][13073] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000736_3014656.pth +[2023-02-28 10:56:56,817][00359] Fps is (10 sec: 4095.9, 60 sec: 3686.4, 300 sec: 3818.3). Total num frames: 3952640. Throughput: 0: 901.9. Samples: 987540. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:56:56,823][00359] Avg episode reward: [(0, '24.672')] +[2023-02-28 10:57:01,817][00359] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3790.5). Total num frames: 3969024. Throughput: 0: 884.9. Samples: 992074. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-28 10:57:01,819][00359] Avg episode reward: [(0, '25.284')] +[2023-02-28 10:57:03,010][13087] Updated weights for policy 0, policy_version 970 (0.0022) +[2023-02-28 10:57:06,817][00359] Fps is (10 sec: 3276.9, 60 sec: 3618.1, 300 sec: 3790.5). Total num frames: 3985408. Throughput: 0: 905.0. Samples: 997746. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-28 10:57:06,819][00359] Avg episode reward: [(0, '25.079')] +[2023-02-28 10:57:10,218][13073] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-02-28 10:57:10,219][00359] Component Batcher_0 stopped! +[2023-02-28 10:57:10,219][13073] Stopping Batcher_0... +[2023-02-28 10:57:10,230][13073] Loop batcher_evt_loop terminating... +[2023-02-28 10:57:10,275][13093] Stopping RolloutWorker_w5... +[2023-02-28 10:57:10,276][00359] Component RolloutWorker_w5 stopped! +[2023-02-28 10:57:10,279][00359] Component RolloutWorker_w6 stopped! +[2023-02-28 10:57:10,284][13092] Stopping RolloutWorker_w6... +[2023-02-28 10:57:10,285][13087] Weights refcount: 2 0 +[2023-02-28 10:57:10,292][00359] Component InferenceWorker_p0-w0 stopped! +[2023-02-28 10:57:10,294][13087] Stopping InferenceWorker_p0-w0... +[2023-02-28 10:57:10,295][13087] Loop inference_proc0-0_evt_loop terminating... +[2023-02-28 10:57:10,284][13092] Loop rollout_proc6_evt_loop terminating... +[2023-02-28 10:57:10,277][13093] Loop rollout_proc5_evt_loop terminating... +[2023-02-28 10:57:10,313][00359] Component RolloutWorker_w2 stopped! +[2023-02-28 10:57:10,312][13089] Stopping RolloutWorker_w2... +[2023-02-28 10:57:10,322][13090] Stopping RolloutWorker_w3... +[2023-02-28 10:57:10,322][00359] Component RolloutWorker_w3 stopped! +[2023-02-28 10:57:10,316][13089] Loop rollout_proc2_evt_loop terminating... +[2023-02-28 10:57:10,330][13095] Stopping RolloutWorker_w1... +[2023-02-28 10:57:10,331][13090] Loop rollout_proc3_evt_loop terminating... +[2023-02-28 10:57:10,330][00359] Component RolloutWorker_w1 stopped! +[2023-02-28 10:57:10,335][13095] Loop rollout_proc1_evt_loop terminating... +[2023-02-28 10:57:10,334][00359] Component RolloutWorker_w4 stopped! +[2023-02-28 10:57:10,341][13094] Stopping RolloutWorker_w7... +[2023-02-28 10:57:10,341][13094] Loop rollout_proc7_evt_loop terminating... +[2023-02-28 10:57:10,343][13088] Stopping RolloutWorker_w0... +[2023-02-28 10:57:10,343][13088] Loop rollout_proc0_evt_loop terminating... +[2023-02-28 10:57:10,341][00359] Component RolloutWorker_w7 stopped! +[2023-02-28 10:57:10,332][13091] Stopping RolloutWorker_w4... +[2023-02-28 10:57:10,346][00359] Component RolloutWorker_w0 stopped! +[2023-02-28 10:57:10,347][13091] Loop rollout_proc4_evt_loop terminating... +[2023-02-28 10:57:10,381][13073] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000850_3481600.pth +[2023-02-28 10:57:10,394][13073] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-02-28 10:57:10,550][00359] Component LearnerWorker_p0 stopped! +[2023-02-28 10:57:10,557][00359] Waiting for process learner_proc0 to stop... +[2023-02-28 10:57:10,561][13073] Stopping LearnerWorker_p0... +[2023-02-28 10:57:10,562][13073] Loop learner_proc0_evt_loop terminating... +[2023-02-28 10:57:12,415][00359] Waiting for process inference_proc0-0 to join... +[2023-02-28 10:57:12,928][00359] Waiting for process rollout_proc0 to join... +[2023-02-28 10:57:12,931][00359] Waiting for process rollout_proc1 to join... +[2023-02-28 10:57:13,617][00359] Waiting for process rollout_proc2 to join... +[2023-02-28 10:57:13,619][00359] Waiting for process rollout_proc3 to join... +[2023-02-28 10:57:13,628][00359] Waiting for process rollout_proc4 to join... +[2023-02-28 10:57:13,629][00359] Waiting for process rollout_proc5 to join... +[2023-02-28 10:57:13,632][00359] Waiting for process rollout_proc6 to join... +[2023-02-28 10:57:13,635][00359] Waiting for process rollout_proc7 to join... +[2023-02-28 10:57:13,637][00359] Batcher 0 profile tree view: +batching: 26.2766, releasing_batches: 0.0205 +[2023-02-28 10:57:13,639][00359] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0046 + wait_policy_total: 522.3144 +update_model: 7.5178 + weight_update: 0.0012 +one_step: 0.0021 + handle_policy_step: 500.6087 + deserialize: 14.1537, stack: 3.0566, obs_to_device_normalize: 112.3597, forward: 238.4747, send_messages: 25.5776 + prepare_outputs: 81.4323 + to_cpu: 51.4396 +[2023-02-28 10:57:13,643][00359] Learner 0 profile tree view: +misc: 0.0052, prepare_batch: 15.7004 +train: 74.7901 + epoch_init: 0.0056, minibatch_init: 0.0059, losses_postprocess: 0.6190, kl_divergence: 0.5438, after_optimizer: 32.6475 + calculate_losses: 26.3938 + losses_init: 0.0036, forward_head: 1.8450, bptt_initial: 17.2795, tail: 1.0619, advantages_returns: 0.3091, losses: 3.3634 + bptt: 2.2688 + bptt_forward_core: 2.1838 + update: 14.0007 + clip: 1.3738 +[2023-02-28 10:57:13,652][00359] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.3231, enqueue_policy_requests: 139.8807, env_step: 805.5452, overhead: 20.2135, complete_rollouts: 6.6789 +save_policy_outputs: 19.5601 + split_output_tensors: 9.4226 +[2023-02-28 10:57:13,653][00359] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.3852, enqueue_policy_requests: 138.7508, env_step: 807.8148, overhead: 19.7247, complete_rollouts: 6.7552 +save_policy_outputs: 19.2049 + split_output_tensors: 9.2623 +[2023-02-28 10:57:13,654][00359] Loop Runner_EvtLoop terminating... +[2023-02-28 10:57:13,656][00359] Runner profile tree view: +main_loop: 1096.5054 +[2023-02-28 10:57:13,657][00359] Collected {0: 4005888}, FPS: 3653.3 +[2023-02-28 11:06:58,950][00359] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-28 11:06:58,952][00359] Overriding arg 'num_workers' with value 1 passed from command line +[2023-02-28 11:06:58,954][00359] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-02-28 11:06:58,956][00359] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-02-28 11:06:58,958][00359] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-02-28 11:06:58,960][00359] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-02-28 11:06:58,961][00359] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2023-02-28 11:06:58,963][00359] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-02-28 11:06:58,964][00359] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2023-02-28 11:06:58,965][00359] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2023-02-28 11:06:58,967][00359] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-02-28 11:06:58,968][00359] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-02-28 11:06:58,969][00359] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-02-28 11:06:58,970][00359] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-02-28 11:06:58,973][00359] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-02-28 11:06:59,005][00359] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-28 11:06:59,009][00359] RunningMeanStd input shape: (3, 72, 128) +[2023-02-28 11:06:59,014][00359] RunningMeanStd input shape: (1,) +[2023-02-28 11:06:59,040][00359] ConvEncoder: input_channels=3 +[2023-02-28 11:06:59,820][00359] Conv encoder output size: 512 +[2023-02-28 11:06:59,825][00359] Policy head output size: 512 +[2023-02-28 11:07:02,799][00359] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-02-28 11:07:04,084][00359] Num frames 100... +[2023-02-28 11:07:04,202][00359] Num frames 200... +[2023-02-28 11:07:04,317][00359] Num frames 300... +[2023-02-28 11:07:04,432][00359] Num frames 400... +[2023-02-28 11:07:04,552][00359] Num frames 500... +[2023-02-28 11:07:04,671][00359] Num frames 600... +[2023-02-28 11:07:04,786][00359] Num frames 700... +[2023-02-28 11:07:04,907][00359] Num frames 800... +[2023-02-28 11:07:05,022][00359] Num frames 900... +[2023-02-28 11:07:05,143][00359] Num frames 1000... +[2023-02-28 11:07:05,255][00359] Num frames 1100... +[2023-02-28 11:07:05,372][00359] Num frames 1200... +[2023-02-28 11:07:05,486][00359] Num frames 1300... +[2023-02-28 11:07:05,600][00359] Num frames 1400... +[2023-02-28 11:07:05,713][00359] Num frames 1500... +[2023-02-28 11:07:05,831][00359] Num frames 1600... +[2023-02-28 11:07:05,941][00359] Num frames 1700... +[2023-02-28 11:07:06,059][00359] Num frames 1800... +[2023-02-28 11:07:06,171][00359] Num frames 1900... +[2023-02-28 11:07:06,319][00359] Avg episode rewards: #0: 47.839, true rewards: #0: 19.840 +[2023-02-28 11:07:06,322][00359] Avg episode reward: 47.839, avg true_objective: 19.840 +[2023-02-28 11:07:06,343][00359] Num frames 2000... +[2023-02-28 11:07:06,465][00359] Num frames 2100... +[2023-02-28 11:07:06,587][00359] Num frames 2200... +[2023-02-28 11:07:06,708][00359] Num frames 2300... +[2023-02-28 11:07:06,827][00359] Num frames 2400... +[2023-02-28 11:07:06,948][00359] Num frames 2500... +[2023-02-28 11:07:07,065][00359] Num frames 2600... +[2023-02-28 11:07:07,179][00359] Num frames 2700... +[2023-02-28 11:07:07,302][00359] Num frames 2800... +[2023-02-28 11:07:07,415][00359] Num frames 2900... +[2023-02-28 11:07:07,537][00359] Num frames 3000... +[2023-02-28 11:07:07,653][00359] Num frames 3100... +[2023-02-28 11:07:07,765][00359] Num frames 3200... +[2023-02-28 11:07:07,891][00359] Num frames 3300... +[2023-02-28 11:07:08,006][00359] Num frames 3400... +[2023-02-28 11:07:08,128][00359] Num frames 3500... +[2023-02-28 11:07:08,241][00359] Num frames 3600... +[2023-02-28 11:07:08,362][00359] Num frames 3700... +[2023-02-28 11:07:08,477][00359] Num frames 3800... +[2023-02-28 11:07:08,602][00359] Num frames 3900... +[2023-02-28 11:07:08,719][00359] Num frames 4000... +[2023-02-28 11:07:08,869][00359] Avg episode rewards: #0: 54.919, true rewards: #0: 20.420 +[2023-02-28 11:07:08,871][00359] Avg episode reward: 54.919, avg true_objective: 20.420 +[2023-02-28 11:07:08,893][00359] Num frames 4100... +[2023-02-28 11:07:09,019][00359] Num frames 4200... +[2023-02-28 11:07:09,139][00359] Num frames 4300... +[2023-02-28 11:07:09,257][00359] Num frames 4400... +[2023-02-28 11:07:09,375][00359] Num frames 4500... +[2023-02-28 11:07:09,487][00359] Num frames 4600... +[2023-02-28 11:07:09,609][00359] Num frames 4700... +[2023-02-28 11:07:09,724][00359] Num frames 4800... +[2023-02-28 11:07:09,855][00359] Num frames 4900... +[2023-02-28 11:07:09,972][00359] Num frames 5000... +[2023-02-28 11:07:10,089][00359] Num frames 5100... +[2023-02-28 11:07:10,213][00359] Num frames 5200... +[2023-02-28 11:07:10,334][00359] Num frames 5300... +[2023-02-28 11:07:10,459][00359] Num frames 5400... +[2023-02-28 11:07:10,575][00359] Num frames 5500... +[2023-02-28 11:07:10,687][00359] Num frames 5600... +[2023-02-28 11:07:10,810][00359] Num frames 5700... +[2023-02-28 11:07:10,932][00359] Num frames 5800... +[2023-02-28 11:07:11,053][00359] Num frames 5900... +[2023-02-28 11:07:11,173][00359] Num frames 6000... +[2023-02-28 11:07:11,294][00359] Num frames 6100... +[2023-02-28 11:07:11,444][00359] Avg episode rewards: #0: 55.612, true rewards: #0: 20.613 +[2023-02-28 11:07:11,446][00359] Avg episode reward: 55.612, avg true_objective: 20.613 +[2023-02-28 11:07:11,467][00359] Num frames 6200... +[2023-02-28 11:07:11,588][00359] Num frames 6300... +[2023-02-28 11:07:11,702][00359] Num frames 6400... +[2023-02-28 11:07:11,818][00359] Num frames 6500... +[2023-02-28 11:07:11,941][00359] Num frames 6600... +[2023-02-28 11:07:12,070][00359] Num frames 6700... +[2023-02-28 11:07:12,230][00359] Num frames 6800... +[2023-02-28 11:07:12,401][00359] Num frames 6900... +[2023-02-28 11:07:12,562][00359] Num frames 7000... +[2023-02-28 11:07:12,649][00359] Avg episode rewards: #0: 46.539, true rewards: #0: 17.540 +[2023-02-28 11:07:12,655][00359] Avg episode reward: 46.539, avg true_objective: 17.540 +[2023-02-28 11:07:12,795][00359] Num frames 7100... +[2023-02-28 11:07:12,970][00359] Num frames 7200... +[2023-02-28 11:07:13,134][00359] Num frames 7300... +[2023-02-28 11:07:13,303][00359] Num frames 7400... +[2023-02-28 11:07:13,460][00359] Num frames 7500... +[2023-02-28 11:07:13,624][00359] Num frames 7600... +[2023-02-28 11:07:13,780][00359] Num frames 7700... +[2023-02-28 11:07:13,929][00359] Avg episode rewards: #0: 40.113, true rewards: #0: 15.514 +[2023-02-28 11:07:13,931][00359] Avg episode reward: 40.113, avg true_objective: 15.514 +[2023-02-28 11:07:14,010][00359] Num frames 7800... +[2023-02-28 11:07:14,181][00359] Num frames 7900... +[2023-02-28 11:07:14,346][00359] Num frames 8000... +[2023-02-28 11:07:14,505][00359] Num frames 8100... +[2023-02-28 11:07:14,671][00359] Num frames 8200... +[2023-02-28 11:07:14,835][00359] Num frames 8300... +[2023-02-28 11:07:14,998][00359] Num frames 8400... +[2023-02-28 11:07:15,163][00359] Num frames 8500... +[2023-02-28 11:07:15,331][00359] Num frames 8600... +[2023-02-28 11:07:15,490][00359] Num frames 8700... +[2023-02-28 11:07:15,642][00359] Num frames 8800... +[2023-02-28 11:07:15,753][00359] Num frames 8900... +[2023-02-28 11:07:15,872][00359] Num frames 9000... +[2023-02-28 11:07:16,006][00359] Avg episode rewards: #0: 38.615, true rewards: #0: 15.115 +[2023-02-28 11:07:16,007][00359] Avg episode reward: 38.615, avg true_objective: 15.115 +[2023-02-28 11:07:16,053][00359] Num frames 9100... +[2023-02-28 11:07:16,170][00359] Num frames 9200... +[2023-02-28 11:07:16,284][00359] Num frames 9300... +[2023-02-28 11:07:16,398][00359] Num frames 9400... +[2023-02-28 11:07:16,514][00359] Num frames 9500... +[2023-02-28 11:07:16,630][00359] Num frames 9600... +[2023-02-28 11:07:16,743][00359] Num frames 9700... +[2023-02-28 11:07:16,861][00359] Num frames 9800... +[2023-02-28 11:07:16,974][00359] Num frames 9900... +[2023-02-28 11:07:17,092][00359] Num frames 10000... +[2023-02-28 11:07:17,213][00359] Num frames 10100... +[2023-02-28 11:07:17,328][00359] Num frames 10200... +[2023-02-28 11:07:17,445][00359] Num frames 10300... +[2023-02-28 11:07:17,570][00359] Num frames 10400... +[2023-02-28 11:07:17,683][00359] Num frames 10500... +[2023-02-28 11:07:17,822][00359] Avg episode rewards: #0: 38.675, true rewards: #0: 15.104 +[2023-02-28 11:07:17,824][00359] Avg episode reward: 38.675, avg true_objective: 15.104 +[2023-02-28 11:07:17,860][00359] Num frames 10600... +[2023-02-28 11:07:17,974][00359] Num frames 10700... +[2023-02-28 11:07:18,097][00359] Num frames 10800... +[2023-02-28 11:07:18,214][00359] Num frames 10900... +[2023-02-28 11:07:18,327][00359] Num frames 11000... +[2023-02-28 11:07:18,453][00359] Num frames 11100... +[2023-02-28 11:07:18,570][00359] Num frames 11200... +[2023-02-28 11:07:18,693][00359] Num frames 11300... +[2023-02-28 11:07:18,761][00359] Avg episode rewards: #0: 36.011, true rewards: #0: 14.136 +[2023-02-28 11:07:18,763][00359] Avg episode reward: 36.011, avg true_objective: 14.136 +[2023-02-28 11:07:18,865][00359] Num frames 11400... +[2023-02-28 11:07:18,978][00359] Num frames 11500... +[2023-02-28 11:07:19,102][00359] Num frames 11600... +[2023-02-28 11:07:19,217][00359] Num frames 11700... +[2023-02-28 11:07:19,339][00359] Num frames 11800... +[2023-02-28 11:07:19,453][00359] Num frames 11900... +[2023-02-28 11:07:19,573][00359] Num frames 12000... +[2023-02-28 11:07:19,692][00359] Num frames 12100... +[2023-02-28 11:07:19,804][00359] Num frames 12200... +[2023-02-28 11:07:19,904][00359] Avg episode rewards: #0: 34.152, true rewards: #0: 13.597 +[2023-02-28 11:07:19,906][00359] Avg episode reward: 34.152, avg true_objective: 13.597 +[2023-02-28 11:07:19,979][00359] Num frames 12300... +[2023-02-28 11:07:20,101][00359] Num frames 12400... +[2023-02-28 11:07:20,222][00359] Num frames 12500... +[2023-02-28 11:07:20,337][00359] Num frames 12600... +[2023-02-28 11:07:20,447][00359] Num frames 12700... +[2023-02-28 11:07:20,595][00359] Avg episode rewards: #0: 31.883, true rewards: #0: 12.783 +[2023-02-28 11:07:20,597][00359] Avg episode reward: 31.883, avg true_objective: 12.783 +[2023-02-28 11:08:37,867][00359] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2023-02-28 11:09:47,250][00359] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-28 11:09:47,253][00359] Overriding arg 'num_workers' with value 1 passed from command line +[2023-02-28 11:09:47,257][00359] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-02-28 11:09:47,258][00359] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-02-28 11:09:47,260][00359] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-02-28 11:09:47,263][00359] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-02-28 11:09:47,264][00359] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2023-02-28 11:09:47,266][00359] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-02-28 11:09:47,269][00359] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2023-02-28 11:09:47,270][00359] Adding new argument 'hf_repository'='eldraco/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2023-02-28 11:09:47,271][00359] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-02-28 11:09:47,273][00359] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-02-28 11:09:47,274][00359] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-02-28 11:09:47,276][00359] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-02-28 11:09:47,277][00359] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-02-28 11:09:47,302][00359] RunningMeanStd input shape: (3, 72, 128) +[2023-02-28 11:09:47,304][00359] RunningMeanStd input shape: (1,) +[2023-02-28 11:09:47,321][00359] ConvEncoder: input_channels=3 +[2023-02-28 11:09:47,359][00359] Conv encoder output size: 512 +[2023-02-28 11:09:47,360][00359] Policy head output size: 512 +[2023-02-28 11:09:47,380][00359] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-02-28 11:09:47,816][00359] Num frames 100... +[2023-02-28 11:09:47,929][00359] Num frames 200... +[2023-02-28 11:09:48,047][00359] Num frames 300... +[2023-02-28 11:09:48,167][00359] Num frames 400... +[2023-02-28 11:09:48,289][00359] Num frames 500... +[2023-02-28 11:09:48,440][00359] Avg episode rewards: #0: 9.760, true rewards: #0: 5.760 +[2023-02-28 11:09:48,441][00359] Avg episode reward: 9.760, avg true_objective: 5.760 +[2023-02-28 11:09:48,473][00359] Num frames 600... +[2023-02-28 11:09:48,590][00359] Num frames 700... +[2023-02-28 11:09:48,703][00359] Num frames 800... +[2023-02-28 11:09:48,815][00359] Num frames 900... +[2023-02-28 11:09:48,930][00359] Num frames 1000... +[2023-02-28 11:09:49,049][00359] Num frames 1100... +[2023-02-28 11:09:49,170][00359] Num frames 1200... +[2023-02-28 11:09:49,325][00359] Avg episode rewards: #0: 12.900, true rewards: #0: 6.400 +[2023-02-28 11:09:49,329][00359] Avg episode reward: 12.900, avg true_objective: 6.400 +[2023-02-28 11:09:49,361][00359] Num frames 1300... +[2023-02-28 11:09:49,476][00359] Num frames 1400... +[2023-02-28 11:09:49,599][00359] Num frames 1500... +[2023-02-28 11:09:49,713][00359] Num frames 1600... +[2023-02-28 11:09:49,825][00359] Num frames 1700... +[2023-02-28 11:09:49,945][00359] Num frames 1800... +[2023-02-28 11:09:50,060][00359] Num frames 1900... +[2023-02-28 11:09:50,200][00359] Num frames 2000... +[2023-02-28 11:09:50,323][00359] Num frames 2100... +[2023-02-28 11:09:50,436][00359] Num frames 2200... +[2023-02-28 11:09:50,578][00359] Num frames 2300... +[2023-02-28 11:09:50,698][00359] Num frames 2400... +[2023-02-28 11:09:50,813][00359] Num frames 2500... +[2023-02-28 11:09:50,932][00359] Num frames 2600... +[2023-02-28 11:09:51,055][00359] Num frames 2700... +[2023-02-28 11:09:51,172][00359] Num frames 2800... +[2023-02-28 11:09:51,250][00359] Avg episode rewards: #0: 22.400, true rewards: #0: 9.400 +[2023-02-28 11:09:51,252][00359] Avg episode reward: 22.400, avg true_objective: 9.400 +[2023-02-28 11:09:51,346][00359] Num frames 2900... +[2023-02-28 11:09:51,506][00359] Num frames 3000... +[2023-02-28 11:09:51,666][00359] Num frames 3100... +[2023-02-28 11:09:51,822][00359] Num frames 3200... +[2023-02-28 11:09:51,983][00359] Num frames 3300... +[2023-02-28 11:09:52,140][00359] Num frames 3400... +[2023-02-28 11:09:52,322][00359] Num frames 3500... +[2023-02-28 11:09:52,486][00359] Num frames 3600... +[2023-02-28 11:09:52,647][00359] Num frames 3700... +[2023-02-28 11:09:52,807][00359] Num frames 3800... +[2023-02-28 11:09:52,968][00359] Num frames 3900... +[2023-02-28 11:09:53,137][00359] Num frames 4000... +[2023-02-28 11:09:53,296][00359] Num frames 4100... +[2023-02-28 11:09:53,476][00359] Num frames 4200... +[2023-02-28 11:09:53,664][00359] Num frames 4300... +[2023-02-28 11:09:53,829][00359] Num frames 4400... +[2023-02-28 11:09:53,999][00359] Num frames 4500... +[2023-02-28 11:09:54,167][00359] Num frames 4600... +[2023-02-28 11:09:54,333][00359] Num frames 4700... +[2023-02-28 11:09:54,510][00359] Avg episode rewards: #0: 30.680, true rewards: #0: 11.930 +[2023-02-28 11:09:54,513][00359] Avg episode reward: 30.680, avg true_objective: 11.930 +[2023-02-28 11:09:54,567][00359] Num frames 4800... +[2023-02-28 11:09:54,727][00359] Num frames 4900... +[2023-02-28 11:09:54,891][00359] Num frames 5000... +[2023-02-28 11:09:55,055][00359] Num frames 5100... +[2023-02-28 11:09:55,172][00359] Num frames 5200... +[2023-02-28 11:09:55,284][00359] Num frames 5300... +[2023-02-28 11:09:55,410][00359] Num frames 5400... +[2023-02-28 11:09:55,559][00359] Avg episode rewards: #0: 27.172, true rewards: #0: 10.972 +[2023-02-28 11:09:55,560][00359] Avg episode reward: 27.172, avg true_objective: 10.972 +[2023-02-28 11:09:55,581][00359] Num frames 5500... +[2023-02-28 11:09:55,702][00359] Num frames 5600... +[2023-02-28 11:09:55,814][00359] Num frames 5700... +[2023-02-28 11:09:55,936][00359] Num frames 5800... +[2023-02-28 11:09:56,054][00359] Num frames 5900... +[2023-02-28 11:09:56,166][00359] Num frames 6000... +[2023-02-28 11:09:56,283][00359] Num frames 6100... +[2023-02-28 11:09:56,400][00359] Num frames 6200... +[2023-02-28 11:09:56,521][00359] Num frames 6300... +[2023-02-28 11:09:56,643][00359] Avg episode rewards: #0: 26.083, true rewards: #0: 10.583 +[2023-02-28 11:09:56,645][00359] Avg episode reward: 26.083, avg true_objective: 10.583 +[2023-02-28 11:09:56,704][00359] Num frames 6400... +[2023-02-28 11:09:56,814][00359] Num frames 6500... +[2023-02-28 11:09:56,941][00359] Num frames 6600... +[2023-02-28 11:09:57,062][00359] Num frames 6700... +[2023-02-28 11:09:57,160][00359] Avg episode rewards: #0: 23.194, true rewards: #0: 9.623 +[2023-02-28 11:09:57,161][00359] Avg episode reward: 23.194, avg true_objective: 9.623 +[2023-02-28 11:09:57,238][00359] Num frames 6800... +[2023-02-28 11:09:57,359][00359] Num frames 6900... +[2023-02-28 11:09:57,483][00359] Num frames 7000... +[2023-02-28 11:09:57,605][00359] Num frames 7100... +[2023-02-28 11:09:57,723][00359] Num frames 7200... +[2023-02-28 11:09:57,834][00359] Num frames 7300... +[2023-02-28 11:09:57,951][00359] Num frames 7400... +[2023-02-28 11:09:58,075][00359] Num frames 7500... +[2023-02-28 11:09:58,191][00359] Num frames 7600... +[2023-02-28 11:09:58,315][00359] Num frames 7700... +[2023-02-28 11:09:58,438][00359] Num frames 7800... +[2023-02-28 11:09:58,570][00359] Num frames 7900... +[2023-02-28 11:09:58,688][00359] Num frames 8000... +[2023-02-28 11:09:58,802][00359] Num frames 8100... +[2023-02-28 11:09:58,872][00359] Avg episode rewards: #0: 24.140, true rewards: #0: 10.140 +[2023-02-28 11:09:58,874][00359] Avg episode reward: 24.140, avg true_objective: 10.140 +[2023-02-28 11:09:58,986][00359] Num frames 8200... +[2023-02-28 11:09:59,101][00359] Num frames 8300... +[2023-02-28 11:09:59,227][00359] Num frames 8400... +[2023-02-28 11:09:59,343][00359] Num frames 8500... +[2023-02-28 11:09:59,468][00359] Num frames 8600... +[2023-02-28 11:09:59,589][00359] Num frames 8700... +[2023-02-28 11:09:59,702][00359] Num frames 8800... +[2023-02-28 11:09:59,822][00359] Num frames 8900... +[2023-02-28 11:09:59,937][00359] Num frames 9000... +[2023-02-28 11:10:00,062][00359] Num frames 9100... +[2023-02-28 11:10:00,179][00359] Num frames 9200... +[2023-02-28 11:10:00,302][00359] Num frames 9300... +[2023-02-28 11:10:00,415][00359] Num frames 9400... +[2023-02-28 11:10:00,542][00359] Num frames 9500... +[2023-02-28 11:10:00,659][00359] Num frames 9600... +[2023-02-28 11:10:00,777][00359] Num frames 9700... +[2023-02-28 11:10:00,848][00359] Avg episode rewards: #0: 26.569, true rewards: #0: 10.791 +[2023-02-28 11:10:00,849][00359] Avg episode reward: 26.569, avg true_objective: 10.791 +[2023-02-28 11:10:00,963][00359] Num frames 9800... +[2023-02-28 11:10:01,084][00359] Num frames 9900... +[2023-02-28 11:10:01,200][00359] Num frames 10000... +[2023-02-28 11:10:01,322][00359] Num frames 10100... +[2023-02-28 11:10:01,441][00359] Num frames 10200... +[2023-02-28 11:10:01,560][00359] Num frames 10300... +[2023-02-28 11:10:01,681][00359] Avg episode rewards: #0: 25.252, true rewards: #0: 10.352 +[2023-02-28 11:10:01,683][00359] Avg episode reward: 25.252, avg true_objective: 10.352 +[2023-02-28 11:11:04,971][00359] Replay video saved to /content/train_dir/default_experiment/replay.mp4!