diff --git "a/sf_log.txt" "b/sf_log.txt" new file mode 100644--- /dev/null +++ "b/sf_log.txt" @@ -0,0 +1,975 @@ +[2024-12-18 20:47:29,081][00245] Saving configuration to /content/train_dir/default_experiment/config.json... +[2024-12-18 20:47:29,084][00245] Rollout worker 0 uses device cpu +[2024-12-18 20:47:29,085][00245] Rollout worker 1 uses device cpu +[2024-12-18 20:47:29,086][00245] Rollout worker 2 uses device cpu +[2024-12-18 20:47:29,087][00245] Rollout worker 3 uses device cpu +[2024-12-18 20:47:29,089][00245] Rollout worker 4 uses device cpu +[2024-12-18 20:47:29,090][00245] Rollout worker 5 uses device cpu +[2024-12-18 20:47:29,091][00245] Rollout worker 6 uses device cpu +[2024-12-18 20:47:29,095][00245] Rollout worker 7 uses device cpu +[2024-12-18 20:47:29,240][00245] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-12-18 20:47:29,242][00245] InferenceWorker_p0-w0: min num requests: 2 +[2024-12-18 20:47:29,276][00245] Starting all processes... +[2024-12-18 20:47:29,278][00245] Starting process learner_proc0 +[2024-12-18 20:47:29,324][00245] Starting all processes... +[2024-12-18 20:47:29,331][00245] Starting process inference_proc0-0 +[2024-12-18 20:47:29,331][00245] Starting process rollout_proc0 +[2024-12-18 20:47:29,333][00245] Starting process rollout_proc1 +[2024-12-18 20:47:29,333][00245] Starting process rollout_proc2 +[2024-12-18 20:47:29,333][00245] Starting process rollout_proc3 +[2024-12-18 20:47:29,333][00245] Starting process rollout_proc4 +[2024-12-18 20:47:29,333][00245] Starting process rollout_proc5 +[2024-12-18 20:47:29,333][00245] Starting process rollout_proc6 +[2024-12-18 20:47:29,333][00245] Starting process rollout_proc7 +[2024-12-18 20:47:45,882][06275] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-12-18 20:47:45,882][06275] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2024-12-18 20:47:45,951][06275] Num visible devices: 1 +[2024-12-18 20:47:45,997][06275] Starting seed is not provided +[2024-12-18 20:47:45,998][06275] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-12-18 20:47:45,999][06275] Initializing actor-critic model on device cuda:0 +[2024-12-18 20:47:46,000][06275] RunningMeanStd input shape: (3, 72, 128) +[2024-12-18 20:47:46,003][06275] RunningMeanStd input shape: (1,) +[2024-12-18 20:47:46,044][06296] Worker 6 uses CPU cores [0] +[2024-12-18 20:47:46,117][06275] ConvEncoder: input_channels=3 +[2024-12-18 20:47:46,360][06292] Worker 3 uses CPU cores [1] +[2024-12-18 20:47:46,407][06295] Worker 7 uses CPU cores [1] +[2024-12-18 20:47:46,435][06291] Worker 2 uses CPU cores [0] +[2024-12-18 20:47:46,465][06290] Worker 1 uses CPU cores [1] +[2024-12-18 20:47:46,545][06294] Worker 5 uses CPU cores [1] +[2024-12-18 20:47:46,564][06289] Worker 0 uses CPU cores [0] +[2024-12-18 20:47:46,569][06288] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-12-18 20:47:46,570][06288] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2024-12-18 20:47:46,619][06288] Num visible devices: 1 +[2024-12-18 20:47:46,700][06293] Worker 4 uses CPU cores [0] +[2024-12-18 20:47:46,777][06275] Conv encoder output size: 512 +[2024-12-18 20:47:46,777][06275] Policy head output size: 512 +[2024-12-18 20:47:46,845][06275] Created Actor Critic model with architecture: +[2024-12-18 20:47:46,845][06275] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2024-12-18 20:47:47,181][06275] Using optimizer +[2024-12-18 20:47:49,235][00245] Heartbeat connected on Batcher_0 +[2024-12-18 20:47:49,242][00245] Heartbeat connected on InferenceWorker_p0-w0 +[2024-12-18 20:47:49,250][00245] Heartbeat connected on RolloutWorker_w0 +[2024-12-18 20:47:49,253][00245] Heartbeat connected on RolloutWorker_w1 +[2024-12-18 20:47:49,257][00245] Heartbeat connected on RolloutWorker_w2 +[2024-12-18 20:47:49,261][00245] Heartbeat connected on RolloutWorker_w3 +[2024-12-18 20:47:49,265][00245] Heartbeat connected on RolloutWorker_w4 +[2024-12-18 20:47:49,267][00245] Heartbeat connected on RolloutWorker_w5 +[2024-12-18 20:47:49,272][00245] Heartbeat connected on RolloutWorker_w6 +[2024-12-18 20:47:49,276][00245] Heartbeat connected on RolloutWorker_w7 +[2024-12-18 20:47:52,471][06275] No checkpoints found +[2024-12-18 20:47:52,471][06275] Did not load from checkpoint, starting from scratch! +[2024-12-18 20:47:52,472][06275] Initialized policy 0 weights for model version 0 +[2024-12-18 20:47:52,475][06275] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-12-18 20:47:52,489][06275] LearnerWorker_p0 finished initialization! +[2024-12-18 20:47:52,503][00245] Heartbeat connected on LearnerWorker_p0 +[2024-12-18 20:47:52,585][06288] RunningMeanStd input shape: (3, 72, 128) +[2024-12-18 20:47:52,587][06288] RunningMeanStd input shape: (1,) +[2024-12-18 20:47:52,599][06288] ConvEncoder: input_channels=3 +[2024-12-18 20:47:52,703][06288] Conv encoder output size: 512 +[2024-12-18 20:47:52,703][06288] Policy head output size: 512 +[2024-12-18 20:47:52,756][00245] Inference worker 0-0 is ready! +[2024-12-18 20:47:52,758][00245] All inference workers are ready! Signal rollout workers to start! +[2024-12-18 20:47:53,055][06293] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-12-18 20:47:53,059][06294] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-12-18 20:47:53,114][06295] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-12-18 20:47:53,117][06290] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-12-18 20:47:53,128][06292] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-12-18 20:47:53,146][06291] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-12-18 20:47:53,163][06296] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-12-18 20:47:53,167][06289] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-12-18 20:47:53,818][00245] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-12-18 20:47:54,496][06296] Decorrelating experience for 0 frames... +[2024-12-18 20:47:54,498][06293] Decorrelating experience for 0 frames... +[2024-12-18 20:47:54,498][06291] Decorrelating experience for 0 frames... +[2024-12-18 20:47:54,498][06292] Decorrelating experience for 0 frames... +[2024-12-18 20:47:54,500][06295] Decorrelating experience for 0 frames... +[2024-12-18 20:47:54,497][06294] Decorrelating experience for 0 frames... +[2024-12-18 20:47:55,214][06290] Decorrelating experience for 0 frames... +[2024-12-18 20:47:55,232][06294] Decorrelating experience for 32 frames... +[2024-12-18 20:47:55,669][06296] Decorrelating experience for 32 frames... +[2024-12-18 20:47:55,678][06291] Decorrelating experience for 32 frames... +[2024-12-18 20:47:55,715][06289] Decorrelating experience for 0 frames... +[2024-12-18 20:47:55,727][06290] Decorrelating experience for 32 frames... +[2024-12-18 20:47:56,740][06294] Decorrelating experience for 64 frames... +[2024-12-18 20:47:57,178][06290] Decorrelating experience for 64 frames... +[2024-12-18 20:47:57,329][06293] Decorrelating experience for 32 frames... +[2024-12-18 20:47:57,356][06289] Decorrelating experience for 32 frames... +[2024-12-18 20:47:57,649][06296] Decorrelating experience for 64 frames... +[2024-12-18 20:47:58,330][06294] Decorrelating experience for 96 frames... +[2024-12-18 20:47:58,595][06290] Decorrelating experience for 96 frames... +[2024-12-18 20:47:58,679][06292] Decorrelating experience for 32 frames... +[2024-12-18 20:47:58,819][00245] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-12-18 20:47:59,271][06293] Decorrelating experience for 64 frames... +[2024-12-18 20:47:59,273][06291] Decorrelating experience for 64 frames... +[2024-12-18 20:47:59,275][06289] Decorrelating experience for 64 frames... +[2024-12-18 20:47:59,528][06292] Decorrelating experience for 64 frames... +[2024-12-18 20:48:00,227][06296] Decorrelating experience for 96 frames... +[2024-12-18 20:48:00,952][06291] Decorrelating experience for 96 frames... +[2024-12-18 20:48:00,957][06289] Decorrelating experience for 96 frames... +[2024-12-18 20:48:00,965][06293] Decorrelating experience for 96 frames... +[2024-12-18 20:48:01,772][06295] Decorrelating experience for 32 frames... +[2024-12-18 20:48:03,819][00245] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 72.6. Samples: 726. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-12-18 20:48:03,821][00245] Avg episode reward: [(0, '2.019')] +[2024-12-18 20:48:03,817][06292] Decorrelating experience for 96 frames... +[2024-12-18 20:48:05,551][06275] Signal inference workers to stop experience collection... +[2024-12-18 20:48:05,582][06288] InferenceWorker_p0-w0: stopping experience collection +[2024-12-18 20:48:05,950][06295] Decorrelating experience for 64 frames... +[2024-12-18 20:48:06,969][06295] Decorrelating experience for 96 frames... +[2024-12-18 20:48:08,819][00245] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 157.2. Samples: 2358. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-12-18 20:48:08,821][00245] Avg episode reward: [(0, '2.558')] +[2024-12-18 20:48:08,912][06275] Signal inference workers to resume experience collection... +[2024-12-18 20:48:08,912][06288] InferenceWorker_p0-w0: resuming experience collection +[2024-12-18 20:48:13,819][00245] Fps is (10 sec: 2457.6, 60 sec: 1228.8, 300 sec: 1228.8). Total num frames: 24576. Throughput: 0: 374.0. Samples: 7480. Policy #0 lag: (min: 0.0, avg: 0.5, max: 3.0) +[2024-12-18 20:48:13,823][00245] Avg episode reward: [(0, '3.863')] +[2024-12-18 20:48:16,708][06288] Updated weights for policy 0, policy_version 10 (0.0027) +[2024-12-18 20:48:18,819][00245] Fps is (10 sec: 4915.1, 60 sec: 1966.1, 300 sec: 1966.1). Total num frames: 49152. Throughput: 0: 437.8. Samples: 10946. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 20:48:18,824][00245] Avg episode reward: [(0, '4.433')] +[2024-12-18 20:48:23,819][00245] Fps is (10 sec: 4096.0, 60 sec: 2184.5, 300 sec: 2184.5). Total num frames: 65536. Throughput: 0: 547.1. Samples: 16414. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-12-18 20:48:23,825][00245] Avg episode reward: [(0, '4.462')] +[2024-12-18 20:48:28,178][06288] Updated weights for policy 0, policy_version 20 (0.0032) +[2024-12-18 20:48:28,819][00245] Fps is (10 sec: 3276.9, 60 sec: 2340.6, 300 sec: 2340.6). Total num frames: 81920. Throughput: 0: 625.3. Samples: 21884. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:48:28,825][00245] Avg episode reward: [(0, '4.458')] +[2024-12-18 20:48:33,819][00245] Fps is (10 sec: 4096.0, 60 sec: 2662.4, 300 sec: 2662.4). Total num frames: 106496. Throughput: 0: 636.1. Samples: 25446. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:48:33,821][00245] Avg episode reward: [(0, '4.356')] +[2024-12-18 20:48:33,830][06275] Saving new best policy, reward=4.356! +[2024-12-18 20:48:37,494][06288] Updated weights for policy 0, policy_version 30 (0.0044) +[2024-12-18 20:48:38,819][00245] Fps is (10 sec: 4095.8, 60 sec: 2730.6, 300 sec: 2730.6). Total num frames: 122880. Throughput: 0: 699.9. Samples: 31494. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-18 20:48:38,822][00245] Avg episode reward: [(0, '4.459')] +[2024-12-18 20:48:38,865][06275] Saving new best policy, reward=4.459! +[2024-12-18 20:48:43,823][00245] Fps is (10 sec: 3275.3, 60 sec: 2785.0, 300 sec: 2785.0). Total num frames: 139264. Throughput: 0: 799.8. Samples: 35996. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:48:43,828][00245] Avg episode reward: [(0, '4.414')] +[2024-12-18 20:48:48,387][06288] Updated weights for policy 0, policy_version 40 (0.0037) +[2024-12-18 20:48:48,819][00245] Fps is (10 sec: 4096.2, 60 sec: 2978.9, 300 sec: 2978.9). Total num frames: 163840. Throughput: 0: 861.9. Samples: 39512. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-18 20:48:48,826][00245] Avg episode reward: [(0, '4.342')] +[2024-12-18 20:48:53,818][00245] Fps is (10 sec: 4917.4, 60 sec: 3140.3, 300 sec: 3140.3). Total num frames: 188416. Throughput: 0: 984.4. Samples: 46654. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:48:53,821][00245] Avg episode reward: [(0, '4.459')] +[2024-12-18 20:48:58,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3087.8). Total num frames: 200704. Throughput: 0: 972.4. Samples: 51236. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:48:58,826][00245] Avg episode reward: [(0, '4.454')] +[2024-12-18 20:48:59,428][06288] Updated weights for policy 0, policy_version 50 (0.0022) +[2024-12-18 20:49:03,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3159.8). Total num frames: 221184. Throughput: 0: 958.7. Samples: 54088. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:49:03,820][00245] Avg episode reward: [(0, '4.397')] +[2024-12-18 20:49:08,417][06288] Updated weights for policy 0, policy_version 60 (0.0014) +[2024-12-18 20:49:08,819][00245] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3276.8). Total num frames: 245760. Throughput: 0: 995.8. Samples: 61224. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:49:08,820][00245] Avg episode reward: [(0, '4.603')] +[2024-12-18 20:49:08,823][06275] Saving new best policy, reward=4.603! +[2024-12-18 20:49:13,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3276.8). Total num frames: 262144. Throughput: 0: 995.7. Samples: 66692. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-18 20:49:13,824][00245] Avg episode reward: [(0, '4.431')] +[2024-12-18 20:49:18,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3276.8). Total num frames: 278528. Throughput: 0: 964.6. Samples: 68854. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:49:18,827][00245] Avg episode reward: [(0, '4.133')] +[2024-12-18 20:49:19,894][06288] Updated weights for policy 0, policy_version 70 (0.0033) +[2024-12-18 20:49:23,819][00245] Fps is (10 sec: 4095.9, 60 sec: 3959.5, 300 sec: 3367.8). Total num frames: 303104. Throughput: 0: 984.9. Samples: 75814. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-18 20:49:23,827][00245] Avg episode reward: [(0, '4.235')] +[2024-12-18 20:49:23,841][06275] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000074_303104.pth... +[2024-12-18 20:49:28,818][00245] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3406.1). Total num frames: 323584. Throughput: 0: 1024.6. Samples: 82100. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:49:28,822][00245] Avg episode reward: [(0, '4.370')] +[2024-12-18 20:49:29,415][06288] Updated weights for policy 0, policy_version 80 (0.0023) +[2024-12-18 20:49:33,819][00245] Fps is (10 sec: 3686.3, 60 sec: 3891.2, 300 sec: 3399.7). Total num frames: 339968. Throughput: 0: 993.1. Samples: 84202. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 20:49:33,827][00245] Avg episode reward: [(0, '4.331')] +[2024-12-18 20:49:38,819][00245] Fps is (10 sec: 3686.3, 60 sec: 3959.5, 300 sec: 3432.8). Total num frames: 360448. Throughput: 0: 968.1. Samples: 90220. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:49:38,825][00245] Avg episode reward: [(0, '4.415')] +[2024-12-18 20:49:39,841][06288] Updated weights for policy 0, policy_version 90 (0.0021) +[2024-12-18 20:49:43,818][00245] Fps is (10 sec: 4505.8, 60 sec: 4096.3, 300 sec: 3500.2). Total num frames: 385024. Throughput: 0: 1028.0. Samples: 97494. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:49:43,823][00245] Avg episode reward: [(0, '4.606')] +[2024-12-18 20:49:43,829][06275] Saving new best policy, reward=4.606! +[2024-12-18 20:49:48,819][00245] Fps is (10 sec: 4095.9, 60 sec: 3959.4, 300 sec: 3490.5). Total num frames: 401408. Throughput: 0: 1014.6. Samples: 99746. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:49:48,824][00245] Avg episode reward: [(0, '4.616')] +[2024-12-18 20:49:48,829][06275] Saving new best policy, reward=4.616! +[2024-12-18 20:49:51,208][06288] Updated weights for policy 0, policy_version 100 (0.0017) +[2024-12-18 20:49:53,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3481.6). Total num frames: 417792. Throughput: 0: 968.0. Samples: 104782. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:49:53,824][00245] Avg episode reward: [(0, '4.442')] +[2024-12-18 20:49:58,819][00245] Fps is (10 sec: 4505.8, 60 sec: 4096.0, 300 sec: 3571.7). Total num frames: 446464. Throughput: 0: 1005.9. Samples: 111958. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:49:58,820][00245] Avg episode reward: [(0, '4.365')] +[2024-12-18 20:49:59,643][06288] Updated weights for policy 0, policy_version 110 (0.0017) +[2024-12-18 20:50:03,819][00245] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3560.4). Total num frames: 462848. Throughput: 0: 1030.4. Samples: 115222. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-18 20:50:03,823][00245] Avg episode reward: [(0, '4.427')] +[2024-12-18 20:50:08,818][00245] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3549.9). Total num frames: 479232. Throughput: 0: 969.5. Samples: 119442. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-18 20:50:08,821][00245] Avg episode reward: [(0, '4.594')] +[2024-12-18 20:50:11,252][06288] Updated weights for policy 0, policy_version 120 (0.0025) +[2024-12-18 20:50:13,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3569.4). Total num frames: 499712. Throughput: 0: 970.6. Samples: 125778. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-12-18 20:50:13,821][00245] Avg episode reward: [(0, '4.549')] +[2024-12-18 20:50:18,823][00245] Fps is (10 sec: 3275.3, 60 sec: 3890.9, 300 sec: 3530.9). Total num frames: 512000. Throughput: 0: 968.4. Samples: 127786. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:50:18,825][00245] Avg episode reward: [(0, '4.585')] +[2024-12-18 20:50:23,819][00245] Fps is (10 sec: 2457.6, 60 sec: 3686.4, 300 sec: 3495.3). Total num frames: 524288. Throughput: 0: 923.6. Samples: 131782. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:50:23,825][00245] Avg episode reward: [(0, '4.434')] +[2024-12-18 20:50:25,453][06288] Updated weights for policy 0, policy_version 130 (0.0025) +[2024-12-18 20:50:28,819][00245] Fps is (10 sec: 3278.2, 60 sec: 3686.4, 300 sec: 3514.6). Total num frames: 544768. Throughput: 0: 883.5. Samples: 137250. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:50:28,826][00245] Avg episode reward: [(0, '4.535')] +[2024-12-18 20:50:33,819][00245] Fps is (10 sec: 4505.6, 60 sec: 3823.0, 300 sec: 3558.4). Total num frames: 569344. Throughput: 0: 913.0. Samples: 140832. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 20:50:33,828][00245] Avg episode reward: [(0, '4.606')] +[2024-12-18 20:50:34,463][06288] Updated weights for policy 0, policy_version 140 (0.0014) +[2024-12-18 20:50:38,819][00245] Fps is (10 sec: 4096.1, 60 sec: 3754.7, 300 sec: 3549.9). Total num frames: 585728. Throughput: 0: 940.0. Samples: 147082. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:50:38,825][00245] Avg episode reward: [(0, '4.503')] +[2024-12-18 20:50:43,819][00245] Fps is (10 sec: 3276.7, 60 sec: 3618.1, 300 sec: 3541.8). Total num frames: 602112. Throughput: 0: 882.4. Samples: 151668. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:50:43,822][00245] Avg episode reward: [(0, '4.413')] +[2024-12-18 20:50:45,896][06288] Updated weights for policy 0, policy_version 150 (0.0039) +[2024-12-18 20:50:48,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3581.1). Total num frames: 626688. Throughput: 0: 888.3. Samples: 155194. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:50:48,821][00245] Avg episode reward: [(0, '4.427')] +[2024-12-18 20:50:53,820][00245] Fps is (10 sec: 4505.3, 60 sec: 3822.9, 300 sec: 3595.4). Total num frames: 647168. Throughput: 0: 947.2. Samples: 162068. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:50:53,822][00245] Avg episode reward: [(0, '4.456')] +[2024-12-18 20:50:55,899][06288] Updated weights for policy 0, policy_version 160 (0.0017) +[2024-12-18 20:50:58,819][00245] Fps is (10 sec: 3276.7, 60 sec: 3549.8, 300 sec: 3564.6). Total num frames: 659456. Throughput: 0: 899.2. Samples: 166244. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:50:58,821][00245] Avg episode reward: [(0, '4.527')] +[2024-12-18 20:51:03,819][00245] Fps is (10 sec: 3277.0, 60 sec: 3618.1, 300 sec: 3578.6). Total num frames: 679936. Throughput: 0: 914.3. Samples: 168928. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-18 20:51:03,821][00245] Avg episode reward: [(0, '4.547')] +[2024-12-18 20:51:08,016][06288] Updated weights for policy 0, policy_version 170 (0.0018) +[2024-12-18 20:51:08,819][00245] Fps is (10 sec: 3686.5, 60 sec: 3618.1, 300 sec: 3570.9). Total num frames: 696320. Throughput: 0: 953.2. Samples: 174678. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:51:08,824][00245] Avg episode reward: [(0, '4.457')] +[2024-12-18 20:51:13,819][00245] Fps is (10 sec: 2867.3, 60 sec: 3481.6, 300 sec: 3543.0). Total num frames: 708608. Throughput: 0: 911.6. Samples: 178274. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:51:13,821][00245] Avg episode reward: [(0, '4.504')] +[2024-12-18 20:51:18,819][00245] Fps is (10 sec: 2867.2, 60 sec: 3550.1, 300 sec: 3536.5). Total num frames: 724992. Throughput: 0: 876.0. Samples: 180250. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:51:18,821][00245] Avg episode reward: [(0, '4.549')] +[2024-12-18 20:51:21,589][06288] Updated weights for policy 0, policy_version 180 (0.0018) +[2024-12-18 20:51:23,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3549.9). Total num frames: 745472. Throughput: 0: 874.8. Samples: 186448. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-18 20:51:23,824][00245] Avg episode reward: [(0, '4.622')] +[2024-12-18 20:51:23,833][06275] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000182_745472.pth... +[2024-12-18 20:51:23,960][06275] Saving new best policy, reward=4.622! +[2024-12-18 20:51:28,820][00245] Fps is (10 sec: 4505.0, 60 sec: 3754.6, 300 sec: 3581.6). Total num frames: 770048. Throughput: 0: 924.5. Samples: 193272. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:51:28,823][00245] Avg episode reward: [(0, '4.459')] +[2024-12-18 20:51:31,262][06288] Updated weights for policy 0, policy_version 190 (0.0020) +[2024-12-18 20:51:33,821][00245] Fps is (10 sec: 3685.5, 60 sec: 3549.7, 300 sec: 3556.0). Total num frames: 782336. Throughput: 0: 893.5. Samples: 195404. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-18 20:51:33,827][00245] Avg episode reward: [(0, '4.556')] +[2024-12-18 20:51:38,819][00245] Fps is (10 sec: 3277.3, 60 sec: 3618.1, 300 sec: 3568.1). Total num frames: 802816. Throughput: 0: 862.2. Samples: 200866. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:51:38,820][00245] Avg episode reward: [(0, '4.764')] +[2024-12-18 20:51:38,823][06275] Saving new best policy, reward=4.764! +[2024-12-18 20:51:41,686][06288] Updated weights for policy 0, policy_version 200 (0.0019) +[2024-12-18 20:51:43,825][00245] Fps is (10 sec: 4503.8, 60 sec: 3754.3, 300 sec: 3597.3). Total num frames: 827392. Throughput: 0: 923.5. Samples: 207808. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:51:43,833][00245] Avg episode reward: [(0, '4.701')] +[2024-12-18 20:51:48,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3590.5). Total num frames: 843776. Throughput: 0: 927.0. Samples: 210644. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:51:48,828][00245] Avg episode reward: [(0, '4.395')] +[2024-12-18 20:51:53,074][06288] Updated weights for policy 0, policy_version 210 (0.0034) +[2024-12-18 20:51:53,819][00245] Fps is (10 sec: 3278.9, 60 sec: 3549.9, 300 sec: 3584.0). Total num frames: 860160. Throughput: 0: 902.1. Samples: 215274. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 20:51:53,821][00245] Avg episode reward: [(0, '4.444')] +[2024-12-18 20:51:58,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3611.2). Total num frames: 884736. Throughput: 0: 979.3. Samples: 222342. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-18 20:51:58,821][00245] Avg episode reward: [(0, '4.504')] +[2024-12-18 20:52:01,716][06288] Updated weights for policy 0, policy_version 220 (0.0014) +[2024-12-18 20:52:03,819][00245] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3620.9). Total num frames: 905216. Throughput: 0: 1014.0. Samples: 225880. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:52:03,821][00245] Avg episode reward: [(0, '4.408')] +[2024-12-18 20:52:08,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3614.1). Total num frames: 921600. Throughput: 0: 978.0. Samples: 230456. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0) +[2024-12-18 20:52:08,825][00245] Avg episode reward: [(0, '4.639')] +[2024-12-18 20:52:13,304][06288] Updated weights for policy 0, policy_version 230 (0.0029) +[2024-12-18 20:52:13,819][00245] Fps is (10 sec: 3686.3, 60 sec: 3891.2, 300 sec: 3623.4). Total num frames: 942080. Throughput: 0: 964.5. Samples: 236674. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 20:52:13,821][00245] Avg episode reward: [(0, '4.660')] +[2024-12-18 20:52:18,818][00245] Fps is (10 sec: 4505.7, 60 sec: 4027.7, 300 sec: 3647.8). Total num frames: 966656. Throughput: 0: 996.9. Samples: 240260. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:52:18,826][00245] Avg episode reward: [(0, '4.718')] +[2024-12-18 20:52:23,358][06288] Updated weights for policy 0, policy_version 240 (0.0023) +[2024-12-18 20:52:23,819][00245] Fps is (10 sec: 4096.1, 60 sec: 3959.5, 300 sec: 3640.9). Total num frames: 983040. Throughput: 0: 1001.0. Samples: 245910. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:52:23,820][00245] Avg episode reward: [(0, '4.835')] +[2024-12-18 20:52:23,826][06275] Saving new best policy, reward=4.835! +[2024-12-18 20:52:28,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3823.0, 300 sec: 3634.3). Total num frames: 999424. Throughput: 0: 963.3. Samples: 251152. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:52:28,825][00245] Avg episode reward: [(0, '4.789')] +[2024-12-18 20:52:33,308][06288] Updated weights for policy 0, policy_version 250 (0.0015) +[2024-12-18 20:52:33,819][00245] Fps is (10 sec: 4096.0, 60 sec: 4027.9, 300 sec: 3657.1). Total num frames: 1024000. Throughput: 0: 977.1. Samples: 254612. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:52:33,826][00245] Avg episode reward: [(0, '4.569')] +[2024-12-18 20:52:38,818][00245] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3664.8). Total num frames: 1044480. Throughput: 0: 1017.1. Samples: 261042. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:52:38,828][00245] Avg episode reward: [(0, '4.482')] +[2024-12-18 20:52:43,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3823.3, 300 sec: 3644.0). Total num frames: 1056768. Throughput: 0: 956.7. Samples: 265394. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:52:43,824][00245] Avg episode reward: [(0, '4.617')] +[2024-12-18 20:52:45,091][06288] Updated weights for policy 0, policy_version 260 (0.0018) +[2024-12-18 20:52:48,818][00245] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3665.6). Total num frames: 1081344. Throughput: 0: 955.7. Samples: 268886. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:52:48,821][00245] Avg episode reward: [(0, '4.806')] +[2024-12-18 20:52:53,819][00245] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3735.0). Total num frames: 1101824. Throughput: 0: 1009.2. Samples: 275870. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:52:53,826][00245] Avg episode reward: [(0, '4.662')] +[2024-12-18 20:52:53,840][06288] Updated weights for policy 0, policy_version 270 (0.0018) +[2024-12-18 20:52:58,820][00245] Fps is (10 sec: 3685.9, 60 sec: 3891.1, 300 sec: 3790.5). Total num frames: 1118208. Throughput: 0: 977.8. Samples: 280678. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:52:58,822][00245] Avg episode reward: [(0, '4.759')] +[2024-12-18 20:53:03,819][00245] Fps is (10 sec: 3686.3, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 1138688. Throughput: 0: 955.8. Samples: 283270. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:53:03,821][00245] Avg episode reward: [(0, '4.837')] +[2024-12-18 20:53:03,833][06275] Saving new best policy, reward=4.837! +[2024-12-18 20:53:05,291][06288] Updated weights for policy 0, policy_version 280 (0.0027) +[2024-12-18 20:53:08,818][00245] Fps is (10 sec: 4506.3, 60 sec: 4027.7, 300 sec: 3860.0). Total num frames: 1163264. Throughput: 0: 985.7. Samples: 290266. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-12-18 20:53:08,823][00245] Avg episode reward: [(0, '4.704')] +[2024-12-18 20:53:13,819][00245] Fps is (10 sec: 4096.1, 60 sec: 3959.5, 300 sec: 3832.2). Total num frames: 1179648. Throughput: 0: 991.1. Samples: 295752. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:53:13,822][00245] Avg episode reward: [(0, '4.513')] +[2024-12-18 20:53:16,451][06288] Updated weights for policy 0, policy_version 290 (0.0023) +[2024-12-18 20:53:18,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 1196032. Throughput: 0: 961.4. Samples: 297874. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:53:18,824][00245] Avg episode reward: [(0, '4.566')] +[2024-12-18 20:53:23,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 1220608. Throughput: 0: 966.1. Samples: 304516. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:53:23,820][00245] Avg episode reward: [(0, '5.036')] +[2024-12-18 20:53:23,828][06275] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000298_1220608.pth... +[2024-12-18 20:53:23,964][06275] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000074_303104.pth +[2024-12-18 20:53:23,975][06275] Saving new best policy, reward=5.036! +[2024-12-18 20:53:25,790][06288] Updated weights for policy 0, policy_version 300 (0.0025) +[2024-12-18 20:53:28,819][00245] Fps is (10 sec: 4095.9, 60 sec: 3959.5, 300 sec: 3832.2). Total num frames: 1236992. Throughput: 0: 1010.0. Samples: 310844. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:53:28,825][00245] Avg episode reward: [(0, '5.007')] +[2024-12-18 20:53:33,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 1253376. Throughput: 0: 975.8. Samples: 312798. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:53:33,821][00245] Avg episode reward: [(0, '4.906')] +[2024-12-18 20:53:38,819][00245] Fps is (10 sec: 2457.6, 60 sec: 3618.1, 300 sec: 3804.5). Total num frames: 1261568. Throughput: 0: 896.0. Samples: 316188. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 20:53:38,824][00245] Avg episode reward: [(0, '5.108')] +[2024-12-18 20:53:38,941][06275] Saving new best policy, reward=5.108! +[2024-12-18 20:53:40,261][06288] Updated weights for policy 0, policy_version 310 (0.0033) +[2024-12-18 20:53:43,819][00245] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3790.5). Total num frames: 1282048. Throughput: 0: 921.3. Samples: 322134. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 20:53:43,821][00245] Avg episode reward: [(0, '4.861')] +[2024-12-18 20:53:48,823][00245] Fps is (10 sec: 4094.2, 60 sec: 3686.1, 300 sec: 3776.6). Total num frames: 1302528. Throughput: 0: 941.7. Samples: 325650. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-18 20:53:48,825][00245] Avg episode reward: [(0, '4.751')] +[2024-12-18 20:53:50,446][06288] Updated weights for policy 0, policy_version 320 (0.0021) +[2024-12-18 20:53:53,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3790.5). Total num frames: 1318912. Throughput: 0: 885.7. Samples: 330122. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-18 20:53:53,821][00245] Avg episode reward: [(0, '4.940')] +[2024-12-18 20:53:58,819][00245] Fps is (10 sec: 4097.8, 60 sec: 3754.8, 300 sec: 3804.4). Total num frames: 1343488. Throughput: 0: 906.8. Samples: 336556. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:53:58,825][00245] Avg episode reward: [(0, '4.814')] +[2024-12-18 20:54:00,395][06288] Updated weights for policy 0, policy_version 330 (0.0022) +[2024-12-18 20:54:03,819][00245] Fps is (10 sec: 4915.2, 60 sec: 3822.9, 300 sec: 3804.4). Total num frames: 1368064. Throughput: 0: 939.4. Samples: 340146. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:54:03,824][00245] Avg episode reward: [(0, '4.577')] +[2024-12-18 20:54:08,819][00245] Fps is (10 sec: 3686.3, 60 sec: 3618.1, 300 sec: 3790.5). Total num frames: 1380352. Throughput: 0: 912.8. Samples: 345590. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:54:08,823][00245] Avg episode reward: [(0, '4.707')] +[2024-12-18 20:54:11,951][06288] Updated weights for policy 0, policy_version 340 (0.0038) +[2024-12-18 20:54:13,822][00245] Fps is (10 sec: 2866.1, 60 sec: 3617.9, 300 sec: 3790.5). Total num frames: 1396736. Throughput: 0: 892.2. Samples: 350996. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:54:13,827][00245] Avg episode reward: [(0, '4.577')] +[2024-12-18 20:54:18,818][00245] Fps is (10 sec: 4505.7, 60 sec: 3822.9, 300 sec: 3804.4). Total num frames: 1425408. Throughput: 0: 925.9. Samples: 354464. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:54:18,821][00245] Avg episode reward: [(0, '4.456')] +[2024-12-18 20:54:20,463][06288] Updated weights for policy 0, policy_version 350 (0.0014) +[2024-12-18 20:54:23,821][00245] Fps is (10 sec: 4506.2, 60 sec: 3686.3, 300 sec: 3790.5). Total num frames: 1441792. Throughput: 0: 994.2. Samples: 360930. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-18 20:54:23,823][00245] Avg episode reward: [(0, '4.570')] +[2024-12-18 20:54:28,818][00245] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3776.7). Total num frames: 1454080. Throughput: 0: 940.9. Samples: 364476. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:54:28,822][00245] Avg episode reward: [(0, '4.611')] +[2024-12-18 20:54:33,819][00245] Fps is (10 sec: 2867.9, 60 sec: 3618.1, 300 sec: 3762.8). Total num frames: 1470464. Throughput: 0: 906.6. Samples: 366444. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:54:33,824][00245] Avg episode reward: [(0, '4.560')] +[2024-12-18 20:54:34,431][06288] Updated weights for policy 0, policy_version 360 (0.0054) +[2024-12-18 20:54:38,823][00245] Fps is (10 sec: 4094.2, 60 sec: 3890.9, 300 sec: 3762.7). Total num frames: 1495040. Throughput: 0: 954.7. Samples: 373086. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:54:38,830][00245] Avg episode reward: [(0, '4.654')] +[2024-12-18 20:54:43,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 1511424. Throughput: 0: 938.7. Samples: 378796. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:54:43,821][00245] Avg episode reward: [(0, '4.686')] +[2024-12-18 20:54:44,649][06288] Updated weights for policy 0, policy_version 370 (0.0028) +[2024-12-18 20:54:48,819][00245] Fps is (10 sec: 3278.2, 60 sec: 3754.9, 300 sec: 3762.8). Total num frames: 1527808. Throughput: 0: 906.4. Samples: 380936. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:54:48,824][00245] Avg episode reward: [(0, '4.628')] +[2024-12-18 20:54:53,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 1548288. Throughput: 0: 927.4. Samples: 387324. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:54:53,821][00245] Avg episode reward: [(0, '4.548')] +[2024-12-18 20:54:54,869][06288] Updated weights for policy 0, policy_version 380 (0.0013) +[2024-12-18 20:54:58,819][00245] Fps is (10 sec: 4505.5, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 1572864. Throughput: 0: 957.5. Samples: 394082. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:54:58,823][00245] Avg episode reward: [(0, '4.389')] +[2024-12-18 20:55:03,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3748.9). Total num frames: 1585152. Throughput: 0: 926.7. Samples: 396164. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:55:03,824][00245] Avg episode reward: [(0, '4.419')] +[2024-12-18 20:55:06,380][06288] Updated weights for policy 0, policy_version 390 (0.0034) +[2024-12-18 20:55:08,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3748.9). Total num frames: 1605632. Throughput: 0: 903.3. Samples: 401576. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 20:55:08,821][00245] Avg episode reward: [(0, '4.505')] +[2024-12-18 20:55:13,819][00245] Fps is (10 sec: 4505.6, 60 sec: 3891.4, 300 sec: 3790.6). Total num frames: 1630208. Throughput: 0: 973.3. Samples: 408276. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:55:13,821][00245] Avg episode reward: [(0, '4.618')] +[2024-12-18 20:55:15,777][06288] Updated weights for policy 0, policy_version 400 (0.0027) +[2024-12-18 20:55:18,821][00245] Fps is (10 sec: 3685.5, 60 sec: 3618.0, 300 sec: 3790.5). Total num frames: 1642496. Throughput: 0: 987.0. Samples: 410860. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:55:18,826][00245] Avg episode reward: [(0, '4.625')] +[2024-12-18 20:55:23,819][00245] Fps is (10 sec: 2867.2, 60 sec: 3618.3, 300 sec: 3776.7). Total num frames: 1658880. Throughput: 0: 931.2. Samples: 414984. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:55:23,825][00245] Avg episode reward: [(0, '4.702')] +[2024-12-18 20:55:23,832][06275] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000405_1658880.pth... +[2024-12-18 20:55:23,951][06275] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000182_745472.pth +[2024-12-18 20:55:27,802][06288] Updated weights for policy 0, policy_version 410 (0.0016) +[2024-12-18 20:55:28,819][00245] Fps is (10 sec: 4097.0, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 1683456. Throughput: 0: 952.1. Samples: 421640. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 20:55:28,825][00245] Avg episode reward: [(0, '4.610')] +[2024-12-18 20:55:33,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3776.6). Total num frames: 1699840. Throughput: 0: 974.1. Samples: 424770. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-12-18 20:55:33,824][00245] Avg episode reward: [(0, '4.552')] +[2024-12-18 20:55:38,819][00245] Fps is (10 sec: 3276.6, 60 sec: 3686.6, 300 sec: 3776.6). Total num frames: 1716224. Throughput: 0: 925.1. Samples: 428956. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:55:38,824][00245] Avg episode reward: [(0, '4.617')] +[2024-12-18 20:55:39,905][06288] Updated weights for policy 0, policy_version 420 (0.0041) +[2024-12-18 20:55:43,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 1736704. Throughput: 0: 906.5. Samples: 434876. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-18 20:55:43,821][00245] Avg episode reward: [(0, '4.726')] +[2024-12-18 20:55:48,819][00245] Fps is (10 sec: 4096.2, 60 sec: 3822.9, 300 sec: 3762.8). Total num frames: 1757184. Throughput: 0: 935.0. Samples: 438238. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-18 20:55:48,823][00245] Avg episode reward: [(0, '4.869')] +[2024-12-18 20:55:49,005][06288] Updated weights for policy 0, policy_version 430 (0.0017) +[2024-12-18 20:55:53,820][00245] Fps is (10 sec: 3685.8, 60 sec: 3754.6, 300 sec: 3776.6). Total num frames: 1773568. Throughput: 0: 930.2. Samples: 443438. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-12-18 20:55:53,823][00245] Avg episode reward: [(0, '4.806')] +[2024-12-18 20:55:58,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3762.8). Total num frames: 1789952. Throughput: 0: 890.7. Samples: 448358. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-18 20:55:58,821][00245] Avg episode reward: [(0, '4.773')] +[2024-12-18 20:56:01,279][06288] Updated weights for policy 0, policy_version 440 (0.0019) +[2024-12-18 20:56:03,819][00245] Fps is (10 sec: 3687.0, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 1810432. Throughput: 0: 905.2. Samples: 451590. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:56:03,825][00245] Avg episode reward: [(0, '4.744')] +[2024-12-18 20:56:08,821][00245] Fps is (10 sec: 4095.0, 60 sec: 3754.5, 300 sec: 3804.4). Total num frames: 1830912. Throughput: 0: 950.9. Samples: 457778. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:56:08,824][00245] Avg episode reward: [(0, '4.606')] +[2024-12-18 20:56:13,293][06288] Updated weights for policy 0, policy_version 450 (0.0015) +[2024-12-18 20:56:13,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3790.5). Total num frames: 1843200. Throughput: 0: 889.6. Samples: 461674. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:56:13,821][00245] Avg episode reward: [(0, '4.502')] +[2024-12-18 20:56:18,819][00245] Fps is (10 sec: 3277.6, 60 sec: 3686.5, 300 sec: 3790.5). Total num frames: 1863680. Throughput: 0: 892.0. Samples: 464912. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:56:18,821][00245] Avg episode reward: [(0, '4.643')] +[2024-12-18 20:56:22,985][06288] Updated weights for policy 0, policy_version 460 (0.0018) +[2024-12-18 20:56:23,820][00245] Fps is (10 sec: 4095.3, 60 sec: 3754.6, 300 sec: 3776.6). Total num frames: 1884160. Throughput: 0: 943.8. Samples: 471430. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:56:23,827][00245] Avg episode reward: [(0, '4.598')] +[2024-12-18 20:56:28,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3790.6). Total num frames: 1900544. Throughput: 0: 906.8. Samples: 475684. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:56:28,830][00245] Avg episode reward: [(0, '4.610')] +[2024-12-18 20:56:33,819][00245] Fps is (10 sec: 3277.3, 60 sec: 3618.1, 300 sec: 3776.7). Total num frames: 1916928. Throughput: 0: 884.8. Samples: 478054. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:56:33,821][00245] Avg episode reward: [(0, '4.732')] +[2024-12-18 20:56:35,368][06288] Updated weights for policy 0, policy_version 470 (0.0019) +[2024-12-18 20:56:38,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 1937408. Throughput: 0: 908.7. Samples: 484328. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:56:38,821][00245] Avg episode reward: [(0, '4.952')] +[2024-12-18 20:56:43,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3762.8). Total num frames: 1953792. Throughput: 0: 916.1. Samples: 489582. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:56:43,821][00245] Avg episode reward: [(0, '4.838')] +[2024-12-18 20:56:47,789][06288] Updated weights for policy 0, policy_version 480 (0.0039) +[2024-12-18 20:56:48,819][00245] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3748.9). Total num frames: 1966080. Throughput: 0: 884.8. Samples: 491406. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-18 20:56:48,823][00245] Avg episode reward: [(0, '4.602')] +[2024-12-18 20:56:53,819][00245] Fps is (10 sec: 2457.5, 60 sec: 3413.4, 300 sec: 3707.2). Total num frames: 1978368. Throughput: 0: 825.9. Samples: 494944. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:56:53,829][00245] Avg episode reward: [(0, '4.519')] +[2024-12-18 20:56:58,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3707.2). Total num frames: 1998848. Throughput: 0: 868.5. Samples: 500756. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:56:58,825][00245] Avg episode reward: [(0, '4.478')] +[2024-12-18 20:57:00,578][06288] Updated weights for policy 0, policy_version 490 (0.0030) +[2024-12-18 20:57:03,819][00245] Fps is (10 sec: 3686.6, 60 sec: 3413.3, 300 sec: 3707.2). Total num frames: 2015232. Throughput: 0: 851.9. Samples: 503246. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:57:03,821][00245] Avg episode reward: [(0, '4.411')] +[2024-12-18 20:57:08,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3345.2, 300 sec: 3693.3). Total num frames: 2031616. Throughput: 0: 801.7. Samples: 507504. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:57:08,821][00245] Avg episode reward: [(0, '4.493')] +[2024-12-18 20:57:12,474][06288] Updated weights for policy 0, policy_version 500 (0.0016) +[2024-12-18 20:57:13,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3679.5). Total num frames: 2052096. Throughput: 0: 848.4. Samples: 513862. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:57:13,821][00245] Avg episode reward: [(0, '4.712')] +[2024-12-18 20:57:18,819][00245] Fps is (10 sec: 3686.3, 60 sec: 3413.3, 300 sec: 3679.5). Total num frames: 2068480. Throughput: 0: 867.4. Samples: 517086. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:57:18,822][00245] Avg episode reward: [(0, '4.760')] +[2024-12-18 20:57:23,820][00245] Fps is (10 sec: 3276.3, 60 sec: 3345.1, 300 sec: 3679.4). Total num frames: 2084864. Throughput: 0: 815.8. Samples: 521042. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-18 20:57:23,823][00245] Avg episode reward: [(0, '4.799')] +[2024-12-18 20:57:23,839][06275] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000509_2084864.pth... +[2024-12-18 20:57:24,044][06275] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000298_1220608.pth +[2024-12-18 20:57:24,914][06288] Updated weights for policy 0, policy_version 510 (0.0024) +[2024-12-18 20:57:28,819][00245] Fps is (10 sec: 3686.5, 60 sec: 3413.3, 300 sec: 3665.6). Total num frames: 2105344. Throughput: 0: 829.5. Samples: 526910. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-18 20:57:28,824][00245] Avg episode reward: [(0, '4.746')] +[2024-12-18 20:57:33,819][00245] Fps is (10 sec: 4096.6, 60 sec: 3481.6, 300 sec: 3665.6). Total num frames: 2125824. Throughput: 0: 861.8. Samples: 530188. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:57:33,826][00245] Avg episode reward: [(0, '4.715')] +[2024-12-18 20:57:34,766][06288] Updated weights for policy 0, policy_version 520 (0.0015) +[2024-12-18 20:57:38,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3665.6). Total num frames: 2138112. Throughput: 0: 889.8. Samples: 534984. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:57:38,821][00245] Avg episode reward: [(0, '4.731')] +[2024-12-18 20:57:43,819][00245] Fps is (10 sec: 2048.0, 60 sec: 3208.5, 300 sec: 3610.0). Total num frames: 2146304. Throughput: 0: 827.6. Samples: 537996. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:57:43,822][00245] Avg episode reward: [(0, '4.586')] +[2024-12-18 20:57:48,819][00245] Fps is (10 sec: 2457.6, 60 sec: 3276.8, 300 sec: 3596.1). Total num frames: 2162688. Throughput: 0: 813.8. Samples: 539866. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 20:57:48,821][00245] Avg episode reward: [(0, '4.410')] +[2024-12-18 20:57:50,051][06288] Updated weights for policy 0, policy_version 530 (0.0036) +[2024-12-18 20:57:53,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3413.4, 300 sec: 3610.1). Total num frames: 2183168. Throughput: 0: 860.2. Samples: 546214. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:57:53,821][00245] Avg episode reward: [(0, '4.476')] +[2024-12-18 20:57:58,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3596.2). Total num frames: 2199552. Throughput: 0: 813.7. Samples: 550480. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:57:58,821][00245] Avg episode reward: [(0, '4.509')] +[2024-12-18 20:58:02,491][06288] Updated weights for policy 0, policy_version 540 (0.0052) +[2024-12-18 20:58:03,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3568.4). Total num frames: 2215936. Throughput: 0: 792.0. Samples: 552726. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:58:03,821][00245] Avg episode reward: [(0, '4.521')] +[2024-12-18 20:58:08,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3582.3). Total num frames: 2236416. Throughput: 0: 848.9. Samples: 559240. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:58:08,822][00245] Avg episode reward: [(0, '4.685')] +[2024-12-18 20:58:12,459][06288] Updated weights for policy 0, policy_version 550 (0.0018) +[2024-12-18 20:58:13,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3582.3). Total num frames: 2252800. Throughput: 0: 836.8. Samples: 564568. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:58:13,821][00245] Avg episode reward: [(0, '4.652')] +[2024-12-18 20:58:18,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3554.5). Total num frames: 2269184. Throughput: 0: 806.6. Samples: 566484. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-18 20:58:18,823][00245] Avg episode reward: [(0, '4.660')] +[2024-12-18 20:58:23,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3413.4, 300 sec: 3568.4). Total num frames: 2289664. Throughput: 0: 826.3. Samples: 572166. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-18 20:58:23,825][00245] Avg episode reward: [(0, '4.690')] +[2024-12-18 20:58:24,308][06288] Updated weights for policy 0, policy_version 560 (0.0028) +[2024-12-18 20:58:28,820][00245] Fps is (10 sec: 4095.4, 60 sec: 3413.3, 300 sec: 3582.2). Total num frames: 2310144. Throughput: 0: 906.0. Samples: 578768. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-18 20:58:28,822][00245] Avg episode reward: [(0, '4.897')] +[2024-12-18 20:58:33,819][00245] Fps is (10 sec: 3686.3, 60 sec: 3345.0, 300 sec: 3610.0). Total num frames: 2326528. Throughput: 0: 908.4. Samples: 580744. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-18 20:58:33,821][00245] Avg episode reward: [(0, '4.893')] +[2024-12-18 20:58:36,451][06288] Updated weights for policy 0, policy_version 570 (0.0018) +[2024-12-18 20:58:38,819][00245] Fps is (10 sec: 3277.3, 60 sec: 3413.3, 300 sec: 3596.1). Total num frames: 2342912. Throughput: 0: 877.3. Samples: 585694. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-18 20:58:38,826][00245] Avg episode reward: [(0, '4.745')] +[2024-12-18 20:58:43,819][00245] Fps is (10 sec: 3686.5, 60 sec: 3618.1, 300 sec: 3596.2). Total num frames: 2363392. Throughput: 0: 930.0. Samples: 592332. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:58:43,825][00245] Avg episode reward: [(0, '4.674')] +[2024-12-18 20:58:46,168][06288] Updated weights for policy 0, policy_version 580 (0.0014) +[2024-12-18 20:58:48,820][00245] Fps is (10 sec: 3685.8, 60 sec: 3618.0, 300 sec: 3596.1). Total num frames: 2379776. Throughput: 0: 940.9. Samples: 595068. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-12-18 20:58:48,827][00245] Avg episode reward: [(0, '4.615')] +[2024-12-18 20:58:53,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 2396160. Throughput: 0: 884.6. Samples: 599048. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-18 20:58:53,822][00245] Avg episode reward: [(0, '4.587')] +[2024-12-18 20:58:58,254][06288] Updated weights for policy 0, policy_version 590 (0.0032) +[2024-12-18 20:58:58,819][00245] Fps is (10 sec: 3687.0, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 2416640. Throughput: 0: 910.7. Samples: 605548. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 20:58:58,825][00245] Avg episode reward: [(0, '4.452')] +[2024-12-18 20:59:03,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 2437120. Throughput: 0: 938.6. Samples: 608720. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 20:59:03,825][00245] Avg episode reward: [(0, '4.574')] +[2024-12-18 20:59:08,825][00245] Fps is (10 sec: 3274.7, 60 sec: 3549.5, 300 sec: 3568.3). Total num frames: 2449408. Throughput: 0: 906.3. Samples: 612954. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:59:08,827][00245] Avg episode reward: [(0, '4.650')] +[2024-12-18 20:59:10,608][06288] Updated weights for policy 0, policy_version 600 (0.0015) +[2024-12-18 20:59:13,819][00245] Fps is (10 sec: 3276.7, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 2469888. Throughput: 0: 881.1. Samples: 618418. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:59:13,822][00245] Avg episode reward: [(0, '4.771')] +[2024-12-18 20:59:18,818][00245] Fps is (10 sec: 4098.6, 60 sec: 3686.4, 300 sec: 3554.5). Total num frames: 2490368. Throughput: 0: 909.5. Samples: 621670. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-18 20:59:18,820][00245] Avg episode reward: [(0, '4.765')] +[2024-12-18 20:59:19,910][06288] Updated weights for policy 0, policy_version 610 (0.0028) +[2024-12-18 20:59:23,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 2506752. Throughput: 0: 927.9. Samples: 627448. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-18 20:59:23,822][00245] Avg episode reward: [(0, '4.677')] +[2024-12-18 20:59:23,835][06275] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000612_2506752.pth... +[2024-12-18 20:59:23,979][06275] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000405_1658880.pth +[2024-12-18 20:59:28,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3550.0, 300 sec: 3568.4). Total num frames: 2523136. Throughput: 0: 882.4. Samples: 632042. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:59:28,821][00245] Avg episode reward: [(0, '4.732')] +[2024-12-18 20:59:31,692][06288] Updated weights for policy 0, policy_version 620 (0.0019) +[2024-12-18 20:59:33,820][00245] Fps is (10 sec: 4095.6, 60 sec: 3686.3, 300 sec: 3568.4). Total num frames: 2547712. Throughput: 0: 897.6. Samples: 635460. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:59:33,823][00245] Avg episode reward: [(0, '4.860')] +[2024-12-18 20:59:38,819][00245] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3582.3). Total num frames: 2568192. Throughput: 0: 958.1. Samples: 642164. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 20:59:38,821][00245] Avg episode reward: [(0, '4.779')] +[2024-12-18 20:59:43,134][06288] Updated weights for policy 0, policy_version 630 (0.0014) +[2024-12-18 20:59:43,819][00245] Fps is (10 sec: 3277.3, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 2580480. Throughput: 0: 906.7. Samples: 646350. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:59:43,822][00245] Avg episode reward: [(0, '4.670')] +[2024-12-18 20:59:48,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3686.5, 300 sec: 3568.4). Total num frames: 2600960. Throughput: 0: 899.8. Samples: 649210. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-18 20:59:48,821][00245] Avg episode reward: [(0, '4.817')] +[2024-12-18 20:59:53,157][06288] Updated weights for policy 0, policy_version 640 (0.0038) +[2024-12-18 20:59:53,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3554.5). Total num frames: 2621440. Throughput: 0: 948.4. Samples: 655628. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 20:59:53,826][00245] Avg episode reward: [(0, '4.682')] +[2024-12-18 20:59:58,818][00245] Fps is (10 sec: 3686.5, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 2637824. Throughput: 0: 935.6. Samples: 660520. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 20:59:58,822][00245] Avg episode reward: [(0, '4.745')] +[2024-12-18 21:00:03,822][00245] Fps is (10 sec: 2866.2, 60 sec: 3549.7, 300 sec: 3540.6). Total num frames: 2650112. Throughput: 0: 907.4. Samples: 662504. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:00:03,825][00245] Avg episode reward: [(0, '4.711')] +[2024-12-18 21:00:07,330][06288] Updated weights for policy 0, policy_version 650 (0.0032) +[2024-12-18 21:00:08,819][00245] Fps is (10 sec: 2867.2, 60 sec: 3618.5, 300 sec: 3512.8). Total num frames: 2666496. Throughput: 0: 869.8. Samples: 666590. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 21:00:08,821][00245] Avg episode reward: [(0, '4.606')] +[2024-12-18 21:00:13,819][00245] Fps is (10 sec: 3277.9, 60 sec: 3549.9, 300 sec: 3526.8). Total num frames: 2682880. Throughput: 0: 890.8. Samples: 672126. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:00:13,825][00245] Avg episode reward: [(0, '4.859')] +[2024-12-18 21:00:18,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3526.7). Total num frames: 2699264. Throughput: 0: 859.7. Samples: 674144. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:00:18,826][00245] Avg episode reward: [(0, '4.967')] +[2024-12-18 21:00:19,824][06288] Updated weights for policy 0, policy_version 660 (0.0034) +[2024-12-18 21:00:23,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3512.8). Total num frames: 2719744. Throughput: 0: 838.2. Samples: 679884. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:00:23,821][00245] Avg episode reward: [(0, '4.885')] +[2024-12-18 21:00:28,790][06288] Updated weights for policy 0, policy_version 670 (0.0017) +[2024-12-18 21:00:28,819][00245] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3540.6). Total num frames: 2744320. Throughput: 0: 896.2. Samples: 686680. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:00:28,821][00245] Avg episode reward: [(0, '4.958')] +[2024-12-18 21:00:33,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3481.7, 300 sec: 3526.7). Total num frames: 2756608. Throughput: 0: 883.7. Samples: 688976. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:00:33,830][00245] Avg episode reward: [(0, '4.968')] +[2024-12-18 21:00:38,819][00245] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3512.8). Total num frames: 2772992. Throughput: 0: 844.5. Samples: 693630. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-18 21:00:38,821][00245] Avg episode reward: [(0, '5.194')] +[2024-12-18 21:00:38,859][06275] Saving new best policy, reward=5.194! +[2024-12-18 21:00:40,831][06288] Updated weights for policy 0, policy_version 680 (0.0045) +[2024-12-18 21:00:43,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3526.7). Total num frames: 2797568. Throughput: 0: 885.0. Samples: 700344. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-18 21:00:43,824][00245] Avg episode reward: [(0, '5.176')] +[2024-12-18 21:00:48,819][00245] Fps is (10 sec: 4095.9, 60 sec: 3549.9, 300 sec: 3526.7). Total num frames: 2813952. Throughput: 0: 912.1. Samples: 703544. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:00:48,823][00245] Avg episode reward: [(0, '4.929')] +[2024-12-18 21:00:52,243][06288] Updated weights for policy 0, policy_version 690 (0.0029) +[2024-12-18 21:00:53,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3526.7). Total num frames: 2830336. Throughput: 0: 909.7. Samples: 707528. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2024-12-18 21:00:53,823][00245] Avg episode reward: [(0, '4.905')] +[2024-12-18 21:00:58,819][00245] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3499.0). Total num frames: 2842624. Throughput: 0: 878.4. Samples: 711652. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-18 21:00:58,826][00245] Avg episode reward: [(0, '4.939')] +[2024-12-18 21:01:03,819][00245] Fps is (10 sec: 2867.1, 60 sec: 3481.8, 300 sec: 3485.1). Total num frames: 2859008. Throughput: 0: 886.6. Samples: 714040. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:01:03,825][00245] Avg episode reward: [(0, '4.853')] +[2024-12-18 21:01:05,291][06288] Updated weights for policy 0, policy_version 700 (0.0016) +[2024-12-18 21:01:08,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3499.0). Total num frames: 2875392. Throughput: 0: 872.3. Samples: 719138. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:01:08,825][00245] Avg episode reward: [(0, '4.889')] +[2024-12-18 21:01:13,819][00245] Fps is (10 sec: 3686.3, 60 sec: 3549.8, 300 sec: 3499.0). Total num frames: 2895872. Throughput: 0: 842.4. Samples: 724588. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:01:13,821][00245] Avg episode reward: [(0, '5.133')] +[2024-12-18 21:01:16,239][06288] Updated weights for policy 0, policy_version 710 (0.0016) +[2024-12-18 21:01:18,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3499.0). Total num frames: 2916352. Throughput: 0: 870.9. Samples: 728166. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:01:18,826][00245] Avg episode reward: [(0, '5.277')] +[2024-12-18 21:01:18,829][06275] Saving new best policy, reward=5.277! +[2024-12-18 21:01:23,819][00245] Fps is (10 sec: 4096.2, 60 sec: 3618.1, 300 sec: 3512.8). Total num frames: 2936832. Throughput: 0: 901.2. Samples: 734186. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:01:23,824][00245] Avg episode reward: [(0, '5.430')] +[2024-12-18 21:01:23,831][06275] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000717_2936832.pth... +[2024-12-18 21:01:24,004][06275] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000509_2084864.pth +[2024-12-18 21:01:24,022][06275] Saving new best policy, reward=5.430! +[2024-12-18 21:01:28,326][06288] Updated weights for policy 0, policy_version 720 (0.0033) +[2024-12-18 21:01:28,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3499.0). Total num frames: 2949120. Throughput: 0: 845.3. Samples: 738384. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:01:28,831][00245] Avg episode reward: [(0, '5.161')] +[2024-12-18 21:01:33,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3499.0). Total num frames: 2969600. Throughput: 0: 850.2. Samples: 741804. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 21:01:33,821][00245] Avg episode reward: [(0, '5.258')] +[2024-12-18 21:01:37,604][06288] Updated weights for policy 0, policy_version 730 (0.0019) +[2024-12-18 21:01:38,819][00245] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3526.7). Total num frames: 2994176. Throughput: 0: 906.8. Samples: 748332. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:01:38,823][00245] Avg episode reward: [(0, '5.302')] +[2024-12-18 21:01:43,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3526.7). Total num frames: 3006464. Throughput: 0: 909.2. Samples: 752566. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:01:43,822][00245] Avg episode reward: [(0, '5.389')] +[2024-12-18 21:01:48,819][00245] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 3022848. Throughput: 0: 911.4. Samples: 755052. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-12-18 21:01:48,828][00245] Avg episode reward: [(0, '5.333')] +[2024-12-18 21:01:49,706][06288] Updated weights for policy 0, policy_version 740 (0.0023) +[2024-12-18 21:01:53,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 3043328. Throughput: 0: 940.8. Samples: 761472. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-12-18 21:01:53,826][00245] Avg episode reward: [(0, '5.304')] +[2024-12-18 21:01:58,819][00245] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3512.8). Total num frames: 3051520. Throughput: 0: 884.8. Samples: 764402. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-18 21:01:58,828][00245] Avg episode reward: [(0, '5.531')] +[2024-12-18 21:01:58,944][06275] Saving new best policy, reward=5.531! +[2024-12-18 21:02:03,819][00245] Fps is (10 sec: 2457.6, 60 sec: 3481.6, 300 sec: 3512.8). Total num frames: 3067904. Throughput: 0: 845.9. Samples: 766232. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-18 21:02:03,821][00245] Avg episode reward: [(0, '5.357')] +[2024-12-18 21:02:04,060][06288] Updated weights for policy 0, policy_version 750 (0.0021) +[2024-12-18 21:02:08,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3526.7). Total num frames: 3092480. Throughput: 0: 847.8. Samples: 772336. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-18 21:02:08,821][00245] Avg episode reward: [(0, '5.884')] +[2024-12-18 21:02:08,825][06275] Saving new best policy, reward=5.884! +[2024-12-18 21:02:13,819][00245] Fps is (10 sec: 4096.1, 60 sec: 3549.9, 300 sec: 3526.7). Total num frames: 3108864. Throughput: 0: 886.2. Samples: 778264. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-18 21:02:13,821][00245] Avg episode reward: [(0, '5.850')] +[2024-12-18 21:02:14,642][06288] Updated weights for policy 0, policy_version 760 (0.0021) +[2024-12-18 21:02:18,820][00245] Fps is (10 sec: 2866.7, 60 sec: 3413.2, 300 sec: 3512.8). Total num frames: 3121152. Throughput: 0: 852.1. Samples: 780148. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:02:18,829][00245] Avg episode reward: [(0, '5.438')] +[2024-12-18 21:02:23,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3512.8). Total num frames: 3141632. Throughput: 0: 820.2. Samples: 785242. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:02:23,824][00245] Avg episode reward: [(0, '5.491')] +[2024-12-18 21:02:26,459][06288] Updated weights for policy 0, policy_version 770 (0.0031) +[2024-12-18 21:02:28,819][00245] Fps is (10 sec: 4096.7, 60 sec: 3549.9, 300 sec: 3512.8). Total num frames: 3162112. Throughput: 0: 867.1. Samples: 791586. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:02:28,824][00245] Avg episode reward: [(0, '5.354')] +[2024-12-18 21:02:33,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3526.7). Total num frames: 3178496. Throughput: 0: 864.8. Samples: 793966. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:02:33,824][00245] Avg episode reward: [(0, '5.131')] +[2024-12-18 21:02:38,819][00245] Fps is (10 sec: 2867.2, 60 sec: 3276.8, 300 sec: 3540.6). Total num frames: 3190784. Throughput: 0: 816.4. Samples: 798212. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:02:38,825][00245] Avg episode reward: [(0, '5.083')] +[2024-12-18 21:02:38,832][06288] Updated weights for policy 0, policy_version 780 (0.0040) +[2024-12-18 21:02:43,819][00245] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3568.4). Total num frames: 3215360. Throughput: 0: 894.1. Samples: 804638. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:02:43,825][00245] Avg episode reward: [(0, '4.599')] +[2024-12-18 21:02:48,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3554.5). Total num frames: 3231744. Throughput: 0: 923.2. Samples: 807774. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 21:02:48,825][00245] Avg episode reward: [(0, '4.690')] +[2024-12-18 21:02:49,405][06288] Updated weights for policy 0, policy_version 790 (0.0019) +[2024-12-18 21:02:53,820][00245] Fps is (10 sec: 2866.8, 60 sec: 3345.0, 300 sec: 3540.6). Total num frames: 3244032. Throughput: 0: 869.8. Samples: 811476. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-18 21:02:53,824][00245] Avg episode reward: [(0, '4.730')] +[2024-12-18 21:02:58,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 3264512. Throughput: 0: 864.5. Samples: 817166. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 21:02:58,821][00245] Avg episode reward: [(0, '5.004')] +[2024-12-18 21:03:01,187][06288] Updated weights for policy 0, policy_version 800 (0.0016) +[2024-12-18 21:03:03,819][00245] Fps is (10 sec: 4096.5, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 3284992. Throughput: 0: 892.7. Samples: 820320. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-18 21:03:03,822][00245] Avg episode reward: [(0, '4.833')] +[2024-12-18 21:03:08,822][00245] Fps is (10 sec: 3685.0, 60 sec: 3481.4, 300 sec: 3554.5). Total num frames: 3301376. Throughput: 0: 887.7. Samples: 825190. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-12-18 21:03:08,827][00245] Avg episode reward: [(0, '4.792')] +[2024-12-18 21:03:13,442][06288] Updated weights for policy 0, policy_version 810 (0.0028) +[2024-12-18 21:03:13,822][00245] Fps is (10 sec: 3275.7, 60 sec: 3481.4, 300 sec: 3554.5). Total num frames: 3317760. Throughput: 0: 859.1. Samples: 830248. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-12-18 21:03:13,825][00245] Avg episode reward: [(0, '4.808')] +[2024-12-18 21:03:18,819][00245] Fps is (10 sec: 3278.0, 60 sec: 3550.0, 300 sec: 3540.6). Total num frames: 3334144. Throughput: 0: 865.6. Samples: 832916. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:03:18,820][00245] Avg episode reward: [(0, '4.814')] +[2024-12-18 21:03:23,819][00245] Fps is (10 sec: 2868.2, 60 sec: 3413.3, 300 sec: 3512.9). Total num frames: 3346432. Throughput: 0: 857.2. Samples: 836788. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 21:03:23,826][00245] Avg episode reward: [(0, '4.709')] +[2024-12-18 21:03:23,842][06275] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000817_3346432.pth... +[2024-12-18 21:03:24,015][06275] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000612_2506752.pth +[2024-12-18 21:03:28,118][06288] Updated weights for policy 0, policy_version 820 (0.0029) +[2024-12-18 21:03:28,819][00245] Fps is (10 sec: 2457.6, 60 sec: 3276.8, 300 sec: 3499.0). Total num frames: 3358720. Throughput: 0: 801.9. Samples: 840724. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:03:28,826][00245] Avg episode reward: [(0, '4.764')] +[2024-12-18 21:03:33,819][00245] Fps is (10 sec: 3276.7, 60 sec: 3345.1, 300 sec: 3512.8). Total num frames: 3379200. Throughput: 0: 801.8. Samples: 843854. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-18 21:03:33,821][00245] Avg episode reward: [(0, '4.600')] +[2024-12-18 21:03:38,070][06288] Updated weights for policy 0, policy_version 830 (0.0021) +[2024-12-18 21:03:38,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3512.8). Total num frames: 3399680. Throughput: 0: 860.6. Samples: 850204. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:03:38,821][00245] Avg episode reward: [(0, '4.443')] +[2024-12-18 21:03:43,819][00245] Fps is (10 sec: 3686.5, 60 sec: 3345.1, 300 sec: 3512.9). Total num frames: 3416064. Throughput: 0: 840.9. Samples: 855008. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:03:43,820][00245] Avg episode reward: [(0, '4.627')] +[2024-12-18 21:03:48,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3512.8). Total num frames: 3432448. Throughput: 0: 820.6. Samples: 857246. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 21:03:48,825][00245] Avg episode reward: [(0, '4.737')] +[2024-12-18 21:03:49,828][06288] Updated weights for policy 0, policy_version 840 (0.0023) +[2024-12-18 21:03:53,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3550.0, 300 sec: 3526.7). Total num frames: 3457024. Throughput: 0: 859.9. Samples: 863884. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:03:53,825][00245] Avg episode reward: [(0, '4.888')] +[2024-12-18 21:03:58,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3512.8). Total num frames: 3473408. Throughput: 0: 877.3. Samples: 869722. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:03:58,826][00245] Avg episode reward: [(0, '4.826')] +[2024-12-18 21:04:00,743][06288] Updated weights for policy 0, policy_version 850 (0.0021) +[2024-12-18 21:04:03,819][00245] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3512.9). Total num frames: 3485696. Throughput: 0: 861.6. Samples: 871690. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:04:03,829][00245] Avg episode reward: [(0, '4.725')] +[2024-12-18 21:04:08,819][00245] Fps is (10 sec: 2867.0, 60 sec: 3345.2, 300 sec: 3499.0). Total num frames: 3502080. Throughput: 0: 874.9. Samples: 876160. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:04:08,827][00245] Avg episode reward: [(0, '4.781')] +[2024-12-18 21:04:13,819][00245] Fps is (10 sec: 3276.7, 60 sec: 3345.3, 300 sec: 3485.1). Total num frames: 3518464. Throughput: 0: 890.8. Samples: 880812. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:04:13,823][00245] Avg episode reward: [(0, '4.699')] +[2024-12-18 21:04:14,360][06288] Updated weights for policy 0, policy_version 860 (0.0018) +[2024-12-18 21:04:18,819][00245] Fps is (10 sec: 2867.4, 60 sec: 3276.8, 300 sec: 3471.2). Total num frames: 3530752. Throughput: 0: 866.6. Samples: 882852. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:04:18,821][00245] Avg episode reward: [(0, '4.870')] +[2024-12-18 21:04:23,819][00245] Fps is (10 sec: 3276.7, 60 sec: 3413.3, 300 sec: 3485.1). Total num frames: 3551232. Throughput: 0: 829.1. Samples: 887512. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:04:23,822][00245] Avg episode reward: [(0, '4.710')] +[2024-12-18 21:04:26,348][06288] Updated weights for policy 0, policy_version 870 (0.0030) +[2024-12-18 21:04:28,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 3571712. Throughput: 0: 870.5. Samples: 894180. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:04:28,826][00245] Avg episode reward: [(0, '4.652')] +[2024-12-18 21:04:33,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 3592192. Throughput: 0: 889.7. Samples: 897284. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:04:33,823][00245] Avg episode reward: [(0, '4.601')] +[2024-12-18 21:04:38,156][06288] Updated weights for policy 0, policy_version 880 (0.0019) +[2024-12-18 21:04:38,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3471.2). Total num frames: 3604480. Throughput: 0: 833.6. Samples: 901394. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 21:04:38,825][00245] Avg episode reward: [(0, '4.378')] +[2024-12-18 21:04:43,819][00245] Fps is (10 sec: 3686.6, 60 sec: 3549.9, 300 sec: 3485.1). Total num frames: 3629056. Throughput: 0: 847.1. Samples: 907842. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 21:04:43,825][00245] Avg episode reward: [(0, '4.381')] +[2024-12-18 21:04:47,422][06288] Updated weights for policy 0, policy_version 890 (0.0029) +[2024-12-18 21:04:48,821][00245] Fps is (10 sec: 4504.5, 60 sec: 3618.0, 300 sec: 3485.0). Total num frames: 3649536. Throughput: 0: 876.8. Samples: 911146. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:04:48,828][00245] Avg episode reward: [(0, '4.536')] +[2024-12-18 21:04:53,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3471.2). Total num frames: 3661824. Throughput: 0: 882.6. Samples: 915878. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 21:04:53,826][00245] Avg episode reward: [(0, '4.497')] +[2024-12-18 21:04:58,819][00245] Fps is (10 sec: 3277.6, 60 sec: 3481.6, 300 sec: 3499.0). Total num frames: 3682304. Throughput: 0: 897.1. Samples: 921180. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 21:04:58,828][00245] Avg episode reward: [(0, '4.721')] +[2024-12-18 21:04:59,589][06288] Updated weights for policy 0, policy_version 900 (0.0020) +[2024-12-18 21:05:03,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3512.8). Total num frames: 3702784. Throughput: 0: 927.6. Samples: 924592. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:05:03,820][00245] Avg episode reward: [(0, '4.781')] +[2024-12-18 21:05:08,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3526.7). Total num frames: 3723264. Throughput: 0: 961.5. Samples: 930778. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 21:05:08,821][00245] Avg episode reward: [(0, '4.556')] +[2024-12-18 21:05:10,096][06288] Updated weights for policy 0, policy_version 910 (0.0029) +[2024-12-18 21:05:13,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3512.8). Total num frames: 3735552. Throughput: 0: 909.2. Samples: 935096. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:05:13,827][00245] Avg episode reward: [(0, '4.490')] +[2024-12-18 21:05:18,818][00245] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3526.7). Total num frames: 3760128. Throughput: 0: 917.1. Samples: 938552. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:05:18,820][00245] Avg episode reward: [(0, '4.481')] +[2024-12-18 21:05:20,294][06288] Updated weights for policy 0, policy_version 920 (0.0028) +[2024-12-18 21:05:23,821][00245] Fps is (10 sec: 4505.6, 60 sec: 3823.0, 300 sec: 3512.8). Total num frames: 3780608. Throughput: 0: 975.9. Samples: 945310. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:05:23,826][00245] Avg episode reward: [(0, '4.421')] +[2024-12-18 21:05:23,839][06275] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000923_3780608.pth... +[2024-12-18 21:05:23,979][06275] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000717_2936832.pth +[2024-12-18 21:05:28,819][00245] Fps is (10 sec: 3276.7, 60 sec: 3686.4, 300 sec: 3512.8). Total num frames: 3792896. Throughput: 0: 932.8. Samples: 949820. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-12-18 21:05:28,824][00245] Avg episode reward: [(0, '4.571')] +[2024-12-18 21:05:32,243][06288] Updated weights for policy 0, policy_version 930 (0.0032) +[2024-12-18 21:05:33,819][00245] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3526.7). Total num frames: 3813376. Throughput: 0: 916.0. Samples: 952364. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:05:33,821][00245] Avg episode reward: [(0, '4.623')] +[2024-12-18 21:05:38,819][00245] Fps is (10 sec: 4505.5, 60 sec: 3891.2, 300 sec: 3526.7). Total num frames: 3837952. Throughput: 0: 962.9. Samples: 959210. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 21:05:38,822][00245] Avg episode reward: [(0, '4.440')] +[2024-12-18 21:05:41,466][06288] Updated weights for policy 0, policy_version 940 (0.0034) +[2024-12-18 21:05:43,819][00245] Fps is (10 sec: 4095.9, 60 sec: 3754.6, 300 sec: 3526.7). Total num frames: 3854336. Throughput: 0: 969.0. Samples: 964786. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:05:43,823][00245] Avg episode reward: [(0, '4.503')] +[2024-12-18 21:05:48,819][00245] Fps is (10 sec: 3276.9, 60 sec: 3686.6, 300 sec: 3526.7). Total num frames: 3870720. Throughput: 0: 939.2. Samples: 966858. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:05:48,823][00245] Avg episode reward: [(0, '4.579')] +[2024-12-18 21:05:53,034][06288] Updated weights for policy 0, policy_version 950 (0.0022) +[2024-12-18 21:05:53,819][00245] Fps is (10 sec: 3686.5, 60 sec: 3822.9, 300 sec: 3554.5). Total num frames: 3891200. Throughput: 0: 939.8. Samples: 973068. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 21:05:53,823][00245] Avg episode reward: [(0, '4.405')] +[2024-12-18 21:05:58,819][00245] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3582.3). Total num frames: 3915776. Throughput: 0: 989.9. Samples: 979642. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-12-18 21:05:58,824][00245] Avg episode reward: [(0, '4.544')] +[2024-12-18 21:06:03,824][00245] Fps is (10 sec: 3684.2, 60 sec: 3754.3, 300 sec: 3568.3). Total num frames: 3928064. Throughput: 0: 958.3. Samples: 981680. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-12-18 21:06:03,828][00245] Avg episode reward: [(0, '4.715')] +[2024-12-18 21:06:04,510][06288] Updated weights for policy 0, policy_version 960 (0.0035) +[2024-12-18 21:06:08,818][00245] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3568.4). Total num frames: 3948544. Throughput: 0: 926.5. Samples: 987002. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 21:06:08,822][00245] Avg episode reward: [(0, '4.830')] +[2024-12-18 21:06:13,748][06288] Updated weights for policy 0, policy_version 970 (0.0015) +[2024-12-18 21:06:13,819][00245] Fps is (10 sec: 4508.3, 60 sec: 3959.5, 300 sec: 3582.3). Total num frames: 3973120. Throughput: 0: 978.9. Samples: 993872. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-12-18 21:06:13,826][00245] Avg episode reward: [(0, '5.047')] +[2024-12-18 21:06:18,819][00245] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3568.4). Total num frames: 3989504. Throughput: 0: 986.0. Samples: 996736. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-12-18 21:06:18,821][00245] Avg episode reward: [(0, '5.037')] +[2024-12-18 21:06:23,791][06275] Stopping Batcher_0... +[2024-12-18 21:06:23,793][06275] Loop batcher_evt_loop terminating... +[2024-12-18 21:06:23,791][00245] Component Batcher_0 stopped! +[2024-12-18 21:06:23,794][06275] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2024-12-18 21:06:23,855][06288] Weights refcount: 2 0 +[2024-12-18 21:06:23,858][06288] Stopping InferenceWorker_p0-w0... +[2024-12-18 21:06:23,858][00245] Component InferenceWorker_p0-w0 stopped! +[2024-12-18 21:06:23,863][06288] Loop inference_proc0-0_evt_loop terminating... +[2024-12-18 21:06:23,940][06275] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000817_3346432.pth +[2024-12-18 21:06:23,951][06275] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2024-12-18 21:06:24,154][06275] Stopping LearnerWorker_p0... +[2024-12-18 21:06:24,154][00245] Component LearnerWorker_p0 stopped! +[2024-12-18 21:06:24,155][06275] Loop learner_proc0_evt_loop terminating... +[2024-12-18 21:06:24,167][00245] Component RolloutWorker_w4 stopped! +[2024-12-18 21:06:24,169][06293] Stopping RolloutWorker_w4... +[2024-12-18 21:06:24,172][06293] Loop rollout_proc4_evt_loop terminating... +[2024-12-18 21:06:24,224][00245] Component RolloutWorker_w6 stopped! +[2024-12-18 21:06:24,226][06296] Stopping RolloutWorker_w6... +[2024-12-18 21:06:24,228][06296] Loop rollout_proc6_evt_loop terminating... +[2024-12-18 21:06:24,234][00245] Component RolloutWorker_w2 stopped! +[2024-12-18 21:06:24,238][06291] Stopping RolloutWorker_w2... +[2024-12-18 21:06:24,239][06291] Loop rollout_proc2_evt_loop terminating... +[2024-12-18 21:06:24,250][00245] Component RolloutWorker_w0 stopped! +[2024-12-18 21:06:24,250][06295] Stopping RolloutWorker_w7... +[2024-12-18 21:06:24,253][00245] Component RolloutWorker_w7 stopped! +[2024-12-18 21:06:24,257][06289] Stopping RolloutWorker_w0... +[2024-12-18 21:06:24,257][06289] Loop rollout_proc0_evt_loop terminating... +[2024-12-18 21:06:24,255][06295] Loop rollout_proc7_evt_loop terminating... +[2024-12-18 21:06:24,326][06292] Stopping RolloutWorker_w3... +[2024-12-18 21:06:24,326][00245] Component RolloutWorker_w3 stopped! +[2024-12-18 21:06:24,327][06292] Loop rollout_proc3_evt_loop terminating... +[2024-12-18 21:06:24,343][06294] Stopping RolloutWorker_w5... +[2024-12-18 21:06:24,343][00245] Component RolloutWorker_w5 stopped! +[2024-12-18 21:06:24,344][06294] Loop rollout_proc5_evt_loop terminating... +[2024-12-18 21:06:24,357][06290] Stopping RolloutWorker_w1... +[2024-12-18 21:06:24,357][00245] Component RolloutWorker_w1 stopped! +[2024-12-18 21:06:24,362][00245] Waiting for process learner_proc0 to stop... +[2024-12-18 21:06:24,358][06290] Loop rollout_proc1_evt_loop terminating... +[2024-12-18 21:06:25,987][00245] Waiting for process inference_proc0-0 to join... +[2024-12-18 21:06:25,993][00245] Waiting for process rollout_proc0 to join... +[2024-12-18 21:06:29,029][00245] Waiting for process rollout_proc1 to join... +[2024-12-18 21:06:29,032][00245] Waiting for process rollout_proc2 to join... +[2024-12-18 21:06:29,039][00245] Waiting for process rollout_proc3 to join... +[2024-12-18 21:06:29,043][00245] Waiting for process rollout_proc4 to join... +[2024-12-18 21:06:29,047][00245] Waiting for process rollout_proc5 to join... +[2024-12-18 21:06:29,050][00245] Waiting for process rollout_proc6 to join... +[2024-12-18 21:06:29,053][00245] Waiting for process rollout_proc7 to join... +[2024-12-18 21:06:29,056][00245] Batcher 0 profile tree view: +batching: 26.6893, releasing_batches: 0.0278 +[2024-12-18 21:06:29,059][00245] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0000 + wait_policy_total: 433.1250 +update_model: 9.1067 + weight_update: 0.0028 +one_step: 0.0086 + handle_policy_step: 620.0738 + deserialize: 15.3986, stack: 3.3473, obs_to_device_normalize: 130.4539, forward: 312.7951, send_messages: 31.5056 + prepare_outputs: 95.5405 + to_cpu: 57.1327 +[2024-12-18 21:06:29,062][00245] Learner 0 profile tree view: +misc: 0.0050, prepare_batch: 14.3628 +train: 76.4511 + epoch_init: 0.0054, minibatch_init: 0.0064, losses_postprocess: 0.6850, kl_divergence: 0.7035, after_optimizer: 34.1337 + calculate_losses: 27.8635 + losses_init: 0.0037, forward_head: 1.3782, bptt_initial: 18.6042, tail: 1.1418, advantages_returns: 0.3142, losses: 3.9412 + bptt: 2.1147 + bptt_forward_core: 2.0109 + update: 12.4484 + clip: 0.9507 +[2024-12-18 21:06:29,063][00245] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.3864, enqueue_policy_requests: 109.9651, env_step: 862.8046, overhead: 14.5813, complete_rollouts: 7.7964 +save_policy_outputs: 23.0970 + split_output_tensors: 8.8886 +[2024-12-18 21:06:29,064][00245] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.3013, enqueue_policy_requests: 111.7318, env_step: 855.3180, overhead: 14.3721, complete_rollouts: 6.8695 +save_policy_outputs: 22.7182 + split_output_tensors: 9.0535 +[2024-12-18 21:06:29,066][00245] Loop Runner_EvtLoop terminating... +[2024-12-18 21:06:29,068][00245] Runner profile tree view: +main_loop: 1139.7925 +[2024-12-18 21:06:29,070][00245] Collected {0: 4005888}, FPS: 3514.6 +[2024-12-18 21:06:29,476][00245] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-12-18 21:06:29,477][00245] Overriding arg 'num_workers' with value 1 passed from command line +[2024-12-18 21:06:29,482][00245] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-12-18 21:06:29,483][00245] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-12-18 21:06:29,485][00245] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-12-18 21:06:29,490][00245] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-12-18 21:06:29,492][00245] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2024-12-18 21:06:29,493][00245] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-12-18 21:06:29,495][00245] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2024-12-18 21:06:29,497][00245] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2024-12-18 21:06:29,499][00245] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-12-18 21:06:29,501][00245] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-12-18 21:06:29,502][00245] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-12-18 21:06:29,506][00245] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-12-18 21:06:29,507][00245] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-12-18 21:06:29,542][00245] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-12-18 21:06:29,547][00245] RunningMeanStd input shape: (3, 72, 128) +[2024-12-18 21:06:29,549][00245] RunningMeanStd input shape: (1,) +[2024-12-18 21:06:29,568][00245] ConvEncoder: input_channels=3 +[2024-12-18 21:06:29,676][00245] Conv encoder output size: 512 +[2024-12-18 21:06:29,677][00245] Policy head output size: 512 +[2024-12-18 21:06:29,847][00245] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2024-12-18 21:06:30,637][00245] Num frames 100... +[2024-12-18 21:06:30,759][00245] Num frames 200... +[2024-12-18 21:06:30,888][00245] Num frames 300... +[2024-12-18 21:06:30,969][00245] Avg episode rewards: #0: 4.200, true rewards: #0: 3.200 +[2024-12-18 21:06:30,971][00245] Avg episode reward: 4.200, avg true_objective: 3.200 +[2024-12-18 21:06:31,069][00245] Num frames 400... +[2024-12-18 21:06:31,197][00245] Num frames 500... +[2024-12-18 21:06:31,327][00245] Num frames 600... +[2024-12-18 21:06:31,452][00245] Num frames 700... +[2024-12-18 21:06:31,515][00245] Avg episode rewards: #0: 4.020, true rewards: #0: 3.520 +[2024-12-18 21:06:31,516][00245] Avg episode reward: 4.020, avg true_objective: 3.520 +[2024-12-18 21:06:31,636][00245] Num frames 800... +[2024-12-18 21:06:31,757][00245] Num frames 900... +[2024-12-18 21:06:31,885][00245] Num frames 1000... +[2024-12-18 21:06:32,043][00245] Avg episode rewards: #0: 3.960, true rewards: #0: 3.627 +[2024-12-18 21:06:32,045][00245] Avg episode reward: 3.960, avg true_objective: 3.627 +[2024-12-18 21:06:32,062][00245] Num frames 1100... +[2024-12-18 21:06:32,183][00245] Num frames 1200... +[2024-12-18 21:06:32,302][00245] Num frames 1300... +[2024-12-18 21:06:32,431][00245] Num frames 1400... +[2024-12-18 21:06:32,599][00245] Avg episode rewards: #0: 3.930, true rewards: #0: 3.680 +[2024-12-18 21:06:32,601][00245] Avg episode reward: 3.930, avg true_objective: 3.680 +[2024-12-18 21:06:32,650][00245] Num frames 1500... +[2024-12-18 21:06:32,815][00245] Num frames 1600... +[2024-12-18 21:06:32,978][00245] Num frames 1700... +[2024-12-18 21:06:33,145][00245] Num frames 1800... +[2024-12-18 21:06:33,310][00245] Num frames 1900... +[2024-12-18 21:06:33,482][00245] Num frames 2000... +[2024-12-18 21:06:33,568][00245] Avg episode rewards: #0: 5.034, true rewards: #0: 4.034 +[2024-12-18 21:06:33,570][00245] Avg episode reward: 5.034, avg true_objective: 4.034 +[2024-12-18 21:06:33,702][00245] Num frames 2100... +[2024-12-18 21:06:33,869][00245] Num frames 2200... +[2024-12-18 21:06:34,053][00245] Avg episode rewards: #0: 4.622, true rewards: #0: 3.788 +[2024-12-18 21:06:34,055][00245] Avg episode reward: 4.622, avg true_objective: 3.788 +[2024-12-18 21:06:34,102][00245] Num frames 2300... +[2024-12-18 21:06:34,273][00245] Num frames 2400... +[2024-12-18 21:06:34,455][00245] Num frames 2500... +[2024-12-18 21:06:34,639][00245] Num frames 2600... +[2024-12-18 21:06:34,817][00245] Num frames 2700... +[2024-12-18 21:06:34,995][00245] Num frames 2800... +[2024-12-18 21:06:35,072][00245] Avg episode rewards: #0: 5.024, true rewards: #0: 4.024 +[2024-12-18 21:06:35,073][00245] Avg episode reward: 5.024, avg true_objective: 4.024 +[2024-12-18 21:06:35,175][00245] Num frames 2900... +[2024-12-18 21:06:35,296][00245] Num frames 3000... +[2024-12-18 21:06:35,419][00245] Num frames 3100... +[2024-12-18 21:06:35,561][00245] Num frames 3200... +[2024-12-18 21:06:35,620][00245] Avg episode rewards: #0: 4.876, true rewards: #0: 4.001 +[2024-12-18 21:06:35,622][00245] Avg episode reward: 4.876, avg true_objective: 4.001 +[2024-12-18 21:06:35,742][00245] Num frames 3300... +[2024-12-18 21:06:35,862][00245] Num frames 3400... +[2024-12-18 21:06:35,983][00245] Num frames 3500... +[2024-12-18 21:06:36,138][00245] Avg episode rewards: #0: 4.761, true rewards: #0: 3.983 +[2024-12-18 21:06:36,140][00245] Avg episode reward: 4.761, avg true_objective: 3.983 +[2024-12-18 21:06:36,160][00245] Num frames 3600... +[2024-12-18 21:06:36,282][00245] Num frames 3700... +[2024-12-18 21:06:36,404][00245] Num frames 3800... +[2024-12-18 21:06:36,540][00245] Num frames 3900... +[2024-12-18 21:06:36,680][00245] Avg episode rewards: #0: 4.669, true rewards: #0: 3.969 +[2024-12-18 21:06:36,682][00245] Avg episode reward: 4.669, avg true_objective: 3.969 +[2024-12-18 21:06:58,380][00245] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2024-12-18 21:08:50,447][00245] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2024-12-18 21:08:50,448][00245] Overriding arg 'num_workers' with value 1 passed from command line +[2024-12-18 21:08:50,451][00245] Adding new argument 'no_render'=True that is not in the saved config file! +[2024-12-18 21:08:50,453][00245] Adding new argument 'save_video'=True that is not in the saved config file! +[2024-12-18 21:08:50,455][00245] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2024-12-18 21:08:50,457][00245] Adding new argument 'video_name'=None that is not in the saved config file! +[2024-12-18 21:08:50,458][00245] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2024-12-18 21:08:50,459][00245] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2024-12-18 21:08:50,460][00245] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2024-12-18 21:08:50,461][00245] Adding new argument 'hf_repository'='Esteban00007/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2024-12-18 21:08:50,462][00245] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2024-12-18 21:08:50,463][00245] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2024-12-18 21:08:50,464][00245] Adding new argument 'train_script'=None that is not in the saved config file! +[2024-12-18 21:08:50,465][00245] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2024-12-18 21:08:50,466][00245] Using frameskip 1 and render_action_repeat=4 for evaluation +[2024-12-18 21:08:50,496][00245] RunningMeanStd input shape: (3, 72, 128) +[2024-12-18 21:08:50,497][00245] RunningMeanStd input shape: (1,) +[2024-12-18 21:08:50,510][00245] ConvEncoder: input_channels=3 +[2024-12-18 21:08:50,548][00245] Conv encoder output size: 512 +[2024-12-18 21:08:50,549][00245] Policy head output size: 512 +[2024-12-18 21:08:50,567][00245] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2024-12-18 21:08:50,975][00245] Num frames 100... +[2024-12-18 21:08:51,097][00245] Num frames 200... +[2024-12-18 21:08:51,219][00245] Num frames 300... +[2024-12-18 21:08:51,377][00245] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840 +[2024-12-18 21:08:51,378][00245] Avg episode reward: 3.840, avg true_objective: 3.840 +[2024-12-18 21:08:51,400][00245] Num frames 400... +[2024-12-18 21:08:51,540][00245] Num frames 500... +[2024-12-18 21:08:51,660][00245] Num frames 600... +[2024-12-18 21:08:51,781][00245] Num frames 700... +[2024-12-18 21:08:51,921][00245] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840 +[2024-12-18 21:08:51,923][00245] Avg episode reward: 3.840, avg true_objective: 3.840 +[2024-12-18 21:08:51,963][00245] Num frames 800... +[2024-12-18 21:08:52,079][00245] Num frames 900... +[2024-12-18 21:08:52,201][00245] Num frames 1000... +[2024-12-18 21:08:52,323][00245] Num frames 1100... +[2024-12-18 21:08:52,449][00245] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840 +[2024-12-18 21:08:52,451][00245] Avg episode reward: 3.840, avg true_objective: 3.840 +[2024-12-18 21:08:52,520][00245] Num frames 1200... +[2024-12-18 21:08:52,639][00245] Num frames 1300... +[2024-12-18 21:08:52,763][00245] Num frames 1400... +[2024-12-18 21:08:52,885][00245] Num frames 1500... +[2024-12-18 21:08:52,988][00245] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840 +[2024-12-18 21:08:52,989][00245] Avg episode reward: 3.840, avg true_objective: 3.840 +[2024-12-18 21:08:53,067][00245] Num frames 1600... +[2024-12-18 21:08:53,187][00245] Num frames 1700... +[2024-12-18 21:08:53,310][00245] Num frames 1800... +[2024-12-18 21:08:53,448][00245] Num frames 1900... +[2024-12-18 21:08:53,573][00245] Num frames 2000... +[2024-12-18 21:08:53,696][00245] Num frames 2100... +[2024-12-18 21:08:53,818][00245] Num frames 2200... +[2024-12-18 21:08:53,950][00245] Num frames 2300... +[2024-12-18 21:08:54,012][00245] Avg episode rewards: #0: 5.608, true rewards: #0: 4.608 +[2024-12-18 21:08:54,014][00245] Avg episode reward: 5.608, avg true_objective: 4.608 +[2024-12-18 21:08:54,130][00245] Num frames 2400... +[2024-12-18 21:08:54,302][00245] Num frames 2500... +[2024-12-18 21:08:54,482][00245] Num frames 2600... +[2024-12-18 21:08:54,644][00245] Num frames 2700... +[2024-12-18 21:08:54,788][00245] Avg episode rewards: #0: 5.587, true rewards: #0: 4.587 +[2024-12-18 21:08:54,792][00245] Avg episode reward: 5.587, avg true_objective: 4.587 +[2024-12-18 21:08:54,877][00245] Num frames 2800... +[2024-12-18 21:08:55,042][00245] Num frames 2900... +[2024-12-18 21:08:55,209][00245] Num frames 3000... +[2024-12-18 21:08:55,371][00245] Num frames 3100... +[2024-12-18 21:08:55,548][00245] Avg episode rewards: #0: 5.526, true rewards: #0: 4.526 +[2024-12-18 21:08:55,550][00245] Avg episode reward: 5.526, avg true_objective: 4.526 +[2024-12-18 21:08:55,610][00245] Num frames 3200... +[2024-12-18 21:08:55,785][00245] Num frames 3300... +[2024-12-18 21:08:55,958][00245] Num frames 3400... +[2024-12-18 21:08:56,135][00245] Num frames 3500... +[2024-12-18 21:08:56,312][00245] Num frames 3600... +[2024-12-18 21:08:56,401][00245] Avg episode rewards: #0: 5.520, true rewards: #0: 4.520 +[2024-12-18 21:08:56,403][00245] Avg episode reward: 5.520, avg true_objective: 4.520 +[2024-12-18 21:08:56,568][00245] Num frames 3700... +[2024-12-18 21:08:56,700][00245] Num frames 3800... +[2024-12-18 21:08:56,828][00245] Num frames 3900... +[2024-12-18 21:08:56,949][00245] Num frames 4000... +[2024-12-18 21:08:57,078][00245] Avg episode rewards: #0: 5.516, true rewards: #0: 4.516 +[2024-12-18 21:08:57,080][00245] Avg episode reward: 5.516, avg true_objective: 4.516 +[2024-12-18 21:08:57,130][00245] Num frames 4100... +[2024-12-18 21:08:57,252][00245] Num frames 4200... +[2024-12-18 21:08:57,376][00245] Num frames 4300... +[2024-12-18 21:08:57,503][00245] Num frames 4400... +[2024-12-18 21:08:57,674][00245] Avg episode rewards: #0: 5.480, true rewards: #0: 4.480 +[2024-12-18 21:08:57,675][00245] Avg episode reward: 5.480, avg true_objective: 4.480 +[2024-12-18 21:09:21,685][00245] Replay video saved to /content/train_dir/default_experiment/replay.mp4!