diff --git "a/sf_log.txt" "b/sf_log.txt" new file mode 100644--- /dev/null +++ "b/sf_log.txt" @@ -0,0 +1,1169 @@ +[2023-12-29 10:32:54,271][00379] Saving configuration to /content/train_dir/default_experiment/config.json... +[2023-12-29 10:32:54,274][00379] Rollout worker 0 uses device cpu +[2023-12-29 10:32:54,276][00379] Rollout worker 1 uses device cpu +[2023-12-29 10:32:54,277][00379] Rollout worker 2 uses device cpu +[2023-12-29 10:32:54,278][00379] Rollout worker 3 uses device cpu +[2023-12-29 10:32:54,280][00379] Rollout worker 4 uses device cpu +[2023-12-29 10:32:54,281][00379] Rollout worker 5 uses device cpu +[2023-12-29 10:32:54,283][00379] Rollout worker 6 uses device cpu +[2023-12-29 10:32:54,292][00379] Rollout worker 7 uses device cpu +[2023-12-29 10:32:54,450][00379] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-12-29 10:32:54,452][00379] InferenceWorker_p0-w0: min num requests: 2 +[2023-12-29 10:32:54,487][00379] Starting all processes... +[2023-12-29 10:32:54,489][00379] Starting process learner_proc0 +[2023-12-29 10:32:54,546][00379] Starting all processes... +[2023-12-29 10:32:54,553][00379] Starting process inference_proc0-0 +[2023-12-29 10:32:54,557][00379] Starting process rollout_proc0 +[2023-12-29 10:32:54,557][00379] Starting process rollout_proc1 +[2023-12-29 10:32:54,557][00379] Starting process rollout_proc2 +[2023-12-29 10:32:54,557][00379] Starting process rollout_proc3 +[2023-12-29 10:32:54,557][00379] Starting process rollout_proc4 +[2023-12-29 10:32:54,557][00379] Starting process rollout_proc5 +[2023-12-29 10:32:54,557][00379] Starting process rollout_proc6 +[2023-12-29 10:32:54,557][00379] Starting process rollout_proc7 +[2023-12-29 10:33:13,130][02217] Worker 3 uses CPU cores [1] +[2023-12-29 10:33:13,247][02218] Worker 2 uses CPU cores [0] +[2023-12-29 10:33:13,494][02201] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-12-29 10:33:13,497][02201] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2023-12-29 10:33:13,563][02201] Num visible devices: 1 +[2023-12-29 10:33:13,600][02201] Starting seed is not provided +[2023-12-29 10:33:13,601][02201] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-12-29 10:33:13,602][02201] Initializing actor-critic model on device cuda:0 +[2023-12-29 10:33:13,603][02201] RunningMeanStd input shape: (3, 72, 128) +[2023-12-29 10:33:13,606][02201] RunningMeanStd input shape: (1,) +[2023-12-29 10:33:13,717][02201] ConvEncoder: input_channels=3 +[2023-12-29 10:33:13,755][02215] Worker 0 uses CPU cores [0] +[2023-12-29 10:33:13,851][02219] Worker 4 uses CPU cores [0] +[2023-12-29 10:33:13,885][02216] Worker 1 uses CPU cores [1] +[2023-12-29 10:33:13,951][02222] Worker 7 uses CPU cores [1] +[2023-12-29 10:33:13,950][02221] Worker 5 uses CPU cores [1] +[2023-12-29 10:33:13,956][02220] Worker 6 uses CPU cores [0] +[2023-12-29 10:33:14,031][02214] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-12-29 10:33:14,031][02214] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2023-12-29 10:33:14,071][02214] Num visible devices: 1 +[2023-12-29 10:33:14,165][02201] Conv encoder output size: 512 +[2023-12-29 10:33:14,167][02201] Policy head output size: 512 +[2023-12-29 10:33:14,243][02201] Created Actor Critic model with architecture: +[2023-12-29 10:33:14,245][02201] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2023-12-29 10:33:14,487][00379] Heartbeat connected on Batcher_0 +[2023-12-29 10:33:14,491][00379] Heartbeat connected on InferenceWorker_p0-w0 +[2023-12-29 10:33:14,494][00379] Heartbeat connected on RolloutWorker_w1 +[2023-12-29 10:33:14,495][00379] Heartbeat connected on RolloutWorker_w3 +[2023-12-29 10:33:14,504][00379] Heartbeat connected on RolloutWorker_w5 +[2023-12-29 10:33:14,505][00379] Heartbeat connected on RolloutWorker_w4 +[2023-12-29 10:33:14,506][00379] Heartbeat connected on RolloutWorker_w2 +[2023-12-29 10:33:14,507][00379] Heartbeat connected on RolloutWorker_w6 +[2023-12-29 10:33:14,508][00379] Heartbeat connected on RolloutWorker_w0 +[2023-12-29 10:33:14,509][00379] Heartbeat connected on RolloutWorker_w7 +[2023-12-29 10:33:14,899][02201] Using optimizer +[2023-12-29 10:33:18,895][02201] No checkpoints found +[2023-12-29 10:33:18,904][02201] Did not load from checkpoint, starting from scratch! +[2023-12-29 10:33:18,905][02201] Initialized policy 0 weights for model version 0 +[2023-12-29 10:33:18,934][02201] LearnerWorker_p0 finished initialization! +[2023-12-29 10:33:18,935][02201] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-12-29 10:33:18,944][00379] Heartbeat connected on LearnerWorker_p0 +[2023-12-29 10:33:19,630][02214] RunningMeanStd input shape: (3, 72, 128) +[2023-12-29 10:33:19,631][02214] RunningMeanStd input shape: (1,) +[2023-12-29 10:33:19,704][02214] ConvEncoder: input_channels=3 +[2023-12-29 10:33:20,150][02214] Conv encoder output size: 512 +[2023-12-29 10:33:20,150][02214] Policy head output size: 512 +[2023-12-29 10:33:20,292][00379] Inference worker 0-0 is ready! +[2023-12-29 10:33:20,294][00379] All inference workers are ready! Signal rollout workers to start! +[2023-12-29 10:33:20,602][02222] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-12-29 10:33:20,593][02221] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-12-29 10:33:20,612][02217] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-12-29 10:33:20,620][02216] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-12-29 10:33:20,723][02218] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-12-29 10:33:20,713][02220] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-12-29 10:33:20,722][02219] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-12-29 10:33:20,729][02215] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-12-29 10:33:22,264][02220] Decorrelating experience for 0 frames... +[2023-12-29 10:33:22,263][02219] Decorrelating experience for 0 frames... +[2023-12-29 10:33:22,359][02222] Decorrelating experience for 0 frames... +[2023-12-29 10:33:22,362][02216] Decorrelating experience for 0 frames... +[2023-12-29 10:33:22,365][02221] Decorrelating experience for 0 frames... +[2023-12-29 10:33:22,856][00379] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-12-29 10:33:23,156][02216] Decorrelating experience for 32 frames... +[2023-12-29 10:33:23,159][02217] Decorrelating experience for 0 frames... +[2023-12-29 10:33:23,401][02220] Decorrelating experience for 32 frames... +[2023-12-29 10:33:23,403][02219] Decorrelating experience for 32 frames... +[2023-12-29 10:33:23,645][02215] Decorrelating experience for 0 frames... +[2023-12-29 10:33:24,398][02217] Decorrelating experience for 32 frames... +[2023-12-29 10:33:24,407][02218] Decorrelating experience for 0 frames... +[2023-12-29 10:33:24,433][02221] Decorrelating experience for 32 frames... +[2023-12-29 10:33:24,685][02220] Decorrelating experience for 64 frames... +[2023-12-29 10:33:24,825][02222] Decorrelating experience for 32 frames... +[2023-12-29 10:33:25,477][02216] Decorrelating experience for 64 frames... +[2023-12-29 10:33:25,547][02218] Decorrelating experience for 32 frames... +[2023-12-29 10:33:25,600][02219] Decorrelating experience for 64 frames... +[2023-12-29 10:33:25,894][02215] Decorrelating experience for 32 frames... +[2023-12-29 10:33:26,237][02222] Decorrelating experience for 64 frames... +[2023-12-29 10:33:26,555][02220] Decorrelating experience for 96 frames... +[2023-12-29 10:33:27,140][02217] Decorrelating experience for 64 frames... +[2023-12-29 10:33:27,200][02216] Decorrelating experience for 96 frames... +[2023-12-29 10:33:27,238][02221] Decorrelating experience for 64 frames... +[2023-12-29 10:33:27,340][02218] Decorrelating experience for 64 frames... +[2023-12-29 10:33:27,856][00379] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-12-29 10:33:27,948][02215] Decorrelating experience for 64 frames... +[2023-12-29 10:33:27,979][02222] Decorrelating experience for 96 frames... +[2023-12-29 10:33:28,630][02219] Decorrelating experience for 96 frames... +[2023-12-29 10:33:28,893][02218] Decorrelating experience for 96 frames... +[2023-12-29 10:33:29,134][02221] Decorrelating experience for 96 frames... +[2023-12-29 10:33:30,043][02217] Decorrelating experience for 96 frames... +[2023-12-29 10:33:30,203][02215] Decorrelating experience for 96 frames... +[2023-12-29 10:33:32,861][00379] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 212.5. Samples: 2126. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-12-29 10:33:32,863][00379] Avg episode reward: [(0, '2.262')] +[2023-12-29 10:33:34,376][02201] Signal inference workers to stop experience collection... +[2023-12-29 10:33:34,397][02214] InferenceWorker_p0-w0: stopping experience collection +[2023-12-29 10:33:36,348][02201] Signal inference workers to resume experience collection... +[2023-12-29 10:33:36,355][02214] InferenceWorker_p0-w0: resuming experience collection +[2023-12-29 10:33:37,856][00379] Fps is (10 sec: 819.2, 60 sec: 546.1, 300 sec: 546.1). Total num frames: 8192. Throughput: 0: 189.5. Samples: 2842. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2023-12-29 10:33:37,859][00379] Avg episode reward: [(0, '2.578')] +[2023-12-29 10:33:42,856][00379] Fps is (10 sec: 2458.8, 60 sec: 1228.8, 300 sec: 1228.8). Total num frames: 24576. Throughput: 0: 287.1. Samples: 5742. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:33:42,868][00379] Avg episode reward: [(0, '3.555')] +[2023-12-29 10:33:46,316][02214] Updated weights for policy 0, policy_version 10 (0.0324) +[2023-12-29 10:33:47,856][00379] Fps is (10 sec: 3686.4, 60 sec: 1802.2, 300 sec: 1802.2). Total num frames: 45056. Throughput: 0: 475.8. Samples: 11894. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:33:47,858][00379] Avg episode reward: [(0, '4.207')] +[2023-12-29 10:33:52,861][00379] Fps is (10 sec: 3684.6, 60 sec: 2047.7, 300 sec: 2047.7). Total num frames: 61440. Throughput: 0: 498.9. Samples: 14968. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:33:52,865][00379] Avg episode reward: [(0, '4.481')] +[2023-12-29 10:33:57,859][00379] Fps is (10 sec: 2866.4, 60 sec: 2106.4, 300 sec: 2106.4). Total num frames: 73728. Throughput: 0: 529.7. Samples: 18540. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:33:57,861][00379] Avg episode reward: [(0, '4.565')] +[2023-12-29 10:34:00,139][02214] Updated weights for policy 0, policy_version 20 (0.0016) +[2023-12-29 10:34:02,856][00379] Fps is (10 sec: 2868.6, 60 sec: 2252.8, 300 sec: 2252.8). Total num frames: 90112. Throughput: 0: 567.7. Samples: 22706. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:34:02,858][00379] Avg episode reward: [(0, '4.298')] +[2023-12-29 10:34:07,856][00379] Fps is (10 sec: 3687.4, 60 sec: 2457.6, 300 sec: 2457.6). Total num frames: 110592. Throughput: 0: 572.8. Samples: 25776. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:34:07,865][00379] Avg episode reward: [(0, '4.296')] +[2023-12-29 10:34:07,872][02201] Saving new best policy, reward=4.296! +[2023-12-29 10:34:10,632][02214] Updated weights for policy 0, policy_version 30 (0.0016) +[2023-12-29 10:34:12,856][00379] Fps is (10 sec: 3686.4, 60 sec: 2539.5, 300 sec: 2539.5). Total num frames: 126976. Throughput: 0: 708.9. Samples: 31900. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:34:12,864][00379] Avg episode reward: [(0, '4.440')] +[2023-12-29 10:34:12,867][02201] Saving new best policy, reward=4.440! +[2023-12-29 10:34:17,857][00379] Fps is (10 sec: 3276.6, 60 sec: 2606.5, 300 sec: 2606.5). Total num frames: 143360. Throughput: 0: 752.2. Samples: 35974. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:34:17,861][00379] Avg episode reward: [(0, '4.486')] +[2023-12-29 10:34:17,875][02201] Saving new best policy, reward=4.486! +[2023-12-29 10:34:22,856][00379] Fps is (10 sec: 2867.2, 60 sec: 2594.1, 300 sec: 2594.1). Total num frames: 155648. Throughput: 0: 778.8. Samples: 37886. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:34:22,862][00379] Avg episode reward: [(0, '4.513')] +[2023-12-29 10:34:22,867][02201] Saving new best policy, reward=4.513! +[2023-12-29 10:34:24,702][02214] Updated weights for policy 0, policy_version 40 (0.0017) +[2023-12-29 10:34:27,856][00379] Fps is (10 sec: 3277.0, 60 sec: 2935.5, 300 sec: 2709.7). Total num frames: 176128. Throughput: 0: 832.4. Samples: 43198. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:34:27,871][00379] Avg episode reward: [(0, '4.666')] +[2023-12-29 10:34:27,884][02201] Saving new best policy, reward=4.666! +[2023-12-29 10:34:32,861][00379] Fps is (10 sec: 3684.7, 60 sec: 3208.5, 300 sec: 2750.0). Total num frames: 192512. Throughput: 0: 819.1. Samples: 48758. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:34:32,865][00379] Avg episode reward: [(0, '4.477')] +[2023-12-29 10:34:37,856][00379] Fps is (10 sec: 2048.0, 60 sec: 3140.3, 300 sec: 2621.4). Total num frames: 196608. Throughput: 0: 765.6. Samples: 49418. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:34:37,861][00379] Avg episode reward: [(0, '4.559')] +[2023-12-29 10:34:41,160][02214] Updated weights for policy 0, policy_version 50 (0.0045) +[2023-12-29 10:34:42,856][00379] Fps is (10 sec: 1639.1, 60 sec: 3072.0, 300 sec: 2611.2). Total num frames: 208896. Throughput: 0: 742.4. Samples: 51946. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:34:42,861][00379] Avg episode reward: [(0, '4.495')] +[2023-12-29 10:34:47,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3072.0, 300 sec: 2698.5). Total num frames: 229376. Throughput: 0: 779.8. Samples: 57798. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:34:47,858][00379] Avg episode reward: [(0, '4.448')] +[2023-12-29 10:34:47,873][02201] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000056_229376.pth... +[2023-12-29 10:34:51,608][02214] Updated weights for policy 0, policy_version 60 (0.0025) +[2023-12-29 10:34:52,856][00379] Fps is (10 sec: 4096.1, 60 sec: 3140.5, 300 sec: 2776.2). Total num frames: 249856. Throughput: 0: 779.1. Samples: 60836. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:34:52,859][00379] Avg episode reward: [(0, '4.335')] +[2023-12-29 10:34:57,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3140.4, 300 sec: 2759.4). Total num frames: 262144. Throughput: 0: 743.4. Samples: 65352. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:34:57,864][00379] Avg episode reward: [(0, '4.399')] +[2023-12-29 10:35:02,856][00379] Fps is (10 sec: 2457.6, 60 sec: 3072.0, 300 sec: 2744.3). Total num frames: 274432. Throughput: 0: 734.1. Samples: 69010. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:35:02,863][00379] Avg episode reward: [(0, '4.443')] +[2023-12-29 10:35:05,722][02214] Updated weights for policy 0, policy_version 70 (0.0013) +[2023-12-29 10:35:07,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3072.0, 300 sec: 2808.7). Total num frames: 294912. Throughput: 0: 751.8. Samples: 71716. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:35:07,859][00379] Avg episode reward: [(0, '4.488')] +[2023-12-29 10:35:12,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3072.0, 300 sec: 2830.0). Total num frames: 311296. Throughput: 0: 767.1. Samples: 77716. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:35:12,858][00379] Avg episode reward: [(0, '4.390')] +[2023-12-29 10:35:17,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3003.8, 300 sec: 2813.8). Total num frames: 323584. Throughput: 0: 736.8. Samples: 81910. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:35:17,864][00379] Avg episode reward: [(0, '4.390')] +[2023-12-29 10:35:18,288][02214] Updated weights for policy 0, policy_version 80 (0.0019) +[2023-12-29 10:35:22,862][00379] Fps is (10 sec: 2456.2, 60 sec: 3003.5, 300 sec: 2798.8). Total num frames: 335872. Throughput: 0: 763.5. Samples: 83778. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:35:22,866][00379] Avg episode reward: [(0, '4.364')] +[2023-12-29 10:35:27,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3003.7, 300 sec: 2850.8). Total num frames: 356352. Throughput: 0: 811.9. Samples: 88480. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:35:27,859][00379] Avg episode reward: [(0, '4.321')] +[2023-12-29 10:35:30,323][02214] Updated weights for policy 0, policy_version 90 (0.0025) +[2023-12-29 10:35:32,856][00379] Fps is (10 sec: 4098.3, 60 sec: 3072.2, 300 sec: 2898.7). Total num frames: 376832. Throughput: 0: 820.4. Samples: 94714. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:35:32,859][00379] Avg episode reward: [(0, '4.410')] +[2023-12-29 10:35:37,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3276.8, 300 sec: 2912.7). Total num frames: 393216. Throughput: 0: 811.9. Samples: 97370. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:35:37,864][00379] Avg episode reward: [(0, '4.445')] +[2023-12-29 10:35:42,860][00379] Fps is (10 sec: 2866.2, 60 sec: 3276.6, 300 sec: 2896.4). Total num frames: 405504. Throughput: 0: 800.1. Samples: 101360. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:35:42,862][00379] Avg episode reward: [(0, '4.579')] +[2023-12-29 10:35:43,677][02214] Updated weights for policy 0, policy_version 100 (0.0031) +[2023-12-29 10:35:47,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 2937.8). Total num frames: 425984. Throughput: 0: 831.9. Samples: 106446. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:35:47,863][00379] Avg episode reward: [(0, '4.588')] +[2023-12-29 10:35:52,856][00379] Fps is (10 sec: 4097.5, 60 sec: 3276.8, 300 sec: 2976.4). Total num frames: 446464. Throughput: 0: 841.5. Samples: 109584. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:35:52,859][00379] Avg episode reward: [(0, '4.476')] +[2023-12-29 10:35:53,760][02214] Updated weights for policy 0, policy_version 110 (0.0013) +[2023-12-29 10:35:57,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 2959.7). Total num frames: 458752. Throughput: 0: 830.0. Samples: 115066. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:35:57,864][00379] Avg episode reward: [(0, '4.449')] +[2023-12-29 10:36:02,856][00379] Fps is (10 sec: 2867.1, 60 sec: 3345.0, 300 sec: 2969.6). Total num frames: 475136. Throughput: 0: 825.4. Samples: 119054. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:36:02,858][00379] Avg episode reward: [(0, '4.437')] +[2023-12-29 10:36:07,521][02214] Updated weights for policy 0, policy_version 120 (0.0036) +[2023-12-29 10:36:07,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 2978.9). Total num frames: 491520. Throughput: 0: 831.5. Samples: 121192. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:36:07,858][00379] Avg episode reward: [(0, '4.636')] +[2023-12-29 10:36:12,856][00379] Fps is (10 sec: 3686.5, 60 sec: 3345.1, 300 sec: 3011.8). Total num frames: 512000. Throughput: 0: 864.2. Samples: 127370. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:36:12,858][00379] Avg episode reward: [(0, '4.576')] +[2023-12-29 10:36:17,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3019.3). Total num frames: 528384. Throughput: 0: 847.1. Samples: 132834. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:36:17,865][00379] Avg episode reward: [(0, '4.543')] +[2023-12-29 10:36:18,643][02214] Updated weights for policy 0, policy_version 130 (0.0021) +[2023-12-29 10:36:22,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3413.7, 300 sec: 3003.7). Total num frames: 540672. Throughput: 0: 831.6. Samples: 134794. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:36:22,858][00379] Avg episode reward: [(0, '4.616')] +[2023-12-29 10:36:27,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3011.1). Total num frames: 557056. Throughput: 0: 831.8. Samples: 138788. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:36:27,859][00379] Avg episode reward: [(0, '4.476')] +[2023-12-29 10:36:31,111][02214] Updated weights for policy 0, policy_version 140 (0.0033) +[2023-12-29 10:36:32,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3039.7). Total num frames: 577536. Throughput: 0: 857.7. Samples: 145042. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:36:32,859][00379] Avg episode reward: [(0, '4.376')] +[2023-12-29 10:36:37,859][00379] Fps is (10 sec: 3685.4, 60 sec: 3344.9, 300 sec: 3045.7). Total num frames: 593920. Throughput: 0: 852.6. Samples: 147952. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:36:37,861][00379] Avg episode reward: [(0, '4.335')] +[2023-12-29 10:36:42,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3345.3, 300 sec: 3031.0). Total num frames: 606208. Throughput: 0: 820.3. Samples: 151978. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:36:42,862][00379] Avg episode reward: [(0, '4.416')] +[2023-12-29 10:36:44,775][02214] Updated weights for policy 0, policy_version 150 (0.0028) +[2023-12-29 10:36:47,856][00379] Fps is (10 sec: 2868.0, 60 sec: 3276.8, 300 sec: 3037.0). Total num frames: 622592. Throughput: 0: 825.6. Samples: 156206. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:36:47,858][00379] Avg episode reward: [(0, '4.425')] +[2023-12-29 10:36:47,873][02201] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000152_622592.pth... +[2023-12-29 10:36:52,856][00379] Fps is (10 sec: 3686.5, 60 sec: 3276.8, 300 sec: 3062.2). Total num frames: 643072. Throughput: 0: 846.1. Samples: 159268. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:36:52,861][00379] Avg episode reward: [(0, '4.529')] +[2023-12-29 10:36:55,524][02214] Updated weights for policy 0, policy_version 160 (0.0019) +[2023-12-29 10:36:57,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3067.2). Total num frames: 659456. Throughput: 0: 844.8. Samples: 165386. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:36:57,861][00379] Avg episode reward: [(0, '4.562')] +[2023-12-29 10:37:02,858][00379] Fps is (10 sec: 3276.3, 60 sec: 3345.0, 300 sec: 3072.0). Total num frames: 675840. Throughput: 0: 811.8. Samples: 169368. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:37:02,862][00379] Avg episode reward: [(0, '4.561')] +[2023-12-29 10:37:07,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3276.8, 300 sec: 3058.3). Total num frames: 688128. Throughput: 0: 812.4. Samples: 171352. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:37:07,859][00379] Avg episode reward: [(0, '4.544')] +[2023-12-29 10:37:09,217][02214] Updated weights for policy 0, policy_version 170 (0.0020) +[2023-12-29 10:37:12,856][00379] Fps is (10 sec: 3277.3, 60 sec: 3276.8, 300 sec: 3080.9). Total num frames: 708608. Throughput: 0: 844.5. Samples: 176790. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:37:12,858][00379] Avg episode reward: [(0, '4.581')] +[2023-12-29 10:37:17,856][00379] Fps is (10 sec: 4096.0, 60 sec: 3345.1, 300 sec: 3102.5). Total num frames: 729088. Throughput: 0: 842.0. Samples: 182934. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:37:17,862][00379] Avg episode reward: [(0, '4.528')] +[2023-12-29 10:37:20,135][02214] Updated weights for policy 0, policy_version 180 (0.0037) +[2023-12-29 10:37:22,856][00379] Fps is (10 sec: 3276.7, 60 sec: 3345.1, 300 sec: 3089.1). Total num frames: 741376. Throughput: 0: 822.0. Samples: 184940. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:37:22,862][00379] Avg episode reward: [(0, '4.431')] +[2023-12-29 10:37:27,856][00379] Fps is (10 sec: 2457.5, 60 sec: 3276.8, 300 sec: 3076.2). Total num frames: 753664. Throughput: 0: 819.9. Samples: 188874. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:37:27,859][00379] Avg episode reward: [(0, '4.539')] +[2023-12-29 10:37:32,856][00379] Fps is (10 sec: 3276.9, 60 sec: 3276.8, 300 sec: 3096.6). Total num frames: 774144. Throughput: 0: 850.3. Samples: 194468. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:37:32,859][00379] Avg episode reward: [(0, '4.688')] +[2023-12-29 10:37:32,865][02201] Saving new best policy, reward=4.688! +[2023-12-29 10:37:33,119][02214] Updated weights for policy 0, policy_version 190 (0.0022) +[2023-12-29 10:37:37,856][00379] Fps is (10 sec: 4096.1, 60 sec: 3345.2, 300 sec: 3116.2). Total num frames: 794624. Throughput: 0: 851.2. Samples: 197574. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:37:37,859][00379] Avg episode reward: [(0, '4.581')] +[2023-12-29 10:37:42,856][00379] Fps is (10 sec: 3686.3, 60 sec: 3413.3, 300 sec: 3119.3). Total num frames: 811008. Throughput: 0: 826.0. Samples: 202558. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:37:42,859][00379] Avg episode reward: [(0, '4.548')] +[2023-12-29 10:37:45,360][02214] Updated weights for policy 0, policy_version 200 (0.0031) +[2023-12-29 10:37:47,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3106.8). Total num frames: 823296. Throughput: 0: 828.5. Samples: 206648. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:37:47,867][00379] Avg episode reward: [(0, '4.753')] +[2023-12-29 10:37:47,879][02201] Saving new best policy, reward=4.753! +[2023-12-29 10:37:52,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3125.1). Total num frames: 843776. Throughput: 0: 843.4. Samples: 209306. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:37:52,866][00379] Avg episode reward: [(0, '4.620')] +[2023-12-29 10:37:56,110][02214] Updated weights for policy 0, policy_version 210 (0.0015) +[2023-12-29 10:37:57,856][00379] Fps is (10 sec: 4095.9, 60 sec: 3413.3, 300 sec: 3142.7). Total num frames: 864256. Throughput: 0: 866.5. Samples: 215784. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:37:57,864][00379] Avg episode reward: [(0, '4.638')] +[2023-12-29 10:38:02,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3413.4, 300 sec: 3145.1). Total num frames: 880640. Throughput: 0: 838.1. Samples: 220650. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:38:02,859][00379] Avg episode reward: [(0, '4.570')] +[2023-12-29 10:38:07,856][00379] Fps is (10 sec: 2867.3, 60 sec: 3413.3, 300 sec: 3133.1). Total num frames: 892928. Throughput: 0: 837.8. Samples: 222640. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-12-29 10:38:07,865][00379] Avg episode reward: [(0, '4.530')] +[2023-12-29 10:38:09,578][02214] Updated weights for policy 0, policy_version 220 (0.0024) +[2023-12-29 10:38:12,859][00379] Fps is (10 sec: 3275.9, 60 sec: 3413.2, 300 sec: 3149.7). Total num frames: 913408. Throughput: 0: 863.9. Samples: 227750. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:38:12,861][00379] Avg episode reward: [(0, '4.511')] +[2023-12-29 10:38:17,856][00379] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 3165.7). Total num frames: 933888. Throughput: 0: 884.0. Samples: 234246. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:38:17,859][00379] Avg episode reward: [(0, '4.643')] +[2023-12-29 10:38:19,206][02214] Updated weights for policy 0, policy_version 230 (0.0022) +[2023-12-29 10:38:22,856][00379] Fps is (10 sec: 3687.4, 60 sec: 3481.6, 300 sec: 3221.3). Total num frames: 950272. Throughput: 0: 872.0. Samples: 236814. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:38:22,860][00379] Avg episode reward: [(0, '4.766')] +[2023-12-29 10:38:22,865][02201] Saving new best policy, reward=4.766! +[2023-12-29 10:38:27,858][00379] Fps is (10 sec: 2866.7, 60 sec: 3481.5, 300 sec: 3262.9). Total num frames: 962560. Throughput: 0: 850.5. Samples: 240834. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:38:27,862][00379] Avg episode reward: [(0, '4.732')] +[2023-12-29 10:38:32,561][02214] Updated weights for policy 0, policy_version 240 (0.0013) +[2023-12-29 10:38:32,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3304.6). Total num frames: 983040. Throughput: 0: 875.4. Samples: 246042. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:38:32,861][00379] Avg episode reward: [(0, '4.644')] +[2023-12-29 10:38:37,856][00379] Fps is (10 sec: 4096.7, 60 sec: 3481.6, 300 sec: 3318.5). Total num frames: 1003520. Throughput: 0: 886.3. Samples: 249190. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:38:37,859][00379] Avg episode reward: [(0, '4.712')] +[2023-12-29 10:38:42,860][00379] Fps is (10 sec: 3685.1, 60 sec: 3481.4, 300 sec: 3304.5). Total num frames: 1019904. Throughput: 0: 864.2. Samples: 254676. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-12-29 10:38:42,864][00379] Avg episode reward: [(0, '4.846')] +[2023-12-29 10:38:42,872][02201] Saving new best policy, reward=4.846! +[2023-12-29 10:38:44,292][02214] Updated weights for policy 0, policy_version 250 (0.0016) +[2023-12-29 10:38:47,856][00379] Fps is (10 sec: 2867.1, 60 sec: 3481.6, 300 sec: 3290.7). Total num frames: 1032192. Throughput: 0: 844.6. Samples: 258658. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:38:47,864][00379] Avg episode reward: [(0, '4.643')] +[2023-12-29 10:38:47,883][02201] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000252_1032192.pth... +[2023-12-29 10:38:48,041][02201] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000056_229376.pth +[2023-12-29 10:38:52,856][00379] Fps is (10 sec: 2868.1, 60 sec: 3413.3, 300 sec: 3304.6). Total num frames: 1048576. Throughput: 0: 846.9. Samples: 260752. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:38:52,858][00379] Avg episode reward: [(0, '4.638')] +[2023-12-29 10:38:56,086][02214] Updated weights for policy 0, policy_version 260 (0.0015) +[2023-12-29 10:38:57,856][00379] Fps is (10 sec: 3686.5, 60 sec: 3413.3, 300 sec: 3318.5). Total num frames: 1069056. Throughput: 0: 873.0. Samples: 267032. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:38:57,863][00379] Avg episode reward: [(0, '4.980')] +[2023-12-29 10:38:57,903][02201] Saving new best policy, reward=4.980! +[2023-12-29 10:39:02,856][00379] Fps is (10 sec: 3686.6, 60 sec: 3413.3, 300 sec: 3304.6). Total num frames: 1085440. Throughput: 0: 846.8. Samples: 272350. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:39:02,858][00379] Avg episode reward: [(0, '4.955')] +[2023-12-29 10:39:07,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3304.6). Total num frames: 1101824. Throughput: 0: 835.5. Samples: 274412. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:39:07,861][00379] Avg episode reward: [(0, '4.875')] +[2023-12-29 10:39:09,169][02214] Updated weights for policy 0, policy_version 270 (0.0028) +[2023-12-29 10:39:12,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3413.5, 300 sec: 3304.6). Total num frames: 1118208. Throughput: 0: 843.5. Samples: 278788. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:39:12,859][00379] Avg episode reward: [(0, '4.630')] +[2023-12-29 10:39:17,856][00379] Fps is (10 sec: 3686.2, 60 sec: 3413.3, 300 sec: 3332.3). Total num frames: 1138688. Throughput: 0: 867.0. Samples: 285058. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:39:17,859][00379] Avg episode reward: [(0, '4.707')] +[2023-12-29 10:39:19,426][02214] Updated weights for policy 0, policy_version 280 (0.0014) +[2023-12-29 10:39:22,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3318.5). Total num frames: 1155072. Throughput: 0: 867.1. Samples: 288210. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:39:22,860][00379] Avg episode reward: [(0, '4.643')] +[2023-12-29 10:39:27,856][00379] Fps is (10 sec: 2867.3, 60 sec: 3413.4, 300 sec: 3304.6). Total num frames: 1167360. Throughput: 0: 834.9. Samples: 292242. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:39:27,859][00379] Avg episode reward: [(0, '4.723')] +[2023-12-29 10:39:32,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3346.2). Total num frames: 1183744. Throughput: 0: 845.8. Samples: 296720. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:39:32,859][00379] Avg episode reward: [(0, '4.715')] +[2023-12-29 10:39:33,092][02214] Updated weights for policy 0, policy_version 290 (0.0033) +[2023-12-29 10:39:37,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3374.0). Total num frames: 1204224. Throughput: 0: 869.3. Samples: 299870. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-12-29 10:39:37,860][00379] Avg episode reward: [(0, '4.792')] +[2023-12-29 10:39:42,856][00379] Fps is (10 sec: 4096.0, 60 sec: 3413.5, 300 sec: 3374.0). Total num frames: 1224704. Throughput: 0: 867.2. Samples: 306058. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:39:42,858][00379] Avg episode reward: [(0, '5.010')] +[2023-12-29 10:39:42,862][02201] Saving new best policy, reward=5.010! +[2023-12-29 10:39:43,874][02214] Updated weights for policy 0, policy_version 300 (0.0015) +[2023-12-29 10:39:47,860][00379] Fps is (10 sec: 3275.6, 60 sec: 3413.1, 300 sec: 3346.2). Total num frames: 1236992. Throughput: 0: 837.3. Samples: 310032. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:39:47,862][00379] Avg episode reward: [(0, '4.920')] +[2023-12-29 10:39:52,856][00379] Fps is (10 sec: 2457.6, 60 sec: 3345.1, 300 sec: 3346.2). Total num frames: 1249280. Throughput: 0: 829.8. Samples: 311752. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:39:52,859][00379] Avg episode reward: [(0, '4.861')] +[2023-12-29 10:39:57,298][02214] Updated weights for policy 0, policy_version 310 (0.0028) +[2023-12-29 10:39:57,857][00379] Fps is (10 sec: 3277.9, 60 sec: 3345.0, 300 sec: 3374.0). Total num frames: 1269760. Throughput: 0: 849.8. Samples: 317030. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:39:57,862][00379] Avg episode reward: [(0, '4.488')] +[2023-12-29 10:40:02,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3360.1). Total num frames: 1286144. Throughput: 0: 825.1. Samples: 322186. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:40:02,861][00379] Avg episode reward: [(0, '4.429')] +[2023-12-29 10:40:07,856][00379] Fps is (10 sec: 2867.3, 60 sec: 3276.8, 300 sec: 3346.2). Total num frames: 1298432. Throughput: 0: 799.7. Samples: 324198. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:40:07,861][00379] Avg episode reward: [(0, '4.444')] +[2023-12-29 10:40:11,671][02214] Updated weights for policy 0, policy_version 320 (0.0017) +[2023-12-29 10:40:12,856][00379] Fps is (10 sec: 2457.6, 60 sec: 3208.5, 300 sec: 3346.2). Total num frames: 1310720. Throughput: 0: 799.3. Samples: 328212. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-12-29 10:40:12,864][00379] Avg episode reward: [(0, '4.509')] +[2023-12-29 10:40:17,857][00379] Fps is (10 sec: 3276.4, 60 sec: 3208.5, 300 sec: 3374.0). Total num frames: 1331200. Throughput: 0: 812.8. Samples: 333298. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:40:17,864][00379] Avg episode reward: [(0, '4.614')] +[2023-12-29 10:40:22,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3208.5, 300 sec: 3360.1). Total num frames: 1347584. Throughput: 0: 812.4. Samples: 336430. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:40:22,862][00379] Avg episode reward: [(0, '4.831')] +[2023-12-29 10:40:23,224][02214] Updated weights for policy 0, policy_version 330 (0.0037) +[2023-12-29 10:40:27,856][00379] Fps is (10 sec: 2867.5, 60 sec: 3208.5, 300 sec: 3332.3). Total num frames: 1359872. Throughput: 0: 765.6. Samples: 340510. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:40:27,858][00379] Avg episode reward: [(0, '4.909')] +[2023-12-29 10:40:32,856][00379] Fps is (10 sec: 2867.1, 60 sec: 3208.5, 300 sec: 3332.3). Total num frames: 1376256. Throughput: 0: 765.8. Samples: 344490. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:40:32,859][00379] Avg episode reward: [(0, '4.852')] +[2023-12-29 10:40:36,581][02214] Updated weights for policy 0, policy_version 340 (0.0029) +[2023-12-29 10:40:37,856][00379] Fps is (10 sec: 3686.3, 60 sec: 3208.5, 300 sec: 3360.1). Total num frames: 1396736. Throughput: 0: 791.3. Samples: 347360. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:40:37,859][00379] Avg episode reward: [(0, '4.565')] +[2023-12-29 10:40:42,856][00379] Fps is (10 sec: 4096.1, 60 sec: 3208.5, 300 sec: 3360.1). Total num frames: 1417216. Throughput: 0: 807.6. Samples: 353372. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:40:42,859][00379] Avg episode reward: [(0, '4.722')] +[2023-12-29 10:40:47,864][00379] Fps is (10 sec: 2455.8, 60 sec: 3071.8, 300 sec: 3304.5). Total num frames: 1421312. Throughput: 0: 765.1. Samples: 356620. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:40:47,866][00379] Avg episode reward: [(0, '4.742')] +[2023-12-29 10:40:47,878][02201] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000347_1421312.pth... +[2023-12-29 10:40:48,046][02201] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000152_622592.pth +[2023-12-29 10:40:51,244][02214] Updated weights for policy 0, policy_version 350 (0.0020) +[2023-12-29 10:40:52,856][00379] Fps is (10 sec: 1638.4, 60 sec: 3072.0, 300 sec: 3304.6). Total num frames: 1433600. Throughput: 0: 756.7. Samples: 358248. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:40:52,858][00379] Avg episode reward: [(0, '4.891')] +[2023-12-29 10:40:57,856][00379] Fps is (10 sec: 3689.2, 60 sec: 3140.3, 300 sec: 3332.3). Total num frames: 1458176. Throughput: 0: 783.3. Samples: 363460. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-12-29 10:40:57,864][00379] Avg episode reward: [(0, '4.382')] +[2023-12-29 10:41:01,728][02214] Updated weights for policy 0, policy_version 360 (0.0015) +[2023-12-29 10:41:02,856][00379] Fps is (10 sec: 4505.6, 60 sec: 3208.5, 300 sec: 3346.2). Total num frames: 1478656. Throughput: 0: 814.0. Samples: 369926. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:41:02,861][00379] Avg episode reward: [(0, '4.633')] +[2023-12-29 10:41:07,856][00379] Fps is (10 sec: 3276.7, 60 sec: 3208.5, 300 sec: 3318.5). Total num frames: 1490944. Throughput: 0: 796.0. Samples: 372250. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:41:07,862][00379] Avg episode reward: [(0, '4.810')] +[2023-12-29 10:41:12,856][00379] Fps is (10 sec: 2457.6, 60 sec: 3208.5, 300 sec: 3304.6). Total num frames: 1503232. Throughput: 0: 794.5. Samples: 376262. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:41:12,859][00379] Avg episode reward: [(0, '4.858')] +[2023-12-29 10:41:15,259][02214] Updated weights for policy 0, policy_version 370 (0.0028) +[2023-12-29 10:41:17,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3208.6, 300 sec: 3332.3). Total num frames: 1523712. Throughput: 0: 826.3. Samples: 381672. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:41:17,858][00379] Avg episode reward: [(0, '4.677')] +[2023-12-29 10:41:22,856][00379] Fps is (10 sec: 4096.0, 60 sec: 3276.8, 300 sec: 3346.2). Total num frames: 1544192. Throughput: 0: 832.0. Samples: 384798. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:41:22,864][00379] Avg episode reward: [(0, '4.598')] +[2023-12-29 10:41:27,411][02214] Updated weights for policy 0, policy_version 380 (0.0027) +[2023-12-29 10:41:27,859][00379] Fps is (10 sec: 3275.9, 60 sec: 3276.7, 300 sec: 3318.4). Total num frames: 1556480. Throughput: 0: 792.5. Samples: 389038. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:41:27,861][00379] Avg episode reward: [(0, '4.664')] +[2023-12-29 10:41:32,856][00379] Fps is (10 sec: 2047.9, 60 sec: 3140.3, 300 sec: 3290.7). Total num frames: 1564672. Throughput: 0: 788.8. Samples: 392112. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-12-29 10:41:32,864][00379] Avg episode reward: [(0, '4.605')] +[2023-12-29 10:41:37,858][00379] Fps is (10 sec: 2867.5, 60 sec: 3140.2, 300 sec: 3318.4). Total num frames: 1585152. Throughput: 0: 810.1. Samples: 394704. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:41:37,860][00379] Avg episode reward: [(0, '4.529')] +[2023-12-29 10:41:40,149][02214] Updated weights for policy 0, policy_version 390 (0.0017) +[2023-12-29 10:41:42,856][00379] Fps is (10 sec: 4096.1, 60 sec: 3140.3, 300 sec: 3332.3). Total num frames: 1605632. Throughput: 0: 831.8. Samples: 400890. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:41:42,858][00379] Avg episode reward: [(0, '4.599')] +[2023-12-29 10:41:47,858][00379] Fps is (10 sec: 3276.5, 60 sec: 3277.1, 300 sec: 3304.5). Total num frames: 1617920. Throughput: 0: 776.2. Samples: 404856. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:41:47,866][00379] Avg episode reward: [(0, '4.583')] +[2023-12-29 10:41:52,856][00379] Fps is (10 sec: 2048.0, 60 sec: 3208.5, 300 sec: 3276.8). Total num frames: 1626112. Throughput: 0: 769.2. Samples: 406866. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:41:52,860][00379] Avg episode reward: [(0, '4.724')] +[2023-12-29 10:41:55,568][02214] Updated weights for policy 0, policy_version 400 (0.0026) +[2023-12-29 10:41:57,856][00379] Fps is (10 sec: 2867.9, 60 sec: 3140.3, 300 sec: 3290.7). Total num frames: 1646592. Throughput: 0: 771.1. Samples: 410960. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:41:57,859][00379] Avg episode reward: [(0, '4.828')] +[2023-12-29 10:42:02,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3072.0, 300 sec: 3304.6). Total num frames: 1662976. Throughput: 0: 776.5. Samples: 416614. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:42:02,865][00379] Avg episode reward: [(0, '4.984')] +[2023-12-29 10:42:06,910][02214] Updated weights for policy 0, policy_version 410 (0.0019) +[2023-12-29 10:42:07,862][00379] Fps is (10 sec: 3274.8, 60 sec: 3140.0, 300 sec: 3290.6). Total num frames: 1679360. Throughput: 0: 767.1. Samples: 419322. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:42:07,870][00379] Avg episode reward: [(0, '4.771')] +[2023-12-29 10:42:12,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3208.5, 300 sec: 3276.8). Total num frames: 1695744. Throughput: 0: 763.4. Samples: 423390. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:42:12,860][00379] Avg episode reward: [(0, '4.753')] +[2023-12-29 10:42:17,856][00379] Fps is (10 sec: 3278.8, 60 sec: 3140.3, 300 sec: 3290.7). Total num frames: 1712128. Throughput: 0: 810.7. Samples: 428594. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:42:17,858][00379] Avg episode reward: [(0, '4.555')] +[2023-12-29 10:42:19,123][02214] Updated weights for policy 0, policy_version 420 (0.0039) +[2023-12-29 10:42:22,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3140.3, 300 sec: 3318.5). Total num frames: 1732608. Throughput: 0: 826.3. Samples: 431888. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:42:22,859][00379] Avg episode reward: [(0, '4.611')] +[2023-12-29 10:42:27,856][00379] Fps is (10 sec: 3686.3, 60 sec: 3208.7, 300 sec: 3304.6). Total num frames: 1748992. Throughput: 0: 803.4. Samples: 437042. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-12-29 10:42:27,862][00379] Avg episode reward: [(0, '4.707')] +[2023-12-29 10:42:32,856][00379] Fps is (10 sec: 2457.6, 60 sec: 3208.5, 300 sec: 3262.9). Total num frames: 1757184. Throughput: 0: 782.2. Samples: 440054. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:42:32,859][00379] Avg episode reward: [(0, '4.811')] +[2023-12-29 10:42:33,523][02214] Updated weights for policy 0, policy_version 430 (0.0030) +[2023-12-29 10:42:37,856][00379] Fps is (10 sec: 2457.6, 60 sec: 3140.4, 300 sec: 3262.9). Total num frames: 1773568. Throughput: 0: 772.4. Samples: 441626. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:42:37,862][00379] Avg episode reward: [(0, '4.737')] +[2023-12-29 10:42:42,856][00379] Fps is (10 sec: 3276.7, 60 sec: 3072.0, 300 sec: 3276.8). Total num frames: 1789952. Throughput: 0: 806.7. Samples: 447260. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:42:42,859][00379] Avg episode reward: [(0, '4.803')] +[2023-12-29 10:42:45,133][02214] Updated weights for policy 0, policy_version 440 (0.0032) +[2023-12-29 10:42:47,861][00379] Fps is (10 sec: 3275.3, 60 sec: 3140.1, 300 sec: 3262.9). Total num frames: 1806336. Throughput: 0: 791.8. Samples: 452248. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:42:47,863][00379] Avg episode reward: [(0, '4.951')] +[2023-12-29 10:42:47,874][02201] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000441_1806336.pth... +[2023-12-29 10:42:48,031][02201] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000252_1032192.pth +[2023-12-29 10:42:52,856][00379] Fps is (10 sec: 2867.3, 60 sec: 3208.5, 300 sec: 3235.1). Total num frames: 1818624. Throughput: 0: 776.7. Samples: 454270. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:42:52,858][00379] Avg episode reward: [(0, '5.103')] +[2023-12-29 10:42:52,865][02201] Saving new best policy, reward=5.103! +[2023-12-29 10:42:57,856][00379] Fps is (10 sec: 2868.5, 60 sec: 3140.3, 300 sec: 3235.1). Total num frames: 1835008. Throughput: 0: 782.7. Samples: 458612. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-12-29 10:42:57,859][00379] Avg episode reward: [(0, '4.898')] +[2023-12-29 10:42:59,040][02214] Updated weights for policy 0, policy_version 450 (0.0024) +[2023-12-29 10:43:02,856][00379] Fps is (10 sec: 4096.0, 60 sec: 3276.8, 300 sec: 3276.8). Total num frames: 1859584. Throughput: 0: 812.8. Samples: 465168. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:43:02,858][00379] Avg episode reward: [(0, '4.695')] +[2023-12-29 10:43:07,856][00379] Fps is (10 sec: 4096.0, 60 sec: 3277.1, 300 sec: 3262.9). Total num frames: 1875968. Throughput: 0: 811.8. Samples: 468420. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:43:07,858][00379] Avg episode reward: [(0, '4.724')] +[2023-12-29 10:43:10,167][02214] Updated weights for policy 0, policy_version 460 (0.0019) +[2023-12-29 10:43:12,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3208.5, 300 sec: 3235.1). Total num frames: 1888256. Throughput: 0: 790.2. Samples: 472600. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:43:12,863][00379] Avg episode reward: [(0, '4.877')] +[2023-12-29 10:43:17,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3208.5, 300 sec: 3235.1). Total num frames: 1904640. Throughput: 0: 815.7. Samples: 476762. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:43:17,858][00379] Avg episode reward: [(0, '4.960')] +[2023-12-29 10:43:22,242][02214] Updated weights for policy 0, policy_version 470 (0.0027) +[2023-12-29 10:43:22,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3208.5, 300 sec: 3262.9). Total num frames: 1925120. Throughput: 0: 846.9. Samples: 479738. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:43:22,858][00379] Avg episode reward: [(0, '4.991')] +[2023-12-29 10:43:27,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3208.5, 300 sec: 3249.0). Total num frames: 1941504. Throughput: 0: 842.2. Samples: 485160. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:43:27,863][00379] Avg episode reward: [(0, '5.124')] +[2023-12-29 10:43:27,870][02201] Saving new best policy, reward=5.124! +[2023-12-29 10:43:32,860][00379] Fps is (10 sec: 2456.7, 60 sec: 3208.3, 300 sec: 3207.3). Total num frames: 1949696. Throughput: 0: 803.4. Samples: 488402. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-12-29 10:43:32,867][00379] Avg episode reward: [(0, '5.161')] +[2023-12-29 10:43:32,869][02201] Saving new best policy, reward=5.161! +[2023-12-29 10:43:37,856][00379] Fps is (10 sec: 1638.4, 60 sec: 3072.0, 300 sec: 3179.6). Total num frames: 1957888. Throughput: 0: 782.4. Samples: 489480. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:43:37,863][00379] Avg episode reward: [(0, '4.970')] +[2023-12-29 10:43:39,970][02214] Updated weights for policy 0, policy_version 480 (0.0023) +[2023-12-29 10:43:42,856][00379] Fps is (10 sec: 2458.5, 60 sec: 3072.0, 300 sec: 3193.5). Total num frames: 1974272. Throughput: 0: 778.1. Samples: 493628. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-12-29 10:43:42,864][00379] Avg episode reward: [(0, '4.635')] +[2023-12-29 10:43:47,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3072.2, 300 sec: 3193.5). Total num frames: 1990656. Throughput: 0: 749.2. Samples: 498882. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-12-29 10:43:47,859][00379] Avg episode reward: [(0, '4.592')] +[2023-12-29 10:43:52,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3072.0, 300 sec: 3165.7). Total num frames: 2002944. Throughput: 0: 714.4. Samples: 500570. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:43:52,860][00379] Avg episode reward: [(0, '4.932')] +[2023-12-29 10:43:53,221][02214] Updated weights for policy 0, policy_version 490 (0.0024) +[2023-12-29 10:43:57,856][00379] Fps is (10 sec: 2457.6, 60 sec: 3003.7, 300 sec: 3151.8). Total num frames: 2015232. Throughput: 0: 694.4. Samples: 503850. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:43:57,859][00379] Avg episode reward: [(0, '5.069')] +[2023-12-29 10:44:02,859][00379] Fps is (10 sec: 2047.5, 60 sec: 2730.5, 300 sec: 3124.0). Total num frames: 2023424. Throughput: 0: 666.4. Samples: 506752. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:44:02,864][00379] Avg episode reward: [(0, '5.245')] +[2023-12-29 10:44:02,866][02201] Saving new best policy, reward=5.245! +[2023-12-29 10:44:07,856][00379] Fps is (10 sec: 1638.4, 60 sec: 2594.1, 300 sec: 3096.3). Total num frames: 2031616. Throughput: 0: 629.8. Samples: 508078. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:44:07,858][00379] Avg episode reward: [(0, '5.252')] +[2023-12-29 10:44:07,871][02201] Saving new best policy, reward=5.252! +[2023-12-29 10:44:12,856][00379] Fps is (10 sec: 2048.6, 60 sec: 2594.1, 300 sec: 3068.5). Total num frames: 2043904. Throughput: 0: 578.4. Samples: 511188. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:44:12,859][00379] Avg episode reward: [(0, '5.021')] +[2023-12-29 10:44:13,396][02214] Updated weights for policy 0, policy_version 500 (0.0064) +[2023-12-29 10:44:17,860][00379] Fps is (10 sec: 2456.7, 60 sec: 2525.7, 300 sec: 3054.6). Total num frames: 2056192. Throughput: 0: 597.0. Samples: 515266. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:44:17,862][00379] Avg episode reward: [(0, '4.884')] +[2023-12-29 10:44:22,856][00379] Fps is (10 sec: 2867.2, 60 sec: 2457.6, 300 sec: 3068.5). Total num frames: 2072576. Throughput: 0: 618.6. Samples: 517318. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:44:22,858][00379] Avg episode reward: [(0, '4.743')] +[2023-12-29 10:44:27,856][00379] Fps is (10 sec: 2868.2, 60 sec: 2389.3, 300 sec: 3054.6). Total num frames: 2084864. Throughput: 0: 613.9. Samples: 521254. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2023-12-29 10:44:27,858][00379] Avg episode reward: [(0, '4.612')] +[2023-12-29 10:44:27,983][02214] Updated weights for policy 0, policy_version 510 (0.0058) +[2023-12-29 10:44:32,856][00379] Fps is (10 sec: 3276.8, 60 sec: 2594.3, 300 sec: 3054.6). Total num frames: 2105344. Throughput: 0: 628.1. Samples: 527148. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:44:32,863][00379] Avg episode reward: [(0, '4.582')] +[2023-12-29 10:44:37,856][00379] Fps is (10 sec: 4096.0, 60 sec: 2798.9, 300 sec: 3054.6). Total num frames: 2125824. Throughput: 0: 658.5. Samples: 530202. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2023-12-29 10:44:37,859][00379] Avg episode reward: [(0, '4.480')] +[2023-12-29 10:44:39,175][02214] Updated weights for policy 0, policy_version 520 (0.0034) +[2023-12-29 10:44:42,856][00379] Fps is (10 sec: 3276.8, 60 sec: 2730.7, 300 sec: 3054.7). Total num frames: 2138112. Throughput: 0: 673.9. Samples: 534176. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2023-12-29 10:44:42,862][00379] Avg episode reward: [(0, '4.462')] +[2023-12-29 10:44:47,856][00379] Fps is (10 sec: 2457.6, 60 sec: 2662.4, 300 sec: 3054.6). Total num frames: 2150400. Throughput: 0: 685.3. Samples: 537588. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:44:47,858][00379] Avg episode reward: [(0, '4.446')] +[2023-12-29 10:44:47,872][02201] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000525_2150400.pth... +[2023-12-29 10:44:48,032][02201] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000347_1421312.pth +[2023-12-29 10:44:52,857][00379] Fps is (10 sec: 2866.9, 60 sec: 2730.6, 300 sec: 3040.8). Total num frames: 2166784. Throughput: 0: 708.5. Samples: 539962. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2023-12-29 10:44:52,863][00379] Avg episode reward: [(0, '4.685')] +[2023-12-29 10:44:53,730][02214] Updated weights for policy 0, policy_version 530 (0.0024) +[2023-12-29 10:44:57,856][00379] Fps is (10 sec: 3276.8, 60 sec: 2798.9, 300 sec: 3040.8). Total num frames: 2183168. Throughput: 0: 765.6. Samples: 545640. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2023-12-29 10:44:57,860][00379] Avg episode reward: [(0, '5.008')] +[2023-12-29 10:45:02,861][00379] Fps is (10 sec: 2866.2, 60 sec: 2867.1, 300 sec: 3040.7). Total num frames: 2195456. Throughput: 0: 762.1. Samples: 549562. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2023-12-29 10:45:02,863][00379] Avg episode reward: [(0, '5.025')] +[2023-12-29 10:45:07,858][00379] Fps is (10 sec: 2457.2, 60 sec: 2935.4, 300 sec: 3040.7). Total num frames: 2207744. Throughput: 0: 763.4. Samples: 551674. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-12-29 10:45:07,860][00379] Avg episode reward: [(0, '5.010')] +[2023-12-29 10:45:07,898][02214] Updated weights for policy 0, policy_version 540 (0.0023) +[2023-12-29 10:45:12,857][00379] Fps is (10 sec: 3278.0, 60 sec: 3071.9, 300 sec: 3040.8). Total num frames: 2228224. Throughput: 0: 789.6. Samples: 556786. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:45:12,860][00379] Avg episode reward: [(0, '4.689')] +[2023-12-29 10:45:17,861][00379] Fps is (10 sec: 4094.8, 60 sec: 3208.5, 300 sec: 3054.6). Total num frames: 2248704. Throughput: 0: 784.1. Samples: 562436. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:45:17,863][00379] Avg episode reward: [(0, '4.696')] +[2023-12-29 10:45:19,081][02214] Updated weights for policy 0, policy_version 550 (0.0029) +[2023-12-29 10:45:22,856][00379] Fps is (10 sec: 3277.1, 60 sec: 3140.3, 300 sec: 3054.6). Total num frames: 2260992. Throughput: 0: 764.4. Samples: 564600. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:45:22,863][00379] Avg episode reward: [(0, '4.799')] +[2023-12-29 10:45:27,856][00379] Fps is (10 sec: 2458.7, 60 sec: 3140.3, 300 sec: 3040.8). Total num frames: 2273280. Throughput: 0: 764.0. Samples: 568558. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:45:27,859][00379] Avg episode reward: [(0, '4.948')] +[2023-12-29 10:45:32,829][02214] Updated weights for policy 0, policy_version 560 (0.0023) +[2023-12-29 10:45:32,856][00379] Fps is (10 sec: 3276.9, 60 sec: 3140.3, 300 sec: 3040.8). Total num frames: 2293760. Throughput: 0: 795.5. Samples: 573386. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:45:32,858][00379] Avg episode reward: [(0, '4.898')] +[2023-12-29 10:45:37,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3072.0, 300 sec: 3026.9). Total num frames: 2310144. Throughput: 0: 811.6. Samples: 576482. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:45:37,859][00379] Avg episode reward: [(0, '4.984')] +[2023-12-29 10:45:42,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3072.0, 300 sec: 3054.7). Total num frames: 2322432. Throughput: 0: 785.6. Samples: 580994. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-12-29 10:45:42,868][00379] Avg episode reward: [(0, '4.868')] +[2023-12-29 10:45:47,416][02214] Updated weights for policy 0, policy_version 570 (0.0027) +[2023-12-29 10:45:47,856][00379] Fps is (10 sec: 2457.6, 60 sec: 3072.0, 300 sec: 3054.6). Total num frames: 2334720. Throughput: 0: 767.9. Samples: 584114. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:45:47,858][00379] Avg episode reward: [(0, '4.982')] +[2023-12-29 10:45:52,861][00379] Fps is (10 sec: 2456.5, 60 sec: 3003.6, 300 sec: 3012.9). Total num frames: 2347008. Throughput: 0: 757.3. Samples: 585754. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-12-29 10:45:52,863][00379] Avg episode reward: [(0, '4.821')] +[2023-12-29 10:45:57,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3072.0, 300 sec: 3013.0). Total num frames: 2367488. Throughput: 0: 769.1. Samples: 591396. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:45:57,863][00379] Avg episode reward: [(0, '4.604')] +[2023-12-29 10:45:59,164][02214] Updated weights for policy 0, policy_version 580 (0.0023) +[2023-12-29 10:46:02,856][00379] Fps is (10 sec: 4097.8, 60 sec: 3208.8, 300 sec: 3040.8). Total num frames: 2387968. Throughput: 0: 773.7. Samples: 597250. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:46:02,859][00379] Avg episode reward: [(0, '4.517')] +[2023-12-29 10:46:07,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3208.6, 300 sec: 3040.8). Total num frames: 2400256. Throughput: 0: 771.4. Samples: 599314. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:46:07,858][00379] Avg episode reward: [(0, '4.515')] +[2023-12-29 10:46:12,857][00379] Fps is (10 sec: 2457.5, 60 sec: 3072.0, 300 sec: 3013.0). Total num frames: 2412544. Throughput: 0: 767.1. Samples: 603078. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:46:12,859][00379] Avg episode reward: [(0, '4.639')] +[2023-12-29 10:46:13,470][02214] Updated weights for policy 0, policy_version 590 (0.0021) +[2023-12-29 10:46:17,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3072.2, 300 sec: 3013.0). Total num frames: 2433024. Throughput: 0: 775.5. Samples: 608284. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:46:17,859][00379] Avg episode reward: [(0, '4.759')] +[2023-12-29 10:46:22,856][00379] Fps is (10 sec: 3686.6, 60 sec: 3140.3, 300 sec: 3026.9). Total num frames: 2449408. Throughput: 0: 771.2. Samples: 611184. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:46:22,858][00379] Avg episode reward: [(0, '4.675')] +[2023-12-29 10:46:25,868][02214] Updated weights for policy 0, policy_version 600 (0.0017) +[2023-12-29 10:46:27,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3140.3, 300 sec: 3040.8). Total num frames: 2461696. Throughput: 0: 764.8. Samples: 615412. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-12-29 10:46:27,860][00379] Avg episode reward: [(0, '4.736')] +[2023-12-29 10:46:32,858][00379] Fps is (10 sec: 2457.2, 60 sec: 3003.6, 300 sec: 3013.0). Total num frames: 2473984. Throughput: 0: 784.4. Samples: 619414. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:46:32,860][00379] Avg episode reward: [(0, '4.820')] +[2023-12-29 10:46:37,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3003.7, 300 sec: 2999.1). Total num frames: 2490368. Throughput: 0: 794.6. Samples: 621506. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:46:37,858][00379] Avg episode reward: [(0, '4.765')] +[2023-12-29 10:46:39,743][02214] Updated weights for policy 0, policy_version 610 (0.0043) +[2023-12-29 10:46:42,856][00379] Fps is (10 sec: 3277.4, 60 sec: 3072.0, 300 sec: 3013.0). Total num frames: 2506752. Throughput: 0: 787.1. Samples: 626814. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:46:42,859][00379] Avg episode reward: [(0, '4.621')] +[2023-12-29 10:46:47,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3140.3, 300 sec: 3040.8). Total num frames: 2523136. Throughput: 0: 761.3. Samples: 631508. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-12-29 10:46:47,858][00379] Avg episode reward: [(0, '4.678')] +[2023-12-29 10:46:47,871][02201] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000616_2523136.pth... +[2023-12-29 10:46:48,041][02201] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000441_1806336.pth +[2023-12-29 10:46:52,859][00379] Fps is (10 sec: 2866.4, 60 sec: 3140.4, 300 sec: 3013.0). Total num frames: 2535424. Throughput: 0: 759.9. Samples: 633510. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:46:52,861][00379] Avg episode reward: [(0, '4.838')] +[2023-12-29 10:46:53,344][02214] Updated weights for policy 0, policy_version 620 (0.0019) +[2023-12-29 10:46:57,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3140.3, 300 sec: 3026.9). Total num frames: 2555904. Throughput: 0: 779.1. Samples: 638138. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:46:57,859][00379] Avg episode reward: [(0, '4.931')] +[2023-12-29 10:47:02,856][00379] Fps is (10 sec: 4097.0, 60 sec: 3140.3, 300 sec: 3040.8). Total num frames: 2576384. Throughput: 0: 801.4. Samples: 644346. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:47:02,863][00379] Avg episode reward: [(0, '4.920')] +[2023-12-29 10:47:03,513][02214] Updated weights for policy 0, policy_version 630 (0.0014) +[2023-12-29 10:47:07,861][00379] Fps is (10 sec: 3684.7, 60 sec: 3208.3, 300 sec: 3040.7). Total num frames: 2592768. Throughput: 0: 801.0. Samples: 647232. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:47:07,866][00379] Avg episode reward: [(0, '4.678')] +[2023-12-29 10:47:12,856][00379] Fps is (10 sec: 2457.6, 60 sec: 3140.3, 300 sec: 3013.0). Total num frames: 2600960. Throughput: 0: 780.2. Samples: 650520. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-12-29 10:47:12,862][00379] Avg episode reward: [(0, '4.748')] +[2023-12-29 10:47:17,856][00379] Fps is (10 sec: 2458.8, 60 sec: 3072.0, 300 sec: 2999.1). Total num frames: 2617344. Throughput: 0: 774.3. Samples: 654256. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:47:17,864][00379] Avg episode reward: [(0, '4.805')] +[2023-12-29 10:47:19,081][02214] Updated weights for policy 0, policy_version 640 (0.0053) +[2023-12-29 10:47:22,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3072.0, 300 sec: 2999.1). Total num frames: 2633728. Throughput: 0: 793.2. Samples: 657202. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:47:22,858][00379] Avg episode reward: [(0, '5.067')] +[2023-12-29 10:47:27,858][00379] Fps is (10 sec: 3685.5, 60 sec: 3208.4, 300 sec: 3040.7). Total num frames: 2654208. Throughput: 0: 805.2. Samples: 663048. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:47:27,861][00379] Avg episode reward: [(0, '5.203')] +[2023-12-29 10:47:31,228][02214] Updated weights for policy 0, policy_version 650 (0.0033) +[2023-12-29 10:47:32,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3208.6, 300 sec: 3026.9). Total num frames: 2666496. Throughput: 0: 787.2. Samples: 666932. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:47:32,860][00379] Avg episode reward: [(0, '5.234')] +[2023-12-29 10:47:37,861][00379] Fps is (10 sec: 2457.0, 60 sec: 3140.0, 300 sec: 3012.9). Total num frames: 2678784. Throughput: 0: 790.5. Samples: 669082. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:47:37,868][00379] Avg episode reward: [(0, '5.343')] +[2023-12-29 10:47:37,884][02201] Saving new best policy, reward=5.343! +[2023-12-29 10:47:42,856][00379] Fps is (10 sec: 2457.5, 60 sec: 3072.0, 300 sec: 2999.2). Total num frames: 2691072. Throughput: 0: 766.4. Samples: 672628. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:47:42,863][00379] Avg episode reward: [(0, '5.169')] +[2023-12-29 10:47:44,627][02214] Updated weights for policy 0, policy_version 660 (0.0019) +[2023-12-29 10:47:47,856][00379] Fps is (10 sec: 3278.3, 60 sec: 3140.3, 300 sec: 3026.9). Total num frames: 2711552. Throughput: 0: 767.2. Samples: 678872. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-12-29 10:47:47,862][00379] Avg episode reward: [(0, '4.941')] +[2023-12-29 10:47:52,856][00379] Fps is (10 sec: 3686.5, 60 sec: 3208.7, 300 sec: 3026.9). Total num frames: 2727936. Throughput: 0: 759.5. Samples: 681408. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:47:52,859][00379] Avg episode reward: [(0, '5.460')] +[2023-12-29 10:47:52,861][02201] Saving new best policy, reward=5.460! +[2023-12-29 10:47:57,858][00379] Fps is (10 sec: 2866.7, 60 sec: 3071.9, 300 sec: 2985.2). Total num frames: 2740224. Throughput: 0: 774.1. Samples: 685358. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:47:57,860][00379] Avg episode reward: [(0, '5.483')] +[2023-12-29 10:47:57,872][02201] Saving new best policy, reward=5.483! +[2023-12-29 10:47:58,677][02214] Updated weights for policy 0, policy_version 670 (0.0025) +[2023-12-29 10:48:02,856][00379] Fps is (10 sec: 2457.6, 60 sec: 2935.5, 300 sec: 2971.3). Total num frames: 2752512. Throughput: 0: 778.8. Samples: 689304. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:48:02,867][00379] Avg episode reward: [(0, '5.824')] +[2023-12-29 10:48:02,885][02201] Saving new best policy, reward=5.824! +[2023-12-29 10:48:07,856][00379] Fps is (10 sec: 3277.4, 60 sec: 3004.0, 300 sec: 2999.1). Total num frames: 2772992. Throughput: 0: 772.3. Samples: 691954. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:48:07,861][00379] Avg episode reward: [(0, '5.907')] +[2023-12-29 10:48:07,873][02201] Saving new best policy, reward=5.907! +[2023-12-29 10:48:10,389][02214] Updated weights for policy 0, policy_version 680 (0.0030) +[2023-12-29 10:48:12,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3140.3, 300 sec: 2999.1). Total num frames: 2789376. Throughput: 0: 765.9. Samples: 697510. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:48:12,858][00379] Avg episode reward: [(0, '6.117')] +[2023-12-29 10:48:12,860][02201] Saving new best policy, reward=6.117! +[2023-12-29 10:48:17,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3072.0, 300 sec: 2971.3). Total num frames: 2801664. Throughput: 0: 766.1. Samples: 701408. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:48:17,864][00379] Avg episode reward: [(0, '5.792')] +[2023-12-29 10:48:22,861][00379] Fps is (10 sec: 2456.5, 60 sec: 3003.5, 300 sec: 2957.4). Total num frames: 2813952. Throughput: 0: 755.5. Samples: 703078. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:48:22,865][00379] Avg episode reward: [(0, '5.905')] +[2023-12-29 10:48:24,959][02214] Updated weights for policy 0, policy_version 690 (0.0013) +[2023-12-29 10:48:27,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3003.9, 300 sec: 2999.1). Total num frames: 2834432. Throughput: 0: 797.6. Samples: 708518. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:48:27,866][00379] Avg episode reward: [(0, '5.769')] +[2023-12-29 10:48:32,856][00379] Fps is (10 sec: 4097.9, 60 sec: 3140.3, 300 sec: 3040.8). Total num frames: 2854912. Throughput: 0: 793.6. Samples: 714582. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:48:32,864][00379] Avg episode reward: [(0, '6.190')] +[2023-12-29 10:48:32,867][02201] Saving new best policy, reward=6.190! +[2023-12-29 10:48:36,575][02214] Updated weights for policy 0, policy_version 700 (0.0021) +[2023-12-29 10:48:37,863][00379] Fps is (10 sec: 3274.6, 60 sec: 3140.2, 300 sec: 3026.8). Total num frames: 2867200. Throughput: 0: 777.9. Samples: 716420. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:48:37,865][00379] Avg episode reward: [(0, '6.368')] +[2023-12-29 10:48:37,887][02201] Saving new best policy, reward=6.368! +[2023-12-29 10:48:42,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3208.5, 300 sec: 3026.9). Total num frames: 2883584. Throughput: 0: 780.1. Samples: 720460. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:48:42,858][00379] Avg episode reward: [(0, '6.624')] +[2023-12-29 10:48:42,864][02201] Saving new best policy, reward=6.624! +[2023-12-29 10:48:47,856][00379] Fps is (10 sec: 3279.0, 60 sec: 3140.3, 300 sec: 3040.8). Total num frames: 2899968. Throughput: 0: 804.9. Samples: 725526. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:48:47,861][00379] Avg episode reward: [(0, '6.729')] +[2023-12-29 10:48:47,873][02201] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000708_2899968.pth... +[2023-12-29 10:48:48,016][02201] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000525_2150400.pth +[2023-12-29 10:48:48,032][02201] Saving new best policy, reward=6.729! +[2023-12-29 10:48:49,624][02214] Updated weights for policy 0, policy_version 710 (0.0039) +[2023-12-29 10:48:52,858][00379] Fps is (10 sec: 3685.8, 60 sec: 3208.4, 300 sec: 3068.5). Total num frames: 2920448. Throughput: 0: 808.7. Samples: 728348. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:48:52,860][00379] Avg episode reward: [(0, '7.124')] +[2023-12-29 10:48:52,870][02201] Saving new best policy, reward=7.124! +[2023-12-29 10:48:57,862][00379] Fps is (10 sec: 3275.0, 60 sec: 3208.3, 300 sec: 3082.4). Total num frames: 2932736. Throughput: 0: 796.3. Samples: 733350. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:48:57,866][00379] Avg episode reward: [(0, '7.395')] +[2023-12-29 10:48:57,878][02201] Saving new best policy, reward=7.395! +[2023-12-29 10:49:02,856][00379] Fps is (10 sec: 2458.0, 60 sec: 3208.5, 300 sec: 3096.3). Total num frames: 2945024. Throughput: 0: 799.5. Samples: 737386. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:49:02,860][00379] Avg episode reward: [(0, '8.145')] +[2023-12-29 10:49:02,889][02201] Saving new best policy, reward=8.145! +[2023-12-29 10:49:02,903][02214] Updated weights for policy 0, policy_version 720 (0.0024) +[2023-12-29 10:49:07,857][00379] Fps is (10 sec: 2868.6, 60 sec: 3140.2, 300 sec: 3110.2). Total num frames: 2961408. Throughput: 0: 803.7. Samples: 739240. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:49:07,864][00379] Avg episode reward: [(0, '8.095')] +[2023-12-29 10:49:12,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3208.5, 300 sec: 3138.0). Total num frames: 2981888. Throughput: 0: 821.2. Samples: 745472. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:49:12,863][00379] Avg episode reward: [(0, '7.792')] +[2023-12-29 10:49:13,718][02214] Updated weights for policy 0, policy_version 730 (0.0028) +[2023-12-29 10:49:17,856][00379] Fps is (10 sec: 3686.6, 60 sec: 3276.8, 300 sec: 3138.0). Total num frames: 2998272. Throughput: 0: 804.3. Samples: 750774. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:49:17,863][00379] Avg episode reward: [(0, '7.426')] +[2023-12-29 10:49:22,860][00379] Fps is (10 sec: 3275.4, 60 sec: 3345.1, 300 sec: 3151.8). Total num frames: 3014656. Throughput: 0: 806.8. Samples: 752722. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:49:22,863][00379] Avg episode reward: [(0, '7.427')] +[2023-12-29 10:49:27,862][00379] Fps is (10 sec: 2865.6, 60 sec: 3208.2, 300 sec: 3124.0). Total num frames: 3026944. Throughput: 0: 811.8. Samples: 756996. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:49:27,871][00379] Avg episode reward: [(0, '7.465')] +[2023-12-29 10:49:28,025][02214] Updated weights for policy 0, policy_version 740 (0.0015) +[2023-12-29 10:49:32,856][00379] Fps is (10 sec: 3688.0, 60 sec: 3276.8, 300 sec: 3138.0). Total num frames: 3051520. Throughput: 0: 831.6. Samples: 762948. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-12-29 10:49:32,863][00379] Avg episode reward: [(0, '7.800')] +[2023-12-29 10:49:37,860][00379] Fps is (10 sec: 4096.8, 60 sec: 3345.2, 300 sec: 3151.8). Total num frames: 3067904. Throughput: 0: 835.0. Samples: 765924. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:49:37,862][00379] Avg episode reward: [(0, '7.765')] +[2023-12-29 10:49:38,106][02214] Updated weights for policy 0, policy_version 750 (0.0017) +[2023-12-29 10:49:42,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3165.7). Total num frames: 3084288. Throughput: 0: 827.3. Samples: 770572. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:49:42,859][00379] Avg episode reward: [(0, '8.303')] +[2023-12-29 10:49:42,863][02201] Saving new best policy, reward=8.303! +[2023-12-29 10:49:47,856][00379] Fps is (10 sec: 2868.3, 60 sec: 3276.8, 300 sec: 3151.8). Total num frames: 3096576. Throughput: 0: 828.8. Samples: 774682. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:49:47,863][00379] Avg episode reward: [(0, '7.991')] +[2023-12-29 10:49:51,695][02214] Updated weights for policy 0, policy_version 760 (0.0018) +[2023-12-29 10:49:52,856][00379] Fps is (10 sec: 3276.7, 60 sec: 3276.9, 300 sec: 3165.7). Total num frames: 3117056. Throughput: 0: 850.0. Samples: 777488. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:49:52,864][00379] Avg episode reward: [(0, '7.973')] +[2023-12-29 10:49:57,856][00379] Fps is (10 sec: 4096.0, 60 sec: 3413.7, 300 sec: 3193.5). Total num frames: 3137536. Throughput: 0: 844.4. Samples: 783470. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:49:57,858][00379] Avg episode reward: [(0, '7.696')] +[2023-12-29 10:50:02,856][00379] Fps is (10 sec: 3276.9, 60 sec: 3413.3, 300 sec: 3193.5). Total num frames: 3149824. Throughput: 0: 829.6. Samples: 788104. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:50:02,861][00379] Avg episode reward: [(0, '7.970')] +[2023-12-29 10:50:03,726][02214] Updated weights for policy 0, policy_version 770 (0.0029) +[2023-12-29 10:50:07,857][00379] Fps is (10 sec: 2457.3, 60 sec: 3345.0, 300 sec: 3165.7). Total num frames: 3162112. Throughput: 0: 828.8. Samples: 790016. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:50:07,861][00379] Avg episode reward: [(0, '8.461')] +[2023-12-29 10:50:07,874][02201] Saving new best policy, reward=8.461! +[2023-12-29 10:50:12,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3165.8). Total num frames: 3182592. Throughput: 0: 838.4. Samples: 794718. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:50:12,862][00379] Avg episode reward: [(0, '9.253')] +[2023-12-29 10:50:12,865][02201] Saving new best policy, reward=9.253! +[2023-12-29 10:50:15,525][02214] Updated weights for policy 0, policy_version 780 (0.0029) +[2023-12-29 10:50:17,856][00379] Fps is (10 sec: 4096.5, 60 sec: 3413.3, 300 sec: 3193.5). Total num frames: 3203072. Throughput: 0: 850.5. Samples: 801220. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:50:17,860][00379] Avg episode reward: [(0, '10.000')] +[2023-12-29 10:50:17,881][02201] Saving new best policy, reward=10.000! +[2023-12-29 10:50:22,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3413.6, 300 sec: 3207.4). Total num frames: 3219456. Throughput: 0: 842.2. Samples: 803820. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:50:22,860][00379] Avg episode reward: [(0, '9.565')] +[2023-12-29 10:50:27,856][00379] Fps is (10 sec: 2867.1, 60 sec: 3413.6, 300 sec: 3179.6). Total num frames: 3231744. Throughput: 0: 828.9. Samples: 807874. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:50:27,865][00379] Avg episode reward: [(0, '9.017')] +[2023-12-29 10:50:28,594][02214] Updated weights for policy 0, policy_version 790 (0.0016) +[2023-12-29 10:50:32,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3276.8, 300 sec: 3179.6). Total num frames: 3248128. Throughput: 0: 841.5. Samples: 812548. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:50:32,859][00379] Avg episode reward: [(0, '8.987')] +[2023-12-29 10:50:37,856][00379] Fps is (10 sec: 3686.5, 60 sec: 3345.3, 300 sec: 3207.4). Total num frames: 3268608. Throughput: 0: 848.1. Samples: 815654. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-12-29 10:50:37,859][00379] Avg episode reward: [(0, '9.199')] +[2023-12-29 10:50:39,565][02214] Updated weights for policy 0, policy_version 800 (0.0026) +[2023-12-29 10:50:42,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3221.3). Total num frames: 3284992. Throughput: 0: 839.0. Samples: 821226. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:50:42,863][00379] Avg episode reward: [(0, '9.971')] +[2023-12-29 10:50:47,856][00379] Fps is (10 sec: 2457.6, 60 sec: 3276.8, 300 sec: 3207.4). Total num frames: 3293184. Throughput: 0: 800.4. Samples: 824124. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:50:47,861][00379] Avg episode reward: [(0, '10.161')] +[2023-12-29 10:50:47,876][02201] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000804_3293184.pth... +[2023-12-29 10:50:48,135][02201] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000616_2523136.pth +[2023-12-29 10:50:48,157][02201] Saving new best policy, reward=10.161! +[2023-12-29 10:50:52,856][00379] Fps is (10 sec: 2457.6, 60 sec: 3208.5, 300 sec: 3193.5). Total num frames: 3309568. Throughput: 0: 797.0. Samples: 825882. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:50:52,863][00379] Avg episode reward: [(0, '10.424')] +[2023-12-29 10:50:52,872][02201] Saving new best policy, reward=10.424! +[2023-12-29 10:50:54,928][02214] Updated weights for policy 0, policy_version 810 (0.0020) +[2023-12-29 10:50:57,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3140.3, 300 sec: 3179.6). Total num frames: 3325952. Throughput: 0: 814.7. Samples: 831380. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:50:57,858][00379] Avg episode reward: [(0, '10.294')] +[2023-12-29 10:51:02,858][00379] Fps is (10 sec: 3685.8, 60 sec: 3276.7, 300 sec: 3207.4). Total num frames: 3346432. Throughput: 0: 802.5. Samples: 837336. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:51:02,865][00379] Avg episode reward: [(0, '10.556')] +[2023-12-29 10:51:02,868][02201] Saving new best policy, reward=10.556! +[2023-12-29 10:51:06,814][02214] Updated weights for policy 0, policy_version 820 (0.0016) +[2023-12-29 10:51:07,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3276.9, 300 sec: 3207.4). Total num frames: 3358720. Throughput: 0: 786.5. Samples: 839214. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:51:07,859][00379] Avg episode reward: [(0, '10.063')] +[2023-12-29 10:51:12,857][00379] Fps is (10 sec: 2867.5, 60 sec: 3208.5, 300 sec: 3193.5). Total num frames: 3375104. Throughput: 0: 787.4. Samples: 843306. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:51:12,859][00379] Avg episode reward: [(0, '9.784')] +[2023-12-29 10:51:17,861][00379] Fps is (10 sec: 3684.6, 60 sec: 3208.3, 300 sec: 3207.3). Total num frames: 3395584. Throughput: 0: 819.6. Samples: 849432. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:51:17,864][00379] Avg episode reward: [(0, '9.514')] +[2023-12-29 10:51:18,476][02214] Updated weights for policy 0, policy_version 830 (0.0018) +[2023-12-29 10:51:22,856][00379] Fps is (10 sec: 4096.2, 60 sec: 3276.8, 300 sec: 3235.1). Total num frames: 3416064. Throughput: 0: 813.7. Samples: 852270. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:51:22,858][00379] Avg episode reward: [(0, '9.810')] +[2023-12-29 10:51:27,860][00379] Fps is (10 sec: 3277.2, 60 sec: 3276.6, 300 sec: 3235.1). Total num frames: 3428352. Throughput: 0: 800.9. Samples: 857270. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:51:27,862][00379] Avg episode reward: [(0, '10.421')] +[2023-12-29 10:51:31,401][02214] Updated weights for policy 0, policy_version 840 (0.0020) +[2023-12-29 10:51:32,856][00379] Fps is (10 sec: 2457.6, 60 sec: 3208.5, 300 sec: 3221.3). Total num frames: 3440640. Throughput: 0: 825.8. Samples: 861286. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:51:32,863][00379] Avg episode reward: [(0, '11.620')] +[2023-12-29 10:51:32,866][02201] Saving new best policy, reward=11.620! +[2023-12-29 10:51:37,856][00379] Fps is (10 sec: 3278.0, 60 sec: 3208.5, 300 sec: 3235.1). Total num frames: 3461120. Throughput: 0: 836.5. Samples: 863526. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:51:37,864][00379] Avg episode reward: [(0, '11.429')] +[2023-12-29 10:51:42,250][02214] Updated weights for policy 0, policy_version 850 (0.0026) +[2023-12-29 10:51:42,856][00379] Fps is (10 sec: 4096.0, 60 sec: 3276.8, 300 sec: 3249.0). Total num frames: 3481600. Throughput: 0: 860.3. Samples: 870092. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:51:42,859][00379] Avg episode reward: [(0, '11.769')] +[2023-12-29 10:51:42,869][02201] Saving new best policy, reward=11.769! +[2023-12-29 10:51:47,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3262.9). Total num frames: 3497984. Throughput: 0: 838.0. Samples: 875046. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:51:47,858][00379] Avg episode reward: [(0, '11.581')] +[2023-12-29 10:51:52,858][00379] Fps is (10 sec: 2866.6, 60 sec: 3345.0, 300 sec: 3235.1). Total num frames: 3510272. Throughput: 0: 838.6. Samples: 876954. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-12-29 10:51:52,863][00379] Avg episode reward: [(0, '11.387')] +[2023-12-29 10:51:56,544][02214] Updated weights for policy 0, policy_version 860 (0.0046) +[2023-12-29 10:51:57,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3221.3). Total num frames: 3526656. Throughput: 0: 838.1. Samples: 881018. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:51:57,863][00379] Avg episode reward: [(0, '11.915')] +[2023-12-29 10:51:57,875][02201] Saving new best policy, reward=11.915! +[2023-12-29 10:52:02,856][00379] Fps is (10 sec: 3687.1, 60 sec: 3345.2, 300 sec: 3235.2). Total num frames: 3547136. Throughput: 0: 839.1. Samples: 887188. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:52:02,863][00379] Avg episode reward: [(0, '13.204')] +[2023-12-29 10:52:02,869][02201] Saving new best policy, reward=13.204! +[2023-12-29 10:52:06,294][02214] Updated weights for policy 0, policy_version 870 (0.0019) +[2023-12-29 10:52:07,856][00379] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3276.8). Total num frames: 3567616. Throughput: 0: 846.4. Samples: 890360. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:52:07,863][00379] Avg episode reward: [(0, '13.117')] +[2023-12-29 10:52:12,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3413.4, 300 sec: 3262.9). Total num frames: 3579904. Throughput: 0: 830.6. Samples: 894646. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:52:12,863][00379] Avg episode reward: [(0, '13.490')] +[2023-12-29 10:52:12,864][02201] Saving new best policy, reward=13.490! +[2023-12-29 10:52:17,856][00379] Fps is (10 sec: 2457.6, 60 sec: 3277.1, 300 sec: 3249.0). Total num frames: 3592192. Throughput: 0: 831.2. Samples: 898692. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:52:17,864][00379] Avg episode reward: [(0, '13.456')] +[2023-12-29 10:52:20,369][02214] Updated weights for policy 0, policy_version 880 (0.0026) +[2023-12-29 10:52:22,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 3249.1). Total num frames: 3612672. Throughput: 0: 849.0. Samples: 901730. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:52:22,859][00379] Avg episode reward: [(0, '11.693')] +[2023-12-29 10:52:27,856][00379] Fps is (10 sec: 4096.0, 60 sec: 3413.5, 300 sec: 3276.8). Total num frames: 3633152. Throughput: 0: 848.3. Samples: 908266. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:52:27,858][00379] Avg episode reward: [(0, '11.616')] +[2023-12-29 10:52:30,672][02214] Updated weights for policy 0, policy_version 890 (0.0015) +[2023-12-29 10:52:32,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3290.7). Total num frames: 3649536. Throughput: 0: 844.0. Samples: 913028. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:52:32,867][00379] Avg episode reward: [(0, '11.178')] +[2023-12-29 10:52:37,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3290.7). Total num frames: 3661824. Throughput: 0: 848.3. Samples: 915128. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:52:37,862][00379] Avg episode reward: [(0, '12.219')] +[2023-12-29 10:52:42,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3290.7). Total num frames: 3682304. Throughput: 0: 876.3. Samples: 920450. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:52:42,859][00379] Avg episode reward: [(0, '13.292')] +[2023-12-29 10:52:42,913][02214] Updated weights for policy 0, policy_version 900 (0.0026) +[2023-12-29 10:52:47,856][00379] Fps is (10 sec: 4505.6, 60 sec: 3481.6, 300 sec: 3318.5). Total num frames: 3706880. Throughput: 0: 886.1. Samples: 927064. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-12-29 10:52:47,858][00379] Avg episode reward: [(0, '13.561')] +[2023-12-29 10:52:47,871][02201] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000905_3706880.pth... +[2023-12-29 10:52:47,999][02201] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000708_2899968.pth +[2023-12-29 10:52:48,016][02201] Saving new best policy, reward=13.561! +[2023-12-29 10:52:52,858][00379] Fps is (10 sec: 3685.8, 60 sec: 3481.6, 300 sec: 3318.5). Total num frames: 3719168. Throughput: 0: 867.7. Samples: 929408. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:52:52,860][00379] Avg episode reward: [(0, '14.480')] +[2023-12-29 10:52:52,865][02201] Saving new best policy, reward=14.480! +[2023-12-29 10:52:54,853][02214] Updated weights for policy 0, policy_version 910 (0.0032) +[2023-12-29 10:52:57,856][00379] Fps is (10 sec: 2457.6, 60 sec: 3413.3, 300 sec: 3318.5). Total num frames: 3731456. Throughput: 0: 862.5. Samples: 933458. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:52:57,862][00379] Avg episode reward: [(0, '14.448')] +[2023-12-29 10:53:02,856][00379] Fps is (10 sec: 3687.0, 60 sec: 3481.6, 300 sec: 3332.3). Total num frames: 3756032. Throughput: 0: 896.7. Samples: 939042. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-12-29 10:53:02,863][00379] Avg episode reward: [(0, '14.521')] +[2023-12-29 10:53:02,865][02201] Saving new best policy, reward=14.521! +[2023-12-29 10:53:05,489][02214] Updated weights for policy 0, policy_version 920 (0.0022) +[2023-12-29 10:53:07,856][00379] Fps is (10 sec: 4505.6, 60 sec: 3481.6, 300 sec: 3346.2). Total num frames: 3776512. Throughput: 0: 901.2. Samples: 942286. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-12-29 10:53:07,859][00379] Avg episode reward: [(0, '14.871')] +[2023-12-29 10:53:07,873][02201] Saving new best policy, reward=14.871! +[2023-12-29 10:53:12,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3360.1). Total num frames: 3792896. Throughput: 0: 881.0. Samples: 947910. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:53:12,864][00379] Avg episode reward: [(0, '14.794')] +[2023-12-29 10:53:17,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3360.2). Total num frames: 3805184. Throughput: 0: 870.7. Samples: 952210. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:53:17,862][00379] Avg episode reward: [(0, '13.803')] +[2023-12-29 10:53:18,239][02214] Updated weights for policy 0, policy_version 930 (0.0036) +[2023-12-29 10:53:22,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3360.1). Total num frames: 3825664. Throughput: 0: 877.5. Samples: 954614. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:53:22,859][00379] Avg episode reward: [(0, '13.080')] +[2023-12-29 10:53:27,856][00379] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3360.1). Total num frames: 3846144. Throughput: 0: 906.1. Samples: 961226. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:53:27,859][00379] Avg episode reward: [(0, '13.810')] +[2023-12-29 10:53:28,185][02214] Updated weights for policy 0, policy_version 940 (0.0021) +[2023-12-29 10:53:32,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3374.1). Total num frames: 3862528. Throughput: 0: 880.4. Samples: 966680. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:53:32,864][00379] Avg episode reward: [(0, '14.306')] +[2023-12-29 10:53:37,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3374.0). Total num frames: 3878912. Throughput: 0: 875.2. Samples: 968790. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:53:37,859][00379] Avg episode reward: [(0, '15.120')] +[2023-12-29 10:53:37,876][02201] Saving new best policy, reward=15.120! +[2023-12-29 10:53:41,172][02214] Updated weights for policy 0, policy_version 950 (0.0024) +[2023-12-29 10:53:42,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3374.0). Total num frames: 3895296. Throughput: 0: 890.4. Samples: 973528. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-12-29 10:53:42,861][00379] Avg episode reward: [(0, '16.992')] +[2023-12-29 10:53:42,863][02201] Saving new best policy, reward=16.992! +[2023-12-29 10:53:47,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3374.0). Total num frames: 3915776. Throughput: 0: 911.3. Samples: 980052. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-12-29 10:53:47,861][00379] Avg episode reward: [(0, '17.666')] +[2023-12-29 10:53:47,953][02201] Saving new best policy, reward=17.666! +[2023-12-29 10:53:51,306][02214] Updated weights for policy 0, policy_version 960 (0.0025) +[2023-12-29 10:53:52,856][00379] Fps is (10 sec: 4096.0, 60 sec: 3618.2, 300 sec: 3401.8). Total num frames: 3936256. Throughput: 0: 905.6. Samples: 983036. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-12-29 10:53:52,863][00379] Avg episode reward: [(0, '17.907')] +[2023-12-29 10:53:52,871][02201] Saving new best policy, reward=17.907! +[2023-12-29 10:53:57,856][00379] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3401.8). Total num frames: 3948544. Throughput: 0: 869.1. Samples: 987018. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:53:57,862][00379] Avg episode reward: [(0, '18.345')] +[2023-12-29 10:53:57,877][02201] Saving new best policy, reward=18.345! +[2023-12-29 10:54:02,856][00379] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3401.8). Total num frames: 3964928. Throughput: 0: 882.1. Samples: 991906. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:54:02,859][00379] Avg episode reward: [(0, '18.135')] +[2023-12-29 10:54:04,026][02214] Updated weights for policy 0, policy_version 970 (0.0022) +[2023-12-29 10:54:07,856][00379] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3401.8). Total num frames: 3985408. Throughput: 0: 901.6. Samples: 995184. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-12-29 10:54:07,867][00379] Avg episode reward: [(0, '16.248')] +[2023-12-29 10:54:12,445][02201] Stopping Batcher_0... +[2023-12-29 10:54:12,446][02201] Loop batcher_evt_loop terminating... +[2023-12-29 10:54:12,445][00379] Component Batcher_0 stopped! +[2023-12-29 10:54:12,455][02201] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-12-29 10:54:12,540][02220] Stopping RolloutWorker_w6... +[2023-12-29 10:54:12,540][00379] Component RolloutWorker_w6 stopped! +[2023-12-29 10:54:12,541][02220] Loop rollout_proc6_evt_loop terminating... +[2023-12-29 10:54:12,553][02218] Stopping RolloutWorker_w2... +[2023-12-29 10:54:12,553][00379] Component RolloutWorker_w2 stopped! +[2023-12-29 10:54:12,558][02218] Loop rollout_proc2_evt_loop terminating... +[2023-12-29 10:54:12,578][00379] Component RolloutWorker_w4 stopped! +[2023-12-29 10:54:12,584][02219] Stopping RolloutWorker_w4... +[2023-12-29 10:54:12,585][02219] Loop rollout_proc4_evt_loop terminating... +[2023-12-29 10:54:12,587][02221] Stopping RolloutWorker_w5... +[2023-12-29 10:54:12,587][02221] Loop rollout_proc5_evt_loop terminating... +[2023-12-29 10:54:12,587][00379] Component RolloutWorker_w5 stopped! +[2023-12-29 10:54:12,619][00379] Component RolloutWorker_w3 stopped! +[2023-12-29 10:54:12,622][02217] Stopping RolloutWorker_w3... +[2023-12-29 10:54:12,624][02214] Weights refcount: 2 0 +[2023-12-29 10:54:12,624][02217] Loop rollout_proc3_evt_loop terminating... +[2023-12-29 10:54:12,627][00379] Component InferenceWorker_p0-w0 stopped! +[2023-12-29 10:54:12,630][02214] Stopping InferenceWorker_p0-w0... +[2023-12-29 10:54:12,631][02214] Loop inference_proc0-0_evt_loop terminating... +[2023-12-29 10:54:12,650][02215] Stopping RolloutWorker_w0... +[2023-12-29 10:54:12,650][02201] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000804_3293184.pth +[2023-12-29 10:54:12,651][00379] Component RolloutWorker_w0 stopped! +[2023-12-29 10:54:12,657][02215] Loop rollout_proc0_evt_loop terminating... +[2023-12-29 10:54:12,665][02201] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-12-29 10:54:12,667][00379] Component RolloutWorker_w7 stopped! +[2023-12-29 10:54:12,671][02222] Stopping RolloutWorker_w7... +[2023-12-29 10:54:12,677][00379] Component RolloutWorker_w1 stopped! +[2023-12-29 10:54:12,678][02216] Stopping RolloutWorker_w1... +[2023-12-29 10:54:12,679][02216] Loop rollout_proc1_evt_loop terminating... +[2023-12-29 10:54:12,685][02222] Loop rollout_proc7_evt_loop terminating... +[2023-12-29 10:54:12,840][02201] Stopping LearnerWorker_p0... +[2023-12-29 10:54:12,840][02201] Loop learner_proc0_evt_loop terminating... +[2023-12-29 10:54:12,842][00379] Component LearnerWorker_p0 stopped! +[2023-12-29 10:54:12,847][00379] Waiting for process learner_proc0 to stop... +[2023-12-29 10:54:14,999][00379] Waiting for process inference_proc0-0 to join... +[2023-12-29 10:54:15,146][00379] Waiting for process rollout_proc0 to join... +[2023-12-29 10:54:17,297][00379] Waiting for process rollout_proc1 to join... +[2023-12-29 10:54:17,465][00379] Waiting for process rollout_proc2 to join... +[2023-12-29 10:54:17,468][00379] Waiting for process rollout_proc3 to join... +[2023-12-29 10:54:17,474][00379] Waiting for process rollout_proc4 to join... +[2023-12-29 10:54:17,476][00379] Waiting for process rollout_proc5 to join... +[2023-12-29 10:54:17,477][00379] Waiting for process rollout_proc6 to join... +[2023-12-29 10:54:17,479][00379] Waiting for process rollout_proc7 to join... +[2023-12-29 10:54:17,480][00379] Batcher 0 profile tree view: +batching: 28.6410, releasing_batches: 0.0293 +[2023-12-29 10:54:17,482][00379] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0009 + wait_policy_total: 587.7447 +update_model: 9.2862 + weight_update: 0.0019 +one_step: 0.0066 + handle_policy_step: 606.5250 + deserialize: 16.2744, stack: 3.2899, obs_to_device_normalize: 121.2973, forward: 324.1769, send_messages: 28.6468 + prepare_outputs: 81.3767 + to_cpu: 46.2605 +[2023-12-29 10:54:17,483][00379] Learner 0 profile tree view: +misc: 0.0062, prepare_batch: 14.0496 +train: 76.9981 + epoch_init: 0.0064, minibatch_init: 0.0071, losses_postprocess: 0.6673, kl_divergence: 0.6176, after_optimizer: 35.0777 + calculate_losses: 27.9639 + losses_init: 0.0040, forward_head: 1.3533, bptt_initial: 18.5089, tail: 1.0914, advantages_returns: 0.2690, losses: 4.2898 + bptt: 2.1374 + bptt_forward_core: 2.0460 + update: 12.0040 + clip: 1.0035 +[2023-12-29 10:54:17,485][00379] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.4815, enqueue_policy_requests: 171.9884, env_step: 934.4311, overhead: 26.1479, complete_rollouts: 8.0881 +save_policy_outputs: 22.4042 + split_output_tensors: 10.8226 +[2023-12-29 10:54:17,486][00379] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.3767, enqueue_policy_requests: 175.1907, env_step: 931.4306, overhead: 25.2515, complete_rollouts: 8.2427 +save_policy_outputs: 22.4749 + split_output_tensors: 10.5720 +[2023-12-29 10:54:17,487][00379] Loop Runner_EvtLoop terminating... +[2023-12-29 10:54:17,489][00379] Runner profile tree view: +main_loop: 1283.0022 +[2023-12-29 10:54:17,490][00379] Collected {0: 4005888}, FPS: 3122.3 +[2023-12-29 10:57:12,823][00379] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-12-29 10:57:12,824][00379] Overriding arg 'num_workers' with value 1 passed from command line +[2023-12-29 10:57:12,828][00379] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-12-29 10:57:12,830][00379] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-12-29 10:57:12,832][00379] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-12-29 10:57:12,834][00379] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-12-29 10:57:12,835][00379] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2023-12-29 10:57:12,837][00379] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-12-29 10:57:12,841][00379] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2023-12-29 10:57:12,842][00379] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2023-12-29 10:57:12,844][00379] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-12-29 10:57:12,845][00379] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-12-29 10:57:12,846][00379] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-12-29 10:57:12,847][00379] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-12-29 10:57:12,849][00379] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-12-29 10:57:12,888][00379] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-12-29 10:57:12,892][00379] RunningMeanStd input shape: (3, 72, 128) +[2023-12-29 10:57:12,893][00379] RunningMeanStd input shape: (1,) +[2023-12-29 10:57:12,910][00379] ConvEncoder: input_channels=3 +[2023-12-29 10:57:13,022][00379] Conv encoder output size: 512 +[2023-12-29 10:57:13,024][00379] Policy head output size: 512 +[2023-12-29 10:57:13,304][00379] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-12-29 10:57:14,015][00379] Num frames 100... +[2023-12-29 10:57:14,154][00379] Num frames 200... +[2023-12-29 10:57:14,284][00379] Num frames 300... +[2023-12-29 10:57:14,414][00379] Num frames 400... +[2023-12-29 10:57:14,541][00379] Num frames 500... +[2023-12-29 10:57:14,664][00379] Num frames 600... +[2023-12-29 10:57:14,788][00379] Num frames 700... +[2023-12-29 10:57:14,918][00379] Num frames 800... +[2023-12-29 10:57:15,018][00379] Avg episode rewards: #0: 20.320, true rewards: #0: 8.320 +[2023-12-29 10:57:15,020][00379] Avg episode reward: 20.320, avg true_objective: 8.320 +[2023-12-29 10:57:15,115][00379] Num frames 900... +[2023-12-29 10:57:15,244][00379] Num frames 1000... +[2023-12-29 10:57:15,371][00379] Num frames 1100... +[2023-12-29 10:57:15,496][00379] Num frames 1200... +[2023-12-29 10:57:15,620][00379] Num frames 1300... +[2023-12-29 10:57:15,743][00379] Num frames 1400... +[2023-12-29 10:57:15,872][00379] Num frames 1500... +[2023-12-29 10:57:15,968][00379] Avg episode rewards: #0: 16.645, true rewards: #0: 7.645 +[2023-12-29 10:57:15,970][00379] Avg episode reward: 16.645, avg true_objective: 7.645 +[2023-12-29 10:57:16,065][00379] Num frames 1600... +[2023-12-29 10:57:16,194][00379] Num frames 1700... +[2023-12-29 10:57:16,321][00379] Num frames 1800... +[2023-12-29 10:57:16,448][00379] Num frames 1900... +[2023-12-29 10:57:16,571][00379] Num frames 2000... +[2023-12-29 10:57:16,698][00379] Num frames 2100... +[2023-12-29 10:57:16,831][00379] Num frames 2200... +[2023-12-29 10:57:16,933][00379] Avg episode rewards: #0: 15.110, true rewards: #0: 7.443 +[2023-12-29 10:57:16,935][00379] Avg episode reward: 15.110, avg true_objective: 7.443 +[2023-12-29 10:57:17,024][00379] Num frames 2300... +[2023-12-29 10:57:17,162][00379] Num frames 2400... +[2023-12-29 10:57:17,288][00379] Num frames 2500... +[2023-12-29 10:57:17,419][00379] Num frames 2600... +[2023-12-29 10:57:17,552][00379] Num frames 2700... +[2023-12-29 10:57:17,682][00379] Num frames 2800... +[2023-12-29 10:57:17,810][00379] Num frames 2900... +[2023-12-29 10:57:17,913][00379] Avg episode rewards: #0: 14.093, true rewards: #0: 7.342 +[2023-12-29 10:57:17,915][00379] Avg episode reward: 14.093, avg true_objective: 7.342 +[2023-12-29 10:57:17,996][00379] Num frames 3000... +[2023-12-29 10:57:18,135][00379] Num frames 3100... +[2023-12-29 10:57:18,262][00379] Num frames 3200... +[2023-12-29 10:57:18,387][00379] Num frames 3300... +[2023-12-29 10:57:18,511][00379] Num frames 3400... +[2023-12-29 10:57:18,667][00379] Avg episode rewards: #0: 12.762, true rewards: #0: 6.962 +[2023-12-29 10:57:18,668][00379] Avg episode reward: 12.762, avg true_objective: 6.962 +[2023-12-29 10:57:18,695][00379] Num frames 3500... +[2023-12-29 10:57:18,822][00379] Num frames 3600... +[2023-12-29 10:57:18,958][00379] Num frames 3700... +[2023-12-29 10:57:19,086][00379] Num frames 3800... +[2023-12-29 10:57:19,219][00379] Num frames 3900... +[2023-12-29 10:57:19,343][00379] Num frames 4000... +[2023-12-29 10:57:19,474][00379] Num frames 4100... +[2023-12-29 10:57:19,599][00379] Num frames 4200... +[2023-12-29 10:57:19,722][00379] Num frames 4300... +[2023-12-29 10:57:19,847][00379] Num frames 4400... +[2023-12-29 10:57:19,982][00379] Num frames 4500... +[2023-12-29 10:57:20,111][00379] Num frames 4600... +[2023-12-29 10:57:20,246][00379] Num frames 4700... +[2023-12-29 10:57:20,381][00379] Num frames 4800... +[2023-12-29 10:57:20,522][00379] Num frames 4900... +[2023-12-29 10:57:20,652][00379] Num frames 5000... +[2023-12-29 10:57:20,780][00379] Num frames 5100... +[2023-12-29 10:57:20,911][00379] Num frames 5200... +[2023-12-29 10:57:21,056][00379] Num frames 5300... +[2023-12-29 10:57:21,189][00379] Num frames 5400... +[2023-12-29 10:57:21,316][00379] Num frames 5500... +[2023-12-29 10:57:21,411][00379] Avg episode rewards: #0: 19.215, true rewards: #0: 9.215 +[2023-12-29 10:57:21,412][00379] Avg episode reward: 19.215, avg true_objective: 9.215 +[2023-12-29 10:57:21,503][00379] Num frames 5600... +[2023-12-29 10:57:21,626][00379] Num frames 5700... +[2023-12-29 10:57:21,760][00379] Num frames 5800... +[2023-12-29 10:57:21,947][00379] Num frames 5900... +[2023-12-29 10:57:22,033][00379] Avg episode rewards: #0: 17.018, true rewards: #0: 8.447 +[2023-12-29 10:57:22,035][00379] Avg episode reward: 17.018, avg true_objective: 8.447 +[2023-12-29 10:57:22,206][00379] Num frames 6000... +[2023-12-29 10:57:22,387][00379] Num frames 6100... +[2023-12-29 10:57:22,562][00379] Num frames 6200... +[2023-12-29 10:57:22,743][00379] Num frames 6300... +[2023-12-29 10:57:22,926][00379] Num frames 6400... +[2023-12-29 10:57:23,087][00379] Avg episode rewards: #0: 15.821, true rewards: #0: 8.071 +[2023-12-29 10:57:23,089][00379] Avg episode reward: 15.821, avg true_objective: 8.071 +[2023-12-29 10:57:23,168][00379] Num frames 6500... +[2023-12-29 10:57:23,351][00379] Num frames 6600... +[2023-12-29 10:57:23,535][00379] Num frames 6700... +[2023-12-29 10:57:23,714][00379] Num frames 6800... +[2023-12-29 10:57:23,902][00379] Num frames 6900... +[2023-12-29 10:57:24,095][00379] Num frames 7000... +[2023-12-29 10:57:24,301][00379] Num frames 7100... +[2023-12-29 10:57:24,486][00379] Num frames 7200... +[2023-12-29 10:57:24,673][00379] Num frames 7300... +[2023-12-29 10:57:24,810][00379] Num frames 7400... +[2023-12-29 10:57:24,947][00379] Num frames 7500... +[2023-12-29 10:57:25,081][00379] Num frames 7600... +[2023-12-29 10:57:25,209][00379] Num frames 7700... +[2023-12-29 10:57:25,313][00379] Avg episode rewards: #0: 17.374, true rewards: #0: 8.597 +[2023-12-29 10:57:25,315][00379] Avg episode reward: 17.374, avg true_objective: 8.597 +[2023-12-29 10:57:25,398][00379] Num frames 7800... +[2023-12-29 10:57:25,526][00379] Num frames 7900... +[2023-12-29 10:57:25,655][00379] Num frames 8000... +[2023-12-29 10:57:25,784][00379] Num frames 8100... +[2023-12-29 10:57:25,917][00379] Num frames 8200... +[2023-12-29 10:57:26,047][00379] Num frames 8300... +[2023-12-29 10:57:26,182][00379] Num frames 8400... +[2023-12-29 10:57:26,315][00379] Num frames 8500... +[2023-12-29 10:57:26,454][00379] Num frames 8600... +[2023-12-29 10:57:26,585][00379] Num frames 8700... +[2023-12-29 10:57:26,715][00379] Num frames 8800... +[2023-12-29 10:57:26,856][00379] Num frames 8900... +[2023-12-29 10:57:26,952][00379] Avg episode rewards: #0: 18.327, true rewards: #0: 8.927 +[2023-12-29 10:57:26,955][00379] Avg episode reward: 18.327, avg true_objective: 8.927 +[2023-12-29 10:58:20,698][00379] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2023-12-29 10:58:56,186][00379] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-12-29 10:58:56,188][00379] Overriding arg 'num_workers' with value 1 passed from command line +[2023-12-29 10:58:56,190][00379] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-12-29 10:58:56,193][00379] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-12-29 10:58:56,196][00379] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-12-29 10:58:56,197][00379] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-12-29 10:58:56,199][00379] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2023-12-29 10:58:56,201][00379] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-12-29 10:58:56,204][00379] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2023-12-29 10:58:56,205][00379] Adding new argument 'hf_repository'='VinayHajare/vizdoom_health_gathering_supreme' that is not in the saved config file! +[2023-12-29 10:58:56,207][00379] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-12-29 10:58:56,208][00379] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-12-29 10:58:56,209][00379] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-12-29 10:58:56,210][00379] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-12-29 10:58:56,211][00379] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-12-29 10:58:56,246][00379] RunningMeanStd input shape: (3, 72, 128) +[2023-12-29 10:58:56,248][00379] RunningMeanStd input shape: (1,) +[2023-12-29 10:58:56,260][00379] ConvEncoder: input_channels=3 +[2023-12-29 10:58:56,300][00379] Conv encoder output size: 512 +[2023-12-29 10:58:56,302][00379] Policy head output size: 512 +[2023-12-29 10:58:56,322][00379] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-12-29 10:58:56,718][00379] Num frames 100... +[2023-12-29 10:58:56,853][00379] Num frames 200... +[2023-12-29 10:58:56,987][00379] Num frames 300... +[2023-12-29 10:58:57,110][00379] Num frames 400... +[2023-12-29 10:58:57,238][00379] Num frames 500... +[2023-12-29 10:58:57,365][00379] Num frames 600... +[2023-12-29 10:58:57,491][00379] Num frames 700... +[2023-12-29 10:58:57,622][00379] Num frames 800... +[2023-12-29 10:58:57,752][00379] Num frames 900... +[2023-12-29 10:58:57,882][00379] Num frames 1000... +[2023-12-29 10:58:58,027][00379] Num frames 1100... +[2023-12-29 10:58:58,160][00379] Num frames 1200... +[2023-12-29 10:58:58,293][00379] Num frames 1300... +[2023-12-29 10:58:58,424][00379] Num frames 1400... +[2023-12-29 10:58:58,551][00379] Num frames 1500... +[2023-12-29 10:58:58,681][00379] Num frames 1600... +[2023-12-29 10:58:58,813][00379] Num frames 1700... +[2023-12-29 10:58:58,955][00379] Num frames 1800... +[2023-12-29 10:58:59,085][00379] Num frames 1900... +[2023-12-29 10:58:59,225][00379] Num frames 2000... +[2023-12-29 10:58:59,388][00379] Avg episode rewards: #0: 60.799, true rewards: #0: 20.800 +[2023-12-29 10:58:59,390][00379] Avg episode reward: 60.799, avg true_objective: 20.800 +[2023-12-29 10:58:59,422][00379] Num frames 2100... +[2023-12-29 10:58:59,568][00379] Num frames 2200... +[2023-12-29 10:58:59,698][00379] Num frames 2300... +[2023-12-29 10:58:59,828][00379] Num frames 2400... +[2023-12-29 10:58:59,962][00379] Num frames 2500... +[2023-12-29 10:59:00,090][00379] Num frames 2600... +[2023-12-29 10:59:00,228][00379] Num frames 2700... +[2023-12-29 10:59:00,366][00379] Num frames 2800... +[2023-12-29 10:59:00,496][00379] Num frames 2900... +[2023-12-29 10:59:00,648][00379] Avg episode rewards: #0: 41.379, true rewards: #0: 14.880 +[2023-12-29 10:59:00,650][00379] Avg episode reward: 41.379, avg true_objective: 14.880 +[2023-12-29 10:59:00,686][00379] Num frames 3000... +[2023-12-29 10:59:00,816][00379] Num frames 3100... +[2023-12-29 10:59:00,945][00379] Num frames 3200... +[2023-12-29 10:59:01,083][00379] Num frames 3300... +[2023-12-29 10:59:01,217][00379] Num frames 3400... +[2023-12-29 10:59:01,348][00379] Num frames 3500... +[2023-12-29 10:59:01,476][00379] Num frames 3600... +[2023-12-29 10:59:01,602][00379] Num frames 3700... +[2023-12-29 10:59:01,714][00379] Avg episode rewards: #0: 32.480, true rewards: #0: 12.480 +[2023-12-29 10:59:01,715][00379] Avg episode reward: 32.480, avg true_objective: 12.480 +[2023-12-29 10:59:01,789][00379] Num frames 3800... +[2023-12-29 10:59:01,919][00379] Num frames 3900... +[2023-12-29 10:59:02,055][00379] Num frames 4000... +[2023-12-29 10:59:02,182][00379] Num frames 4100... +[2023-12-29 10:59:02,312][00379] Num frames 4200... +[2023-12-29 10:59:02,443][00379] Num frames 4300... +[2023-12-29 10:59:02,572][00379] Num frames 4400... +[2023-12-29 10:59:02,698][00379] Num frames 4500... +[2023-12-29 10:59:02,825][00379] Num frames 4600... +[2023-12-29 10:59:02,956][00379] Num frames 4700... +[2023-12-29 10:59:03,094][00379] Num frames 4800... +[2023-12-29 10:59:03,234][00379] Avg episode rewards: #0: 31.160, true rewards: #0: 12.160 +[2023-12-29 10:59:03,236][00379] Avg episode reward: 31.160, avg true_objective: 12.160 +[2023-12-29 10:59:03,293][00379] Num frames 4900... +[2023-12-29 10:59:03,421][00379] Num frames 5000... +[2023-12-29 10:59:03,549][00379] Num frames 5100... +[2023-12-29 10:59:03,677][00379] Num frames 5200... +[2023-12-29 10:59:03,811][00379] Num frames 5300... +[2023-12-29 10:59:03,943][00379] Num frames 5400... +[2023-12-29 10:59:04,084][00379] Num frames 5500... +[2023-12-29 10:59:04,213][00379] Num frames 5600... +[2023-12-29 10:59:04,338][00379] Num frames 5700... +[2023-12-29 10:59:04,469][00379] Avg episode rewards: #0: 28.120, true rewards: #0: 11.520 +[2023-12-29 10:59:04,471][00379] Avg episode reward: 28.120, avg true_objective: 11.520 +[2023-12-29 10:59:04,529][00379] Num frames 5800... +[2023-12-29 10:59:04,655][00379] Num frames 5900... +[2023-12-29 10:59:04,783][00379] Num frames 6000... +[2023-12-29 10:59:04,913][00379] Num frames 6100... +[2023-12-29 10:59:05,068][00379] Avg episode rewards: #0: 24.293, true rewards: #0: 10.293 +[2023-12-29 10:59:05,070][00379] Avg episode reward: 24.293, avg true_objective: 10.293 +[2023-12-29 10:59:05,124][00379] Num frames 6200... +[2023-12-29 10:59:05,310][00379] Num frames 6300... +[2023-12-29 10:59:05,505][00379] Num frames 6400... +[2023-12-29 10:59:05,683][00379] Num frames 6500... +[2023-12-29 10:59:05,864][00379] Num frames 6600... +[2023-12-29 10:59:06,052][00379] Num frames 6700... +[2023-12-29 10:59:06,242][00379] Num frames 6800... +[2023-12-29 10:59:06,427][00379] Num frames 6900... +[2023-12-29 10:59:06,610][00379] Num frames 7000... +[2023-12-29 10:59:06,786][00379] Num frames 7100... +[2023-12-29 10:59:06,968][00379] Num frames 7200... +[2023-12-29 10:59:07,116][00379] Avg episode rewards: #0: 24.214, true rewards: #0: 10.357 +[2023-12-29 10:59:07,118][00379] Avg episode reward: 24.214, avg true_objective: 10.357 +[2023-12-29 10:59:07,225][00379] Num frames 7300... +[2023-12-29 10:59:07,412][00379] Num frames 7400... +[2023-12-29 10:59:07,599][00379] Num frames 7500... +[2023-12-29 10:59:07,779][00379] Num frames 7600... +[2023-12-29 10:59:07,955][00379] Num frames 7700... +[2023-12-29 10:59:08,183][00379] Avg episode rewards: #0: 22.117, true rewards: #0: 9.742 +[2023-12-29 10:59:08,185][00379] Avg episode reward: 22.117, avg true_objective: 9.742 +[2023-12-29 10:59:08,197][00379] Num frames 7800... +[2023-12-29 10:59:08,327][00379] Num frames 7900... +[2023-12-29 10:59:08,452][00379] Num frames 8000... +[2023-12-29 10:59:08,575][00379] Num frames 8100... +[2023-12-29 10:59:08,703][00379] Num frames 8200... +[2023-12-29 10:59:08,829][00379] Num frames 8300... +[2023-12-29 10:59:08,962][00379] Num frames 8400... +[2023-12-29 10:59:09,096][00379] Num frames 8500... +[2023-12-29 10:59:09,232][00379] Num frames 8600... +[2023-12-29 10:59:09,408][00379] Avg episode rewards: #0: 21.656, true rewards: #0: 9.656 +[2023-12-29 10:59:09,410][00379] Avg episode reward: 21.656, avg true_objective: 9.656 +[2023-12-29 10:59:09,426][00379] Num frames 8700... +[2023-12-29 10:59:09,550][00379] Num frames 8800... +[2023-12-29 10:59:09,673][00379] Num frames 8900... +[2023-12-29 10:59:09,797][00379] Num frames 9000... +[2023-12-29 10:59:09,923][00379] Num frames 9100... +[2023-12-29 10:59:10,050][00379] Num frames 9200... +[2023-12-29 10:59:10,181][00379] Num frames 9300... +[2023-12-29 10:59:10,314][00379] Num frames 9400... +[2023-12-29 10:59:10,443][00379] Num frames 9500... +[2023-12-29 10:59:10,569][00379] Num frames 9600... +[2023-12-29 10:59:10,729][00379] Avg episode rewards: #0: 21.382, true rewards: #0: 9.682 +[2023-12-29 10:59:10,731][00379] Avg episode reward: 21.382, avg true_objective: 9.682 +[2023-12-29 11:00:08,724][00379] Replay video saved to /content/train_dir/default_experiment/replay.mp4!