diff --git "a/sf_log.txt" "b/sf_log.txt" new file mode 100644--- /dev/null +++ "b/sf_log.txt" @@ -0,0 +1,1154 @@ +[2024-11-28 15:11:26,079][00190] Saving configuration to /content/train_dir/default_experiment/config.json... +[2024-11-28 15:11:26,084][00190] Rollout worker 0 uses device cpu +[2024-11-28 15:11:26,085][00190] Rollout worker 1 uses device cpu +[2024-11-28 15:11:26,086][00190] Rollout worker 2 uses device cpu +[2024-11-28 15:11:26,087][00190] Rollout worker 3 uses device cpu +[2024-11-28 15:11:26,089][00190] Rollout worker 4 uses device cpu +[2024-11-28 15:11:26,090][00190] Rollout worker 5 uses device cpu +[2024-11-28 15:11:26,091][00190] Rollout worker 6 uses device cpu +[2024-11-28 15:11:26,092][00190] Rollout worker 7 uses device cpu +[2024-11-28 15:11:26,241][00190] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-11-28 15:11:26,242][00190] InferenceWorker_p0-w0: min num requests: 2 +[2024-11-28 15:11:26,279][00190] Starting all processes... +[2024-11-28 15:11:26,282][00190] Starting process learner_proc0 +[2024-11-28 15:11:26,330][00190] Starting all processes... +[2024-11-28 15:11:26,337][00190] Starting process inference_proc0-0 +[2024-11-28 15:11:26,337][00190] Starting process rollout_proc0 +[2024-11-28 15:11:26,339][00190] Starting process rollout_proc1 +[2024-11-28 15:11:26,339][00190] Starting process rollout_proc2 +[2024-11-28 15:11:26,339][00190] Starting process rollout_proc3 +[2024-11-28 15:11:26,339][00190] Starting process rollout_proc4 +[2024-11-28 15:11:26,339][00190] Starting process rollout_proc5 +[2024-11-28 15:11:26,339][00190] Starting process rollout_proc6 +[2024-11-28 15:11:26,339][00190] Starting process rollout_proc7 +[2024-11-28 15:11:47,094][02447] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-11-28 15:11:47,101][02447] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2024-11-28 15:11:47,162][02447] Num visible devices: 1 +[2024-11-28 15:11:47,224][02447] Starting seed is not provided +[2024-11-28 15:11:47,225][02447] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-11-28 15:11:47,226][02447] Initializing actor-critic model on device cuda:0 +[2024-11-28 15:11:47,227][02447] RunningMeanStd input shape: (3, 72, 128) +[2024-11-28 15:11:47,229][00190] Heartbeat connected on Batcher_0 +[2024-11-28 15:11:47,232][02447] RunningMeanStd input shape: (1,) +[2024-11-28 15:11:47,314][02447] ConvEncoder: input_channels=3 +[2024-11-28 15:11:47,362][02472] Worker 7 uses CPU cores [1] +[2024-11-28 15:11:47,420][02467] Worker 4 uses CPU cores [0] +[2024-11-28 15:11:47,485][00190] Heartbeat connected on RolloutWorker_w4 +[2024-11-28 15:11:47,525][00190] Heartbeat connected on RolloutWorker_w7 +[2024-11-28 15:11:47,600][02465] Worker 6 uses CPU cores [0] +[2024-11-28 15:11:47,613][02466] Worker 3 uses CPU cores [1] +[2024-11-28 15:11:47,694][00190] Heartbeat connected on RolloutWorker_w6 +[2024-11-28 15:11:47,698][02463] Worker 2 uses CPU cores [0] +[2024-11-28 15:11:47,700][02462] Worker 0 uses CPU cores [0] +[2024-11-28 15:11:47,715][02460] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-11-28 15:11:47,716][02460] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2024-11-28 15:11:47,732][00190] Heartbeat connected on RolloutWorker_w0 +[2024-11-28 15:11:47,735][00190] Heartbeat connected on RolloutWorker_w2 +[2024-11-28 15:11:47,739][02460] Num visible devices: 1 +[2024-11-28 15:11:47,751][00190] Heartbeat connected on 
RolloutWorker_w3 +[2024-11-28 15:11:47,761][02461] Worker 1 uses CPU cores [1] +[2024-11-28 15:11:47,770][00190] Heartbeat connected on InferenceWorker_p0-w0 +[2024-11-28 15:11:47,783][02464] Worker 5 uses CPU cores [1] +[2024-11-28 15:11:47,790][00190] Heartbeat connected on RolloutWorker_w1 +[2024-11-28 15:11:47,800][00190] Heartbeat connected on RolloutWorker_w5 +[2024-11-28 15:11:47,857][02447] Conv encoder output size: 512 +[2024-11-28 15:11:47,857][02447] Policy head output size: 512 +[2024-11-28 15:11:47,908][02447] Created Actor Critic model with architecture: +[2024-11-28 15:11:47,908][02447] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2024-11-28 15:11:48,201][02447] Using optimizer +[2024-11-28 15:11:51,813][02447] No checkpoints found +[2024-11-28 15:11:51,814][02447] Did not load from checkpoint, starting from scratch! +[2024-11-28 15:11:51,814][02447] Initialized policy 0 weights for model version 0 +[2024-11-28 15:11:51,819][02447] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2024-11-28 15:11:51,826][02447] LearnerWorker_p0 finished initialization! +[2024-11-28 15:11:51,827][00190] Heartbeat connected on LearnerWorker_p0 +[2024-11-28 15:11:51,913][02460] RunningMeanStd input shape: (3, 72, 128) +[2024-11-28 15:11:51,914][02460] RunningMeanStd input shape: (1,) +[2024-11-28 15:11:51,926][02460] ConvEncoder: input_channels=3 +[2024-11-28 15:11:52,033][02460] Conv encoder output size: 512 +[2024-11-28 15:11:52,034][02460] Policy head output size: 512 +[2024-11-28 15:11:52,086][00190] Inference worker 0-0 is ready! +[2024-11-28 15:11:52,087][00190] All inference workers are ready! Signal rollout workers to start! 
+[2024-11-28 15:11:52,286][02465] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-11-28 15:11:52,283][02463] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-11-28 15:11:52,289][02467] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-11-28 15:11:52,291][02462] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-11-28 15:11:52,304][02464] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-11-28 15:11:52,296][02461] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-11-28 15:11:52,308][02472] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-11-28 15:11:52,310][02466] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-11-28 15:11:53,381][02462] Decorrelating experience for 0 frames... +[2024-11-28 15:11:53,383][02463] Decorrelating experience for 0 frames... +[2024-11-28 15:11:53,759][02464] Decorrelating experience for 0 frames... +[2024-11-28 15:11:53,764][02472] Decorrelating experience for 0 frames... +[2024-11-28 15:11:53,763][02466] Decorrelating experience for 0 frames... +[2024-11-28 15:11:53,783][02462] Decorrelating experience for 32 frames... +[2024-11-28 15:11:54,536][02461] Decorrelating experience for 0 frames... +[2024-11-28 15:11:54,750][02464] Decorrelating experience for 32 frames... +[2024-11-28 15:11:54,752][02472] Decorrelating experience for 32 frames... +[2024-11-28 15:11:54,755][02466] Decorrelating experience for 32 frames... +[2024-11-28 15:11:55,753][02463] Decorrelating experience for 32 frames... +[2024-11-28 15:11:55,757][02462] Decorrelating experience for 64 frames... +[2024-11-28 15:11:55,994][00190] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-11-28 15:11:56,187][02461] Decorrelating experience for 32 frames... +[2024-11-28 15:11:57,092][02472] Decorrelating experience for 64 frames... +[2024-11-28 15:11:57,094][02466] Decorrelating experience for 64 frames... +[2024-11-28 15:11:57,267][02463] Decorrelating experience for 64 frames... +[2024-11-28 15:11:57,546][02465] Decorrelating experience for 0 frames... +[2024-11-28 15:11:57,816][02464] Decorrelating experience for 64 frames... +[2024-11-28 15:11:58,753][02463] Decorrelating experience for 96 frames... +[2024-11-28 15:11:58,997][02462] Decorrelating experience for 96 frames... +[2024-11-28 15:11:59,153][02461] Decorrelating experience for 64 frames... +[2024-11-28 15:11:59,364][02472] Decorrelating experience for 96 frames... +[2024-11-28 15:11:59,374][02466] Decorrelating experience for 96 frames... +[2024-11-28 15:12:00,196][02464] Decorrelating experience for 96 frames... +[2024-11-28 15:12:00,621][02465] Decorrelating experience for 32 frames... +[2024-11-28 15:12:00,833][02461] Decorrelating experience for 96 frames... +[2024-11-28 15:12:00,994][00190] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-11-28 15:12:01,591][02465] Decorrelating experience for 64 frames... +[2024-11-28 15:12:03,726][02465] Decorrelating experience for 96 frames... +[2024-11-28 15:12:04,608][02447] Signal inference workers to stop experience collection... +[2024-11-28 15:12:04,617][02460] InferenceWorker_p0-w0: stopping experience collection +[2024-11-28 15:12:05,994][00190] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 207.2. Samples: 2072. 
Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-11-28 15:12:06,000][00190] Avg episode reward: [(0, '2.372')] +[2024-11-28 15:12:07,425][02447] Signal inference workers to resume experience collection... +[2024-11-28 15:12:07,426][02460] InferenceWorker_p0-w0: resuming experience collection +[2024-11-28 15:12:10,994][00190] Fps is (10 sec: 2048.1, 60 sec: 1365.3, 300 sec: 1365.3). Total num frames: 20480. Throughput: 0: 331.3. Samples: 4970. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2024-11-28 15:12:10,998][00190] Avg episode reward: [(0, '3.505')] +[2024-11-28 15:12:15,994][00190] Fps is (10 sec: 3276.8, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 32768. Throughput: 0: 363.4. Samples: 7268. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-11-28 15:12:15,996][00190] Avg episode reward: [(0, '3.905')] +[2024-11-28 15:12:18,238][02460] Updated weights for policy 0, policy_version 10 (0.0024) +[2024-11-28 15:12:20,994][00190] Fps is (10 sec: 2457.6, 60 sec: 1802.2, 300 sec: 1802.2). Total num frames: 45056. Throughput: 0: 446.3. Samples: 11158. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-11-28 15:12:20,996][00190] Avg episode reward: [(0, '4.521')] +[2024-11-28 15:12:25,994][00190] Fps is (10 sec: 3276.7, 60 sec: 2184.5, 300 sec: 2184.5). Total num frames: 65536. Throughput: 0: 561.5. Samples: 16844. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-11-28 15:12:25,996][00190] Avg episode reward: [(0, '4.662')] +[2024-11-28 15:12:29,876][02460] Updated weights for policy 0, policy_version 20 (0.0026) +[2024-11-28 15:12:30,994][00190] Fps is (10 sec: 3686.3, 60 sec: 2340.6, 300 sec: 2340.6). Total num frames: 81920. Throughput: 0: 567.3. Samples: 19856. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:12:30,998][00190] Avg episode reward: [(0, '4.594')] +[2024-11-28 15:12:35,994][00190] Fps is (10 sec: 3276.9, 60 sec: 2457.6, 300 sec: 2457.6). Total num frames: 98304. Throughput: 0: 592.6. Samples: 23702. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-11-28 15:12:35,999][00190] Avg episode reward: [(0, '4.645')] +[2024-11-28 15:12:40,994][00190] Fps is (10 sec: 3686.5, 60 sec: 2639.6, 300 sec: 2639.6). Total num frames: 118784. Throughput: 0: 665.2. Samples: 29934. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-11-28 15:12:40,996][00190] Avg episode reward: [(0, '4.650')] +[2024-11-28 15:12:41,002][02447] Saving new best policy, reward=4.650! +[2024-11-28 15:12:41,475][02460] Updated weights for policy 0, policy_version 30 (0.0014) +[2024-11-28 15:12:45,994][00190] Fps is (10 sec: 4096.0, 60 sec: 2785.3, 300 sec: 2785.3). Total num frames: 139264. Throughput: 0: 733.3. Samples: 33000. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:12:45,999][00190] Avg episode reward: [(0, '4.674')] +[2024-11-28 15:12:46,003][02447] Saving new best policy, reward=4.674! +[2024-11-28 15:12:50,994][00190] Fps is (10 sec: 3276.8, 60 sec: 2755.5, 300 sec: 2755.5). Total num frames: 151552. Throughput: 0: 778.8. Samples: 37120. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2024-11-28 15:12:50,996][00190] Avg episode reward: [(0, '4.569')] +[2024-11-28 15:12:53,883][02460] Updated weights for policy 0, policy_version 40 (0.0023) +[2024-11-28 15:12:55,994][00190] Fps is (10 sec: 2867.2, 60 sec: 2798.9, 300 sec: 2798.9). Total num frames: 167936. Throughput: 0: 836.4. Samples: 42608. 
Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:12:55,998][00190] Avg episode reward: [(0, '4.410')] +[2024-11-28 15:13:00,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3140.3, 300 sec: 2898.7). Total num frames: 188416. Throughput: 0: 854.9. Samples: 45738. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-11-28 15:13:00,999][00190] Avg episode reward: [(0, '4.326')] +[2024-11-28 15:13:05,847][02460] Updated weights for policy 0, policy_version 50 (0.0023) +[2024-11-28 15:13:05,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 2925.7). Total num frames: 204800. Throughput: 0: 873.4. Samples: 50462. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:13:05,999][00190] Avg episode reward: [(0, '4.352')] +[2024-11-28 15:13:10,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 2949.1). Total num frames: 221184. Throughput: 0: 862.2. Samples: 55642. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-11-28 15:13:11,000][00190] Avg episode reward: [(0, '4.492')] +[2024-11-28 15:13:15,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3020.8). Total num frames: 241664. Throughput: 0: 864.4. Samples: 58756. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-11-28 15:13:15,996][00190] Avg episode reward: [(0, '4.507')] +[2024-11-28 15:13:16,241][02460] Updated weights for policy 0, policy_version 60 (0.0014) +[2024-11-28 15:13:20,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3035.9). Total num frames: 258048. Throughput: 0: 895.6. Samples: 64002. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-11-28 15:13:20,996][00190] Avg episode reward: [(0, '4.430')] +[2024-11-28 15:13:21,002][02447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000063_258048.pth... +[2024-11-28 15:13:25,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3049.2). Total num frames: 274432. Throughput: 0: 852.8. Samples: 68308. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-11-28 15:13:25,996][00190] Avg episode reward: [(0, '4.391')] +[2024-11-28 15:13:28,890][02460] Updated weights for policy 0, policy_version 70 (0.0026) +[2024-11-28 15:13:30,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3104.3). Total num frames: 294912. Throughput: 0: 853.7. Samples: 71418. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-11-28 15:13:30,999][00190] Avg episode reward: [(0, '4.440')] +[2024-11-28 15:13:35,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3113.0). Total num frames: 311296. Throughput: 0: 892.1. Samples: 77266. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-11-28 15:13:35,998][00190] Avg episode reward: [(0, '4.405')] +[2024-11-28 15:13:40,994][00190] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3081.8). Total num frames: 323584. Throughput: 0: 862.0. Samples: 81396. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:13:40,996][00190] Avg episode reward: [(0, '4.226')] +[2024-11-28 15:13:41,012][02460] Updated weights for policy 0, policy_version 80 (0.0014) +[2024-11-28 15:13:45,994][00190] Fps is (10 sec: 3686.3, 60 sec: 3481.6, 300 sec: 3165.1). Total num frames: 348160. Throughput: 0: 861.8. Samples: 84518. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-11-28 15:13:45,998][00190] Avg episode reward: [(0, '4.343')] +[2024-11-28 15:13:50,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3169.9). Total num frames: 364544. Throughput: 0: 893.4. Samples: 90666. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-11-28 15:13:50,999][00190] Avg episode reward: [(0, '4.693')] +[2024-11-28 15:13:51,008][02447] Saving new best policy, reward=4.693! +[2024-11-28 15:13:52,379][02460] Updated weights for policy 0, policy_version 90 (0.0029) +[2024-11-28 15:13:55,994][00190] Fps is (10 sec: 2867.3, 60 sec: 3481.6, 300 sec: 3140.3). Total num frames: 376832. Throughput: 0: 860.2. Samples: 94352. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-11-28 15:13:56,000][00190] Avg episode reward: [(0, '4.539')] +[2024-11-28 15:14:00,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3178.5). Total num frames: 397312. Throughput: 0: 853.6. Samples: 97166. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:14:00,996][00190] Avg episode reward: [(0, '4.412')] +[2024-11-28 15:14:03,530][02460] Updated weights for policy 0, policy_version 100 (0.0030) +[2024-11-28 15:14:05,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3213.8). Total num frames: 417792. Throughput: 0: 877.5. Samples: 103488. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:14:06,003][00190] Avg episode reward: [(0, '4.334')] +[2024-11-28 15:14:10,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3185.8). Total num frames: 430080. Throughput: 0: 882.4. Samples: 108016. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-11-28 15:14:10,999][00190] Avg episode reward: [(0, '4.399')] +[2024-11-28 15:14:15,858][02460] Updated weights for policy 0, policy_version 110 (0.0022) +[2024-11-28 15:14:15,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3218.3). Total num frames: 450560. Throughput: 0: 862.0. Samples: 110206. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-11-28 15:14:16,000][00190] Avg episode reward: [(0, '4.551')] +[2024-11-28 15:14:20,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3248.6). Total num frames: 471040. Throughput: 0: 869.5. Samples: 116392. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:14:21,000][00190] Avg episode reward: [(0, '4.575')] +[2024-11-28 15:14:25,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3222.2). Total num frames: 483328. Throughput: 0: 885.8. Samples: 121258. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-11-28 15:14:25,996][00190] Avg episode reward: [(0, '4.600')] +[2024-11-28 15:14:28,659][02460] Updated weights for policy 0, policy_version 120 (0.0023) +[2024-11-28 15:14:30,994][00190] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3223.9). Total num frames: 499712. Throughput: 0: 857.1. Samples: 123088. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-11-28 15:14:30,996][00190] Avg episode reward: [(0, '4.695')] +[2024-11-28 15:14:31,008][02447] Saving new best policy, reward=4.695! +[2024-11-28 15:14:35,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3225.6). Total num frames: 516096. Throughput: 0: 845.6. Samples: 128716. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:14:35,996][00190] Avg episode reward: [(0, '4.542')] +[2024-11-28 15:14:39,037][02460] Updated weights for policy 0, policy_version 130 (0.0036) +[2024-11-28 15:14:40,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3252.0). Total num frames: 536576. Throughput: 0: 887.0. Samples: 134268. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:14:40,996][00190] Avg episode reward: [(0, '4.596')] +[2024-11-28 15:14:45,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3228.6). Total num frames: 548864. 
Throughput: 0: 863.9. Samples: 136042. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-11-28 15:14:45,996][00190] Avg episode reward: [(0, '4.636')] +[2024-11-28 15:14:50,994][00190] Fps is (10 sec: 3276.7, 60 sec: 3413.3, 300 sec: 3253.4). Total num frames: 569344. Throughput: 0: 841.6. Samples: 141358. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:14:50,997][00190] Avg episode reward: [(0, '4.446')] +[2024-11-28 15:14:51,498][02460] Updated weights for policy 0, policy_version 140 (0.0019) +[2024-11-28 15:14:55,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3276.8). Total num frames: 589824. Throughput: 0: 876.8. Samples: 147472. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-11-28 15:14:56,000][00190] Avg episode reward: [(0, '4.394')] +[2024-11-28 15:15:00,994][00190] Fps is (10 sec: 3276.9, 60 sec: 3413.3, 300 sec: 3254.7). Total num frames: 602112. Throughput: 0: 870.2. Samples: 149366. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-11-28 15:15:01,004][00190] Avg episode reward: [(0, '4.366')] +[2024-11-28 15:15:04,311][02460] Updated weights for policy 0, policy_version 150 (0.0022) +[2024-11-28 15:15:05,994][00190] Fps is (10 sec: 2867.1, 60 sec: 3345.1, 300 sec: 3255.2). Total num frames: 618496. Throughput: 0: 836.0. Samples: 154014. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-11-28 15:15:06,000][00190] Avg episode reward: [(0, '4.457')] +[2024-11-28 15:15:10,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3276.8). Total num frames: 638976. Throughput: 0: 861.3. Samples: 160018. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-11-28 15:15:10,998][00190] Avg episode reward: [(0, '4.550')] +[2024-11-28 15:15:15,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3256.3). Total num frames: 651264. Throughput: 0: 876.3. Samples: 162520. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-11-28 15:15:16,000][00190] Avg episode reward: [(0, '4.633')] +[2024-11-28 15:15:16,099][02460] Updated weights for policy 0, policy_version 160 (0.0018) +[2024-11-28 15:15:20,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3276.8). Total num frames: 671744. Throughput: 0: 840.4. Samples: 166536. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-11-28 15:15:21,000][00190] Avg episode reward: [(0, '4.489')] +[2024-11-28 15:15:21,010][02447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000164_671744.pth... +[2024-11-28 15:15:25,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3276.8). Total num frames: 688128. Throughput: 0: 845.1. Samples: 172298. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:15:26,000][00190] Avg episode reward: [(0, '4.253')] +[2024-11-28 15:15:27,350][02460] Updated weights for policy 0, policy_version 170 (0.0022) +[2024-11-28 15:15:30,994][00190] Fps is (10 sec: 3276.6, 60 sec: 3413.3, 300 sec: 3276.8). Total num frames: 704512. Throughput: 0: 873.1. Samples: 175332. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:15:30,997][00190] Avg episode reward: [(0, '4.270')] +[2024-11-28 15:15:35,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3276.8). Total num frames: 720896. Throughput: 0: 839.2. Samples: 179120. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:15:35,999][00190] Avg episode reward: [(0, '4.321')] +[2024-11-28 15:15:39,839][02460] Updated weights for policy 0, policy_version 180 (0.0018) +[2024-11-28 15:15:40,996][00190] Fps is (10 sec: 3685.8, 60 sec: 3413.2, 300 sec: 3295.0). 
Total num frames: 741376. Throughput: 0: 835.0. Samples: 185050. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:15:41,004][00190] Avg episode reward: [(0, '4.445')] +[2024-11-28 15:15:45,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3294.6). Total num frames: 757760. Throughput: 0: 862.3. Samples: 188170. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:15:45,997][00190] Avg episode reward: [(0, '4.385')] +[2024-11-28 15:15:50,996][00190] Fps is (10 sec: 2867.2, 60 sec: 3344.9, 300 sec: 3276.8). Total num frames: 770048. Throughput: 0: 857.4. Samples: 192600. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-11-28 15:15:50,999][00190] Avg episode reward: [(0, '4.392')] +[2024-11-28 15:15:52,241][02460] Updated weights for policy 0, policy_version 190 (0.0013) +[2024-11-28 15:15:55,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3293.9). Total num frames: 790528. Throughput: 0: 842.4. Samples: 197926. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-11-28 15:15:55,996][00190] Avg episode reward: [(0, '4.539')] +[2024-11-28 15:16:00,994][00190] Fps is (10 sec: 4096.8, 60 sec: 3481.6, 300 sec: 3310.2). Total num frames: 811008. Throughput: 0: 857.5. Samples: 201106. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-11-28 15:16:00,996][00190] Avg episode reward: [(0, '4.695')] +[2024-11-28 15:16:03,116][02460] Updated weights for policy 0, policy_version 200 (0.0023) +[2024-11-28 15:16:05,997][00190] Fps is (10 sec: 3275.7, 60 sec: 3413.2, 300 sec: 3293.1). Total num frames: 823296. Throughput: 0: 877.2. Samples: 206014. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-11-28 15:16:06,004][00190] Avg episode reward: [(0, '4.671')] +[2024-11-28 15:16:10,994][00190] Fps is (10 sec: 3276.9, 60 sec: 3413.3, 300 sec: 3308.9). Total num frames: 843776. Throughput: 0: 859.8. Samples: 210988. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-11-28 15:16:10,997][00190] Avg episode reward: [(0, '4.490')] +[2024-11-28 15:16:14,312][02460] Updated weights for policy 0, policy_version 210 (0.0020) +[2024-11-28 15:16:15,994][00190] Fps is (10 sec: 4097.2, 60 sec: 3549.8, 300 sec: 3324.1). Total num frames: 864256. Throughput: 0: 860.9. Samples: 214072. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-11-28 15:16:16,000][00190] Avg episode reward: [(0, '4.466')] +[2024-11-28 15:16:20,994][00190] Fps is (10 sec: 3686.3, 60 sec: 3481.6, 300 sec: 3323.2). Total num frames: 880640. Throughput: 0: 902.4. Samples: 219728. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:16:20,997][00190] Avg episode reward: [(0, '4.581')] +[2024-11-28 15:16:25,994][00190] Fps is (10 sec: 3276.9, 60 sec: 3481.6, 300 sec: 3322.3). Total num frames: 897024. Throughput: 0: 866.1. Samples: 224022. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-11-28 15:16:26,001][00190] Avg episode reward: [(0, '4.392')] +[2024-11-28 15:16:26,709][02460] Updated weights for policy 0, policy_version 220 (0.0013) +[2024-11-28 15:16:30,994][00190] Fps is (10 sec: 3686.5, 60 sec: 3549.9, 300 sec: 3336.4). Total num frames: 917504. Throughput: 0: 866.0. Samples: 227138. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-11-28 15:16:30,995][00190] Avg episode reward: [(0, '4.375')] +[2024-11-28 15:16:35,994][00190] Fps is (10 sec: 3686.3, 60 sec: 3549.9, 300 sec: 3335.3). Total num frames: 933888. Throughput: 0: 903.0. Samples: 233232. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-11-28 15:16:36,001][00190] Avg episode reward: [(0, '4.478')] +[2024-11-28 15:16:38,616][02460] Updated weights for policy 0, policy_version 230 (0.0015) +[2024-11-28 15:16:40,994][00190] Fps is (10 sec: 2867.2, 60 sec: 3413.5, 300 sec: 3319.9). Total num frames: 946176. Throughput: 0: 868.3. Samples: 237000. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-11-28 15:16:40,996][00190] Avg episode reward: [(0, '4.553')] +[2024-11-28 15:16:45,994][00190] Fps is (10 sec: 3276.9, 60 sec: 3481.6, 300 sec: 3333.3). Total num frames: 966656. Throughput: 0: 865.3. Samples: 240044. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-11-28 15:16:46,001][00190] Avg episode reward: [(0, '4.597')] +[2024-11-28 15:16:49,147][02460] Updated weights for policy 0, policy_version 240 (0.0018) +[2024-11-28 15:16:50,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3618.3, 300 sec: 3346.2). Total num frames: 987136. Throughput: 0: 891.4. Samples: 246122. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:16:50,996][00190] Avg episode reward: [(0, '4.643')] +[2024-11-28 15:16:55,995][00190] Fps is (10 sec: 3276.5, 60 sec: 3481.5, 300 sec: 3387.9). Total num frames: 999424. Throughput: 0: 868.8. Samples: 250086. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:16:55,997][00190] Avg episode reward: [(0, '4.681')] +[2024-11-28 15:17:00,994][00190] Fps is (10 sec: 2867.2, 60 sec: 3413.4, 300 sec: 3443.4). Total num frames: 1015808. Throughput: 0: 853.8. Samples: 252494. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:17:01,000][00190] Avg episode reward: [(0, '4.379')] +[2024-11-28 15:17:02,056][02460] Updated weights for policy 0, policy_version 250 (0.0026) +[2024-11-28 15:17:05,994][00190] Fps is (10 sec: 3686.7, 60 sec: 3550.0, 300 sec: 3443.4). Total num frames: 1036288. Throughput: 0: 863.6. Samples: 258588. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-11-28 15:17:05,996][00190] Avg episode reward: [(0, '4.458')] +[2024-11-28 15:17:10,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 1052672. Throughput: 0: 869.6. Samples: 263152. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:17:10,997][00190] Avg episode reward: [(0, '4.552')] +[2024-11-28 15:17:14,557][02460] Updated weights for policy 0, policy_version 260 (0.0017) +[2024-11-28 15:17:15,994][00190] Fps is (10 sec: 3276.9, 60 sec: 3413.4, 300 sec: 3471.2). Total num frames: 1069056. Throughput: 0: 845.1. Samples: 265166. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:17:15,996][00190] Avg episode reward: [(0, '4.372')] +[2024-11-28 15:17:20,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3471.2). Total num frames: 1089536. Throughput: 0: 845.5. Samples: 271278. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-11-28 15:17:20,996][00190] Avg episode reward: [(0, '4.475')] +[2024-11-28 15:17:21,008][02447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000266_1089536.pth... +[2024-11-28 15:17:21,115][02447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000063_258048.pth +[2024-11-28 15:17:25,995][00190] Fps is (10 sec: 3276.4, 60 sec: 3413.3, 300 sec: 3457.3). Total num frames: 1101824. Throughput: 0: 873.4. Samples: 276302. 
Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-11-28 15:17:26,001][00190] Avg episode reward: [(0, '4.486')] +[2024-11-28 15:17:26,380][02460] Updated weights for policy 0, policy_version 270 (0.0018) +[2024-11-28 15:17:30,994][00190] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3457.3). Total num frames: 1118208. Throughput: 0: 844.7. Samples: 278056. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:17:30,996][00190] Avg episode reward: [(0, '4.714')] +[2024-11-28 15:17:31,010][02447] Saving new best policy, reward=4.714! +[2024-11-28 15:17:35,994][00190] Fps is (10 sec: 2867.5, 60 sec: 3276.8, 300 sec: 3429.5). Total num frames: 1130496. Throughput: 0: 799.6. Samples: 282106. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-11-28 15:17:35,996][00190] Avg episode reward: [(0, '4.727')] +[2024-11-28 15:17:36,012][02447] Saving new best policy, reward=4.727! +[2024-11-28 15:17:39,214][02460] Updated weights for policy 0, policy_version 280 (0.0025) +[2024-11-28 15:17:40,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 1150976. Throughput: 0: 842.5. Samples: 287998. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-11-28 15:17:40,999][00190] Avg episode reward: [(0, '4.509')] +[2024-11-28 15:17:45,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 3429.5). Total num frames: 1163264. Throughput: 0: 829.6. Samples: 289828. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-11-28 15:17:46,000][00190] Avg episode reward: [(0, '4.421')] +[2024-11-28 15:17:50,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 3443.4). Total num frames: 1183744. Throughput: 0: 810.3. Samples: 295050. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:17:51,000][00190] Avg episode reward: [(0, '4.455')] +[2024-11-28 15:17:51,622][02460] Updated weights for policy 0, policy_version 290 (0.0019) +[2024-11-28 15:17:55,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3413.4, 300 sec: 3443.4). Total num frames: 1204224. Throughput: 0: 843.3. Samples: 301102. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-11-28 15:17:56,001][00190] Avg episode reward: [(0, '4.431')] +[2024-11-28 15:18:00,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3429.5). Total num frames: 1216512. Throughput: 0: 846.5. Samples: 303258. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-11-28 15:18:00,999][00190] Avg episode reward: [(0, '4.603')] +[2024-11-28 15:18:03,728][02460] Updated weights for policy 0, policy_version 300 (0.0014) +[2024-11-28 15:18:05,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3443.4). Total num frames: 1236992. Throughput: 0: 812.6. Samples: 307844. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2024-11-28 15:18:06,001][00190] Avg episode reward: [(0, '4.754')] +[2024-11-28 15:18:06,004][02447] Saving new best policy, reward=4.754! +[2024-11-28 15:18:10,995][00190] Fps is (10 sec: 4095.7, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 1257472. Throughput: 0: 837.9. Samples: 314008. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:18:10,998][00190] Avg episode reward: [(0, '4.726')] +[2024-11-28 15:18:15,234][02460] Updated weights for policy 0, policy_version 310 (0.0018) +[2024-11-28 15:18:15,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3429.5). Total num frames: 1269760. Throughput: 0: 858.1. Samples: 316670. 
Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2024-11-28 15:18:15,997][00190] Avg episode reward: [(0, '4.755')] +[2024-11-28 15:18:16,001][02447] Saving new best policy, reward=4.755! +[2024-11-28 15:18:20,994][00190] Fps is (10 sec: 2867.4, 60 sec: 3276.8, 300 sec: 3429.5). Total num frames: 1286144. Throughput: 0: 857.6. Samples: 320696. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-11-28 15:18:20,996][00190] Avg episode reward: [(0, '4.730')] +[2024-11-28 15:18:25,994][00190] Fps is (10 sec: 3686.3, 60 sec: 3413.4, 300 sec: 3429.5). Total num frames: 1306624. Throughput: 0: 853.4. Samples: 326400. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-11-28 15:18:26,001][00190] Avg episode reward: [(0, '4.731')] +[2024-11-28 15:18:26,855][02460] Updated weights for policy 0, policy_version 320 (0.0029) +[2024-11-28 15:18:30,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 1323008. Throughput: 0: 879.5. Samples: 329406. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-11-28 15:18:30,999][00190] Avg episode reward: [(0, '4.644')] +[2024-11-28 15:18:35,994][00190] Fps is (10 sec: 2867.3, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 1335296. Throughput: 0: 848.4. Samples: 333226. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:18:35,999][00190] Avg episode reward: [(0, '4.463')] +[2024-11-28 15:18:39,318][02460] Updated weights for policy 0, policy_version 330 (0.0023) +[2024-11-28 15:18:40,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3415.6). Total num frames: 1355776. Throughput: 0: 845.6. Samples: 339154. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-11-28 15:18:40,996][00190] Avg episode reward: [(0, '4.252')] +[2024-11-28 15:18:45,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3429.5). Total num frames: 1376256. Throughput: 0: 865.8. Samples: 342220. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:18:45,996][00190] Avg episode reward: [(0, '4.304')] +[2024-11-28 15:18:50,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 1388544. Throughput: 0: 860.5. Samples: 346568. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-11-28 15:18:50,997][00190] Avg episode reward: [(0, '4.494')] +[2024-11-28 15:18:52,009][02460] Updated weights for policy 0, policy_version 340 (0.0023) +[2024-11-28 15:18:55,994][00190] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3415.6). Total num frames: 1404928. Throughput: 0: 843.4. Samples: 351962. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-11-28 15:18:56,000][00190] Avg episode reward: [(0, '4.582')] +[2024-11-28 15:19:00,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3429.5). Total num frames: 1429504. Throughput: 0: 851.9. Samples: 355004. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-11-28 15:19:00,998][00190] Avg episode reward: [(0, '4.407')] +[2024-11-28 15:19:02,300][02460] Updated weights for policy 0, policy_version 350 (0.0026) +[2024-11-28 15:19:05,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 1441792. Throughput: 0: 873.5. Samples: 360002. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-11-28 15:19:06,000][00190] Avg episode reward: [(0, '4.473')] +[2024-11-28 15:19:10,994][00190] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3415.6). Total num frames: 1458176. Throughput: 0: 859.9. Samples: 365096. 
Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0) +[2024-11-28 15:19:10,996][00190] Avg episode reward: [(0, '4.525')] +[2024-11-28 15:19:14,030][02460] Updated weights for policy 0, policy_version 360 (0.0022) +[2024-11-28 15:19:15,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3429.5). Total num frames: 1482752. Throughput: 0: 862.9. Samples: 368236. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-11-28 15:19:15,999][00190] Avg episode reward: [(0, '4.468')] +[2024-11-28 15:19:20,994][00190] Fps is (10 sec: 3686.2, 60 sec: 3481.6, 300 sec: 3429.5). Total num frames: 1495040. Throughput: 0: 904.0. Samples: 373908. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:19:20,998][00190] Avg episode reward: [(0, '4.490')] +[2024-11-28 15:19:21,014][02447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000365_1495040.pth... +[2024-11-28 15:19:21,170][02447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000164_671744.pth +[2024-11-28 15:19:25,994][00190] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 1511424. Throughput: 0: 863.6. Samples: 378018. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:19:25,996][00190] Avg episode reward: [(0, '4.639')] +[2024-11-28 15:19:26,503][02460] Updated weights for policy 0, policy_version 370 (0.0020) +[2024-11-28 15:19:30,994][00190] Fps is (10 sec: 3686.6, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 1531904. Throughput: 0: 862.4. Samples: 381026. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:19:30,996][00190] Avg episode reward: [(0, '4.664')] +[2024-11-28 15:19:35,995][00190] Fps is (10 sec: 3685.9, 60 sec: 3549.8, 300 sec: 3429.5). Total num frames: 1548288. Throughput: 0: 899.7. Samples: 387054. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:19:35,999][00190] Avg episode reward: [(0, '4.566')] +[2024-11-28 15:19:38,084][02460] Updated weights for policy 0, policy_version 380 (0.0015) +[2024-11-28 15:19:40,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 1564672. Throughput: 0: 862.9. Samples: 390794. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:19:40,998][00190] Avg episode reward: [(0, '4.629')] +[2024-11-28 15:19:45,994][00190] Fps is (10 sec: 3686.9, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 1585152. Throughput: 0: 862.9. Samples: 393836. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:19:45,996][00190] Avg episode reward: [(0, '4.604')] +[2024-11-28 15:19:48,832][02460] Updated weights for policy 0, policy_version 390 (0.0018) +[2024-11-28 15:19:50,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3429.5). Total num frames: 1601536. Throughput: 0: 891.3. Samples: 400110. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:19:50,996][00190] Avg episode reward: [(0, '4.620')] +[2024-11-28 15:19:55,994][00190] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3429.5). Total num frames: 1613824. Throughput: 0: 869.6. Samples: 404226. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:19:55,996][00190] Avg episode reward: [(0, '4.685')] +[2024-11-28 15:20:00,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 1634304. Throughput: 0: 852.0. Samples: 406576. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-11-28 15:20:00,997][00190] Avg episode reward: [(0, '4.694')] +[2024-11-28 15:20:01,416][02460] Updated weights for policy 0, policy_version 400 (0.0035) +[2024-11-28 15:20:05,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3443.4). Total num frames: 1654784. Throughput: 0: 862.6. Samples: 412726. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2024-11-28 15:20:05,998][00190] Avg episode reward: [(0, '4.606')] +[2024-11-28 15:20:10,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 1667072. Throughput: 0: 880.6. Samples: 417646. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-11-28 15:20:10,996][00190] Avg episode reward: [(0, '4.371')] +[2024-11-28 15:20:14,194][02460] Updated weights for policy 0, policy_version 410 (0.0015) +[2024-11-28 15:20:15,994][00190] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3429.5). Total num frames: 1683456. Throughput: 0: 855.6. Samples: 419528. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-11-28 15:20:16,000][00190] Avg episode reward: [(0, '4.364')] +[2024-11-28 15:20:20,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 1708032. Throughput: 0: 853.8. Samples: 425476. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:20:20,996][00190] Avg episode reward: [(0, '4.373')] +[2024-11-28 15:20:23,873][02460] Updated weights for policy 0, policy_version 420 (0.0026) +[2024-11-28 15:20:25,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 1720320. Throughput: 0: 896.9. Samples: 431154. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:20:25,996][00190] Avg episode reward: [(0, '4.436')] +[2024-11-28 15:20:30,994][00190] Fps is (10 sec: 2867.1, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 1736704. Throughput: 0: 868.4. Samples: 432916. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-11-28 15:20:30,996][00190] Avg episode reward: [(0, '4.487')] +[2024-11-28 15:20:35,994][00190] Fps is (10 sec: 3686.2, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 1757184. Throughput: 0: 853.8. Samples: 438532. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2024-11-28 15:20:35,998][00190] Avg episode reward: [(0, '4.536')] +[2024-11-28 15:20:36,343][02460] Updated weights for policy 0, policy_version 430 (0.0023) +[2024-11-28 15:20:40,994][00190] Fps is (10 sec: 4096.1, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 1777664. Throughput: 0: 902.5. Samples: 444840. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-11-28 15:20:41,004][00190] Avg episode reward: [(0, '4.594')] +[2024-11-28 15:20:45,996][00190] Fps is (10 sec: 3276.2, 60 sec: 3413.2, 300 sec: 3457.3). Total num frames: 1789952. Throughput: 0: 891.0. Samples: 446674. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-11-28 15:20:45,998][00190] Avg episode reward: [(0, '4.674')] +[2024-11-28 15:20:48,618][02460] Updated weights for policy 0, policy_version 440 (0.0030) +[2024-11-28 15:20:50,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 1810432. Throughput: 0: 861.4. Samples: 451490. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-11-28 15:20:50,996][00190] Avg episode reward: [(0, '4.578')] +[2024-11-28 15:20:55,994][00190] Fps is (10 sec: 4096.7, 60 sec: 3618.1, 300 sec: 3457.3). Total num frames: 1830912. Throughput: 0: 889.4. Samples: 457668. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-11-28 15:20:55,998][00190] Avg episode reward: [(0, '4.588')] +[2024-11-28 15:20:59,936][02460] Updated weights for policy 0, policy_version 450 (0.0037) +[2024-11-28 15:21:00,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 1843200. Throughput: 0: 900.7. Samples: 460060. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:21:00,998][00190] Avg episode reward: [(0, '4.718')] +[2024-11-28 15:21:05,994][00190] Fps is (10 sec: 2867.4, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 1859584. Throughput: 0: 863.6. Samples: 464338. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-11-28 15:21:05,998][00190] Avg episode reward: [(0, '4.535')] +[2024-11-28 15:21:10,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3443.4). Total num frames: 1880064. Throughput: 0: 876.0. Samples: 470574. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:21:11,000][00190] Avg episode reward: [(0, '4.808')] +[2024-11-28 15:21:11,041][02447] Saving new best policy, reward=4.808! +[2024-11-28 15:21:11,046][02460] Updated weights for policy 0, policy_version 460 (0.0016) +[2024-11-28 15:21:15,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3443.4). Total num frames: 1896448. Throughput: 0: 901.9. Samples: 473502. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-11-28 15:21:15,998][00190] Avg episode reward: [(0, '4.964')] +[2024-11-28 15:21:16,001][02447] Saving new best policy, reward=4.964! +[2024-11-28 15:21:20,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 1912832. Throughput: 0: 860.5. Samples: 477254. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:21:21,001][00190] Avg episode reward: [(0, '4.722')] +[2024-11-28 15:21:21,011][02447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000467_1912832.pth... +[2024-11-28 15:21:21,140][02447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000266_1089536.pth +[2024-11-28 15:21:23,583][02460] Updated weights for policy 0, policy_version 470 (0.0032) +[2024-11-28 15:21:25,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3443.4). Total num frames: 1933312. Throughput: 0: 858.4. Samples: 483470. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-11-28 15:21:26,003][00190] Avg episode reward: [(0, '4.427')] +[2024-11-28 15:21:30,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3443.4). Total num frames: 1949696. Throughput: 0: 883.2. Samples: 486418. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:21:30,996][00190] Avg episode reward: [(0, '4.541')] +[2024-11-28 15:21:35,994][00190] Fps is (10 sec: 2867.2, 60 sec: 3413.4, 300 sec: 3443.4). Total num frames: 1961984. Throughput: 0: 867.9. Samples: 490546. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-11-28 15:21:35,997][00190] Avg episode reward: [(0, '4.550')] +[2024-11-28 15:21:36,175][02460] Updated weights for policy 0, policy_version 480 (0.0014) +[2024-11-28 15:21:40,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 1982464. Throughput: 0: 858.1. Samples: 496282. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-11-28 15:21:40,999][00190] Avg episode reward: [(0, '4.770')] +[2024-11-28 15:21:45,994][00190] Fps is (10 sec: 4095.8, 60 sec: 3550.0, 300 sec: 3443.4). Total num frames: 2002944. Throughput: 0: 874.7. Samples: 499424. 
Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2024-11-28 15:21:46,002][00190] Avg episode reward: [(0, '4.897')] +[2024-11-28 15:21:46,206][02460] Updated weights for policy 0, policy_version 490 (0.0033) +[2024-11-28 15:21:50,994][00190] Fps is (10 sec: 3686.3, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 2019328. Throughput: 0: 885.5. Samples: 504186. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-11-28 15:21:50,999][00190] Avg episode reward: [(0, '4.947')] +[2024-11-28 15:21:55,994][00190] Fps is (10 sec: 3276.9, 60 sec: 3413.4, 300 sec: 3457.3). Total num frames: 2035712. Throughput: 0: 860.8. Samples: 509310. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:21:55,999][00190] Avg episode reward: [(0, '4.898')] +[2024-11-28 15:21:58,252][02460] Updated weights for policy 0, policy_version 500 (0.0030) +[2024-11-28 15:22:00,994][00190] Fps is (10 sec: 3686.5, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 2056192. Throughput: 0: 862.2. Samples: 512302. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:22:00,997][00190] Avg episode reward: [(0, '4.930')] +[2024-11-28 15:22:05,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 2072576. Throughput: 0: 898.2. Samples: 517674. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2024-11-28 15:22:05,996][00190] Avg episode reward: [(0, '5.032')] +[2024-11-28 15:22:06,004][02447] Saving new best policy, reward=5.032! +[2024-11-28 15:22:10,632][02460] Updated weights for policy 0, policy_version 510 (0.0029) +[2024-11-28 15:22:10,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3457.3). Total num frames: 2088960. Throughput: 0: 862.8. Samples: 522294. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-11-28 15:22:10,996][00190] Avg episode reward: [(0, '5.266')] +[2024-11-28 15:22:11,005][02447] Saving new best policy, reward=5.266! +[2024-11-28 15:22:15,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 2109440. Throughput: 0: 866.0. Samples: 525388. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2024-11-28 15:22:15,996][00190] Avg episode reward: [(0, '5.187')] +[2024-11-28 15:22:20,998][00190] Fps is (10 sec: 3684.8, 60 sec: 3549.6, 300 sec: 3471.1). Total num frames: 2125824. Throughput: 0: 902.5. Samples: 531162. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2024-11-28 15:22:21,001][00190] Avg episode reward: [(0, '5.200')] +[2024-11-28 15:22:22,194][02460] Updated weights for policy 0, policy_version 520 (0.0015) +[2024-11-28 15:22:25,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3471.2). Total num frames: 2142208. Throughput: 0: 868.7. Samples: 535374. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2024-11-28 15:22:25,996][00190] Avg episode reward: [(0, '5.475')] +[2024-11-28 15:22:26,005][02447] Saving new best policy, reward=5.475! +[2024-11-28 15:22:30,994][00190] Fps is (10 sec: 3687.9, 60 sec: 3549.9, 300 sec: 3499.0). Total num frames: 2162688. Throughput: 0: 865.7. Samples: 538382. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2024-11-28 15:22:31,002][00190] Avg episode reward: [(0, '5.768')] +[2024-11-28 15:22:31,015][02447] Saving new best policy, reward=5.768! +[2024-11-28 15:22:32,714][02460] Updated weights for policy 0, policy_version 530 (0.0023) +[2024-11-28 15:22:35,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3485.1). Total num frames: 2179072. Throughput: 0: 900.1. Samples: 544692. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:22:35,996][00190] Avg episode reward: [(0, '5.279')]
+[2024-11-28 15:22:40,994][00190] Fps is (10 sec: 3276.9, 60 sec: 3549.9, 300 sec: 3499.0). Total num frames: 2195456. Throughput: 0: 872.5. Samples: 548574. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-11-28 15:22:40,996][00190] Avg episode reward: [(0, '5.311')]
+[2024-11-28 15:22:44,980][02460] Updated weights for policy 0, policy_version 540 (0.0028)
+[2024-11-28 15:22:45,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3499.0). Total num frames: 2215936. Throughput: 0: 872.4. Samples: 551558. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0)
+[2024-11-28 15:22:45,996][00190] Avg episode reward: [(0, '5.237')]
+[2024-11-28 15:22:51,002][00190] Fps is (10 sec: 4092.6, 60 sec: 3617.7, 300 sec: 3498.9). Total num frames: 2236416. Throughput: 0: 894.1. Samples: 557914. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:22:51,008][00190] Avg episode reward: [(0, '5.333')]
+[2024-11-28 15:22:55,995][00190] Fps is (10 sec: 3276.5, 60 sec: 3549.8, 300 sec: 3498.9). Total num frames: 2248704. Throughput: 0: 889.1. Samples: 562306. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:22:55,998][00190] Avg episode reward: [(0, '5.566')]
+[2024-11-28 15:22:57,275][02460] Updated weights for policy 0, policy_version 550 (0.0013)
+[2024-11-28 15:23:00,994][00190] Fps is (10 sec: 2869.6, 60 sec: 3481.6, 300 sec: 3485.1). Total num frames: 2265088. Throughput: 0: 873.1. Samples: 564676. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-11-28 15:23:00,996][00190] Avg episode reward: [(0, '5.891')]
+[2024-11-28 15:23:01,005][02447] Saving new best policy, reward=5.891!
+[2024-11-28 15:23:05,994][00190] Fps is (10 sec: 3686.8, 60 sec: 3549.9, 300 sec: 3485.1). Total num frames: 2285568. Throughput: 0: 884.9. Samples: 570980. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-11-28 15:23:05,996][00190] Avg episode reward: [(0, '5.978')]
+[2024-11-28 15:23:06,022][02447] Saving new best policy, reward=5.978!
+[2024-11-28 15:23:07,190][02460] Updated weights for policy 0, policy_version 560 (0.0014)
+[2024-11-28 15:23:10,994][00190] Fps is (10 sec: 3686.3, 60 sec: 3549.9, 300 sec: 3499.0). Total num frames: 2301952. Throughput: 0: 901.9. Samples: 575958. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-11-28 15:23:10,999][00190] Avg episode reward: [(0, '5.709')]
+[2024-11-28 15:23:15,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3499.0). Total num frames: 2318336. Throughput: 0: 879.7. Samples: 577966. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0)
+[2024-11-28 15:23:15,999][00190] Avg episode reward: [(0, '5.698')]
+[2024-11-28 15:23:19,104][02460] Updated weights for policy 0, policy_version 570 (0.0023)
+[2024-11-28 15:23:20,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3618.4, 300 sec: 3512.8). Total num frames: 2342912. Throughput: 0: 878.4. Samples: 584222. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-11-28 15:23:21,000][00190] Avg episode reward: [(0, '5.836')]
+[2024-11-28 15:23:21,010][02447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000572_2342912.pth...
+[2024-11-28 15:23:21,130][02447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000365_1495040.pth
+[2024-11-28 15:23:25,997][00190] Fps is (10 sec: 3685.2, 60 sec: 3549.7, 300 sec: 3498.9). Total num frames: 2355200. Throughput: 0: 910.6. Samples: 589552. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-11-28 15:23:25,999][00190] Avg episode reward: [(0, '5.974')]
+[2024-11-28 15:23:30,994][00190] Fps is (10 sec: 2867.3, 60 sec: 3481.6, 300 sec: 3512.8). Total num frames: 2371584. Throughput: 0: 884.6. Samples: 591366. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:23:30,996][00190] Avg episode reward: [(0, '6.049')]
+[2024-11-28 15:23:31,006][02447] Saving new best policy, reward=6.049!
+[2024-11-28 15:23:31,480][02460] Updated weights for policy 0, policy_version 580 (0.0027)
+[2024-11-28 15:23:35,994][00190] Fps is (10 sec: 3687.6, 60 sec: 3549.9, 300 sec: 3512.8). Total num frames: 2392064. Throughput: 0: 873.6. Samples: 597218. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:23:35,996][00190] Avg episode reward: [(0, '6.348')]
+[2024-11-28 15:23:35,998][02447] Saving new best policy, reward=6.348!
+[2024-11-28 15:23:40,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3512.8). Total num frames: 2412544. Throughput: 0: 906.6. Samples: 603100. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0)
+[2024-11-28 15:23:41,000][00190] Avg episode reward: [(0, '6.021')]
+[2024-11-28 15:23:42,599][02460] Updated weights for policy 0, policy_version 590 (0.0038)
+[2024-11-28 15:23:45,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3512.8). Total num frames: 2424832. Throughput: 0: 895.5. Samples: 604972. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:23:45,999][00190] Avg episode reward: [(0, '6.189')]
+[2024-11-28 15:23:50,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3482.1, 300 sec: 3526.7). Total num frames: 2445312. Throughput: 0: 873.2. Samples: 610274. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:23:50,998][00190] Avg episode reward: [(0, '6.062')]
+[2024-11-28 15:23:53,650][02460] Updated weights for policy 0, policy_version 600 (0.0025)
+[2024-11-28 15:23:55,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3618.2, 300 sec: 3512.8). Total num frames: 2465792. Throughput: 0: 899.7. Samples: 616446. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:23:55,996][00190] Avg episode reward: [(0, '6.207')]
+[2024-11-28 15:24:00,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3512.8). Total num frames: 2478080. Throughput: 0: 901.5. Samples: 618532. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-11-28 15:24:00,997][00190] Avg episode reward: [(0, '6.543')]
+[2024-11-28 15:24:01,006][02447] Saving new best policy, reward=6.543!
+[2024-11-28 15:24:05,994][00190] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3512.8). Total num frames: 2494464. Throughput: 0: 863.0. Samples: 623058. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-11-28 15:24:05,998][00190] Avg episode reward: [(0, '6.589')]
+[2024-11-28 15:24:06,069][02447] Saving new best policy, reward=6.589!
+[2024-11-28 15:24:06,074][02460] Updated weights for policy 0, policy_version 610 (0.0029)
+[2024-11-28 15:24:10,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3512.8). Total num frames: 2519040. Throughput: 0: 884.3. Samples: 629342. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-11-28 15:24:10,999][00190] Avg episode reward: [(0, '6.867')]
+[2024-11-28 15:24:11,009][02447] Saving new best policy, reward=6.867!
+[2024-11-28 15:24:15,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3512.8). Total num frames: 2531328. Throughput: 0: 902.4. Samples: 631972. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:24:15,999][00190] Avg episode reward: [(0, '6.752')]
+[2024-11-28 15:24:18,399][02460] Updated weights for policy 0, policy_version 620 (0.0013)
+[2024-11-28 15:24:20,994][00190] Fps is (10 sec: 2867.1, 60 sec: 3413.3, 300 sec: 3512.8). Total num frames: 2547712. Throughput: 0: 866.4. Samples: 636206. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:24:20,996][00190] Avg episode reward: [(0, '6.807')]
+[2024-11-28 15:24:25,994][00190] Fps is (10 sec: 3686.3, 60 sec: 3550.0, 300 sec: 3512.8). Total num frames: 2568192. Throughput: 0: 878.6. Samples: 642636. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-11-28 15:24:25,996][00190] Avg episode reward: [(0, '6.742')]
+[2024-11-28 15:24:27,819][02460] Updated weights for policy 0, policy_version 630 (0.0015)
+[2024-11-28 15:24:30,994][00190] Fps is (10 sec: 4095.9, 60 sec: 3618.1, 300 sec: 3526.7). Total num frames: 2588672. Throughput: 0: 903.1. Samples: 645614. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:24:31,002][00190] Avg episode reward: [(0, '7.109')]
+[2024-11-28 15:24:31,013][02447] Saving new best policy, reward=7.109!
+[2024-11-28 15:24:35,995][00190] Fps is (10 sec: 3276.3, 60 sec: 3481.5, 300 sec: 3512.8). Total num frames: 2600960. Throughput: 0: 872.8. Samples: 649550. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:24:36,005][00190] Avg episode reward: [(0, '7.023')]
+[2024-11-28 15:24:40,320][02460] Updated weights for policy 0, policy_version 640 (0.0016)
+[2024-11-28 15:24:40,994][00190] Fps is (10 sec: 3277.0, 60 sec: 3481.6, 300 sec: 3512.8). Total num frames: 2621440. Throughput: 0: 868.8. Samples: 655544. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:24:40,996][00190] Avg episode reward: [(0, '7.298')]
+[2024-11-28 15:24:41,007][02447] Saving new best policy, reward=7.298!
+[2024-11-28 15:24:45,994][00190] Fps is (10 sec: 4096.7, 60 sec: 3618.1, 300 sec: 3526.7). Total num frames: 2641920. Throughput: 0: 891.1. Samples: 658630. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-11-28 15:24:46,000][00190] Avg episode reward: [(0, '6.964')]
+[2024-11-28 15:24:50,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3526.7). Total num frames: 2654208. Throughput: 0: 891.1. Samples: 663158. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:24:50,997][00190] Avg episode reward: [(0, '7.213')]
+[2024-11-28 15:24:52,639][02460] Updated weights for policy 0, policy_version 650 (0.0018)
+[2024-11-28 15:24:55,994][00190] Fps is (10 sec: 3276.7, 60 sec: 3481.6, 300 sec: 3526.7). Total num frames: 2674688. Throughput: 0: 872.9. Samples: 668622. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:24:55,996][00190] Avg episode reward: [(0, '7.321')]
+[2024-11-28 15:24:56,000][02447] Saving new best policy, reward=7.321!
+[2024-11-28 15:25:00,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3526.7). Total num frames: 2695168. Throughput: 0: 884.4. Samples: 671770. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:25:01,002][00190] Avg episode reward: [(0, '7.954')]
+[2024-11-28 15:25:01,020][02447] Saving new best policy, reward=7.954!
+[2024-11-28 15:25:03,282][02460] Updated weights for policy 0, policy_version 660 (0.0027)
+[2024-11-28 15:25:06,000][00190] Fps is (10 sec: 3275.0, 60 sec: 3549.5, 300 sec: 3526.7). Total num frames: 2707456. Throughput: 0: 900.0. Samples: 676710. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:25:06,006][00190] Avg episode reward: [(0, '7.766')]
+[2024-11-28 15:25:10,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 2727936. Throughput: 0: 865.4. Samples: 681578. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-11-28 15:25:10,996][00190] Avg episode reward: [(0, '8.138')]
+[2024-11-28 15:25:11,003][02447] Saving new best policy, reward=8.138!
+[2024-11-28 15:25:14,873][02460] Updated weights for policy 0, policy_version 670 (0.0017)
+[2024-11-28 15:25:15,994][00190] Fps is (10 sec: 4098.4, 60 sec: 3618.1, 300 sec: 3526.7). Total num frames: 2748416. Throughput: 0: 868.7. Samples: 684704. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:25:15,996][00190] Avg episode reward: [(0, '8.181')]
+[2024-11-28 15:25:16,000][02447] Saving new best policy, reward=8.181!
+[2024-11-28 15:25:20,999][00190] Fps is (10 sec: 3275.2, 60 sec: 3549.6, 300 sec: 3526.7). Total num frames: 2760704. Throughput: 0: 907.0. Samples: 690370. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-11-28 15:25:21,001][00190] Avg episode reward: [(0, '8.623')]
+[2024-11-28 15:25:21,096][02447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000675_2764800.pth...
+[2024-11-28 15:25:21,239][02447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000467_1912832.pth
+[2024-11-28 15:25:21,262][02447] Saving new best policy, reward=8.623!
+[2024-11-28 15:25:25,994][00190] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3526.7). Total num frames: 2777088. Throughput: 0: 868.0. Samples: 694606. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:25:25,996][00190] Avg episode reward: [(0, '9.197')]
+[2024-11-28 15:25:26,001][02447] Saving new best policy, reward=9.197!
+[2024-11-28 15:25:27,261][02460] Updated weights for policy 0, policy_version 680 (0.0016)
+[2024-11-28 15:25:30,994][00190] Fps is (10 sec: 3688.2, 60 sec: 3481.6, 300 sec: 3526.7). Total num frames: 2797568. Throughput: 0: 872.4. Samples: 697890. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:25:30,996][00190] Avg episode reward: [(0, '9.561')]
+[2024-11-28 15:25:31,005][02447] Saving new best policy, reward=9.561!
+[2024-11-28 15:25:35,995][00190] Fps is (10 sec: 4095.4, 60 sec: 3618.2, 300 sec: 3526.7). Total num frames: 2818048. Throughput: 0: 907.7. Samples: 704006. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-11-28 15:25:36,000][00190] Avg episode reward: [(0, '9.999')]
+[2024-11-28 15:25:36,003][02447] Saving new best policy, reward=9.999!
+[2024-11-28 15:25:38,801][02460] Updated weights for policy 0, policy_version 690 (0.0020)
+[2024-11-28 15:25:40,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3526.8). Total num frames: 2830336. Throughput: 0: 870.5. Samples: 707794. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-11-28 15:25:40,996][00190] Avg episode reward: [(0, '10.695')]
+[2024-11-28 15:25:41,005][02447] Saving new best policy, reward=10.695!
+[2024-11-28 15:25:45,994][00190] Fps is (10 sec: 3277.3, 60 sec: 3481.6, 300 sec: 3526.7). Total num frames: 2850816. Throughput: 0: 864.8. Samples: 710686. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:25:45,998][00190] Avg episode reward: [(0, '10.703')]
+[2024-11-28 15:25:46,003][02447] Saving new best policy, reward=10.703!
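The save/remove pairs above (checkpoint_000000675 saved, checkpoint_000000467 removed) are a rolling keep-last-N checkpoint rotation; best-policy snapshots are written separately on each new reward high. A minimal sketch of that rotation pattern, with hypothetical names (this is not Sample Factory's actual implementation):

    import os

    def rotate_checkpoints(ckpt_dir: str, keep_last: int = 2) -> None:
        # Checkpoint files sort chronologically because the filename embeds
        # policy_version and frame count, e.g. checkpoint_000000675_2764800.pth.
        ckpts = sorted(
            f for f in os.listdir(ckpt_dir)
            if f.startswith("checkpoint_") and f.endswith(".pth")
        )
        for stale in ckpts[:-keep_last]:
            # Mirrors the "Removing /content/train_dir/.../checkpoint_*.pth" lines.
            os.remove(os.path.join(ckpt_dir, stale))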
+[2024-11-28 15:25:49,234][02460] Updated weights for policy 0, policy_version 700 (0.0021)
+[2024-11-28 15:25:50,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3526.7). Total num frames: 2871296. Throughput: 0: 896.0. Samples: 717026. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-11-28 15:25:50,996][00190] Avg episode reward: [(0, '11.859')]
+[2024-11-28 15:25:51,005][02447] Saving new best policy, reward=11.859!
+[2024-11-28 15:25:55,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3526.7). Total num frames: 2883584. Throughput: 0: 882.3. Samples: 721282. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-11-28 15:25:55,997][00190] Avg episode reward: [(0, '11.309')]
+[2024-11-28 15:26:00,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 2904064. Throughput: 0: 866.6. Samples: 723700. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:26:00,996][00190] Avg episode reward: [(0, '10.700')]
+[2024-11-28 15:26:01,867][02460] Updated weights for policy 0, policy_version 710 (0.0021)
+[2024-11-28 15:26:05,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3618.5, 300 sec: 3540.6). Total num frames: 2924544. Throughput: 0: 881.4. Samples: 730030. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-11-28 15:26:05,995][00190] Avg episode reward: [(0, '10.616')]
+[2024-11-28 15:26:10,996][00190] Fps is (10 sec: 3276.0, 60 sec: 3481.5, 300 sec: 3526.7). Total num frames: 2936832. Throughput: 0: 892.0. Samples: 734750. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-11-28 15:26:10,998][00190] Avg episode reward: [(0, '10.745')]
+[2024-11-28 15:26:13,922][02460] Updated weights for policy 0, policy_version 720 (0.0018)
+[2024-11-28 15:26:15,994][00190] Fps is (10 sec: 3276.7, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 2957312. Throughput: 0: 865.5. Samples: 736838. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-11-28 15:26:15,999][00190] Avg episode reward: [(0, '10.474')]
+[2024-11-28 15:26:20,994][00190] Fps is (10 sec: 4096.9, 60 sec: 3618.4, 300 sec: 3540.6). Total num frames: 2977792. Throughput: 0: 871.5. Samples: 743222. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:26:20,999][00190] Avg episode reward: [(0, '11.248')]
+[2024-11-28 15:26:24,213][02460] Updated weights for policy 0, policy_version 730 (0.0016)
+[2024-11-28 15:26:25,994][00190] Fps is (10 sec: 3686.5, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 2994176. Throughput: 0: 905.8. Samples: 748554. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:26:26,000][00190] Avg episode reward: [(0, '12.066')]
+[2024-11-28 15:26:26,002][02447] Saving new best policy, reward=12.066!
+[2024-11-28 15:26:30,994][00190] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 3006464. Throughput: 0: 881.9. Samples: 750370. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:26:31,000][00190] Avg episode reward: [(0, '11.833')]
+[2024-11-28 15:26:35,868][02460] Updated weights for policy 0, policy_version 740 (0.0022)
+[2024-11-28 15:26:35,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 3031040. Throughput: 0: 875.1. Samples: 756406. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-11-28 15:26:35,999][00190] Avg episode reward: [(0, '13.256')]
+[2024-11-28 15:26:36,001][02447] Saving new best policy, reward=13.256!
+[2024-11-28 15:26:40,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 3047424. Throughput: 0: 913.3. Samples: 762382. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:26:40,998][00190] Avg episode reward: [(0, '13.306')]
+[2024-11-28 15:26:41,010][02447] Saving new best policy, reward=13.306!
+[2024-11-28 15:26:45,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 3063808. Throughput: 0: 899.4. Samples: 764172. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-11-28 15:26:45,999][00190] Avg episode reward: [(0, '12.770')]
+[2024-11-28 15:26:47,932][02460] Updated weights for policy 0, policy_version 750 (0.0026)
+[2024-11-28 15:26:50,994][00190] Fps is (10 sec: 3686.3, 60 sec: 3549.8, 300 sec: 3554.5). Total num frames: 3084288. Throughput: 0: 881.1. Samples: 769680. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:26:51,001][00190] Avg episode reward: [(0, '12.626')]
+[2024-11-28 15:26:55,996][00190] Fps is (10 sec: 4095.2, 60 sec: 3686.3, 300 sec: 3554.5). Total num frames: 3104768. Throughput: 0: 916.0. Samples: 775970. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:26:56,002][00190] Avg episode reward: [(0, '13.855')]
+[2024-11-28 15:26:56,006][02447] Saving new best policy, reward=13.855!
+[2024-11-28 15:26:59,526][02460] Updated weights for policy 0, policy_version 760 (0.0020)
+[2024-11-28 15:27:00,994][00190] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3526.7). Total num frames: 3112960. Throughput: 0: 911.1. Samples: 777836. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-11-28 15:27:00,998][00190] Avg episode reward: [(0, '14.118')]
+[2024-11-28 15:27:01,011][02447] Saving new best policy, reward=14.118!
+[2024-11-28 15:27:05,994][00190] Fps is (10 sec: 2867.7, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 3133440. Throughput: 0: 875.1. Samples: 782600. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:27:06,004][00190] Avg episode reward: [(0, '14.553')]
+[2024-11-28 15:27:06,006][02447] Saving new best policy, reward=14.553!
+[2024-11-28 15:27:10,345][02460] Updated weights for policy 0, policy_version 770 (0.0034)
+[2024-11-28 15:27:10,994][00190] Fps is (10 sec: 4096.2, 60 sec: 3618.3, 300 sec: 3540.6). Total num frames: 3153920. Throughput: 0: 894.4. Samples: 788804. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-28 15:27:10,999][00190] Avg episode reward: [(0, '15.654')]
+[2024-11-28 15:27:11,008][02447] Saving new best policy, reward=15.654!
+[2024-11-28 15:27:15,994][00190] Fps is (10 sec: 3276.6, 60 sec: 3481.6, 300 sec: 3526.8). Total num frames: 3166208. Throughput: 0: 909.3. Samples: 791288. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-11-28 15:27:15,998][00190] Avg episode reward: [(0, '14.897')]
+[2024-11-28 15:27:20,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 3186688. Throughput: 0: 870.8. Samples: 795592. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:27:20,996][00190] Avg episode reward: [(0, '14.506')]
+[2024-11-28 15:27:21,008][02447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000778_3186688.pth...
+[2024-11-28 15:27:21,121][02447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000572_2342912.pth
+[2024-11-28 15:27:22,717][02460] Updated weights for policy 0, policy_version 780 (0.0028)
+[2024-11-28 15:27:25,994][00190] Fps is (10 sec: 4096.2, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 3207168. Throughput: 0: 876.8. Samples: 801836. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-28 15:27:25,996][00190] Avg episode reward: [(0, '14.059')]
+[2024-11-28 15:27:30,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 3223552. Throughput: 0: 906.3. Samples: 804954. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-11-28 15:27:30,996][00190] Avg episode reward: [(0, '13.860')]
+[2024-11-28 15:27:35,126][02460] Updated weights for policy 0, policy_version 790 (0.0021)
+[2024-11-28 15:27:35,994][00190] Fps is (10 sec: 2867.1, 60 sec: 3413.3, 300 sec: 3526.7). Total num frames: 3235840. Throughput: 0: 867.3. Samples: 808710. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:27:35,997][00190] Avg episode reward: [(0, '14.018')]
+[2024-11-28 15:27:40,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 3260416. Throughput: 0: 865.4. Samples: 814912. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-11-28 15:27:41,000][00190] Avg episode reward: [(0, '14.852')]
+[2024-11-28 15:27:44,486][02460] Updated weights for policy 0, policy_version 800 (0.0015)
+[2024-11-28 15:27:45,995][00190] Fps is (10 sec: 4505.1, 60 sec: 3618.0, 300 sec: 3540.7). Total num frames: 3280896. Throughput: 0: 897.5. Samples: 818226. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:27:46,001][00190] Avg episode reward: [(0, '14.456')]
+[2024-11-28 15:27:50,995][00190] Fps is (10 sec: 3276.4, 60 sec: 3481.5, 300 sec: 3540.6). Total num frames: 3293184. Throughput: 0: 893.9. Samples: 822826. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-11-28 15:27:50,999][00190] Avg episode reward: [(0, '14.419')]
+[2024-11-28 15:27:55,994][00190] Fps is (10 sec: 3277.3, 60 sec: 3481.7, 300 sec: 3554.5). Total num frames: 3313664. Throughput: 0: 880.2. Samples: 828412. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:27:56,001][00190] Avg episode reward: [(0, '14.082')]
+[2024-11-28 15:27:56,516][02460] Updated weights for policy 0, policy_version 810 (0.0020)
+[2024-11-28 15:28:00,995][00190] Fps is (10 sec: 4096.0, 60 sec: 3686.3, 300 sec: 3554.5). Total num frames: 3334144. Throughput: 0: 895.2. Samples: 831572. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-11-28 15:28:00,997][00190] Avg episode reward: [(0, '14.229')]
+[2024-11-28 15:28:05,994][00190] Fps is (10 sec: 3276.7, 60 sec: 3549.8, 300 sec: 3540.6). Total num frames: 3346432. Throughput: 0: 908.1. Samples: 836458. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0)
+[2024-11-28 15:28:05,999][00190] Avg episode reward: [(0, '14.707')]
+[2024-11-28 15:28:08,791][02460] Updated weights for policy 0, policy_version 820 (0.0020)
+[2024-11-28 15:28:10,994][00190] Fps is (10 sec: 3277.2, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 3366912. Throughput: 0: 881.7. Samples: 841514. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-11-28 15:28:11,000][00190] Avg episode reward: [(0, '15.667')]
+[2024-11-28 15:28:11,012][02447] Saving new best policy, reward=15.667!
+[2024-11-28 15:28:15,994][00190] Fps is (10 sec: 4096.1, 60 sec: 3686.4, 300 sec: 3540.6). Total num frames: 3387392. Throughput: 0: 882.0. Samples: 844642. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:28:15,996][00190] Avg episode reward: [(0, '15.636')]
+[2024-11-28 15:28:19,213][02460] Updated weights for policy 0, policy_version 830 (0.0021)
+[2024-11-28 15:28:20,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 3403776. Throughput: 0: 922.9. Samples: 850242. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:28:20,997][00190] Avg episode reward: [(0, '16.346')]
+[2024-11-28 15:28:21,008][02447] Saving new best policy, reward=16.346!
+[2024-11-28 15:28:25,994][00190] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 3416064. Throughput: 0: 881.6. Samples: 854582. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-11-28 15:28:25,996][00190] Avg episode reward: [(0, '15.009')]
+[2024-11-28 15:28:30,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 3436544. Throughput: 0: 876.0. Samples: 857644. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:28:30,996][00190] Avg episode reward: [(0, '15.014')]
+[2024-11-28 15:28:31,073][02460] Updated weights for policy 0, policy_version 840 (0.0013)
+[2024-11-28 15:28:35,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3540.6). Total num frames: 3457024. Throughput: 0: 905.6. Samples: 863578. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:28:35,996][00190] Avg episode reward: [(0, '16.419')]
+[2024-11-28 15:28:35,999][02447] Saving new best policy, reward=16.419!
+[2024-11-28 15:28:40,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 3469312. Throughput: 0: 865.8. Samples: 867372. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:28:41,003][00190] Avg episode reward: [(0, '16.830')]
+[2024-11-28 15:28:41,015][02447] Saving new best policy, reward=16.830!
+[2024-11-28 15:28:43,548][02460] Updated weights for policy 0, policy_version 850 (0.0026)
+[2024-11-28 15:28:45,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3481.7, 300 sec: 3540.6). Total num frames: 3489792. Throughput: 0: 864.2. Samples: 870460. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:28:45,998][00190] Avg episode reward: [(0, '17.542')]
+[2024-11-28 15:28:46,000][02447] Saving new best policy, reward=17.542!
+[2024-11-28 15:28:50,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3618.2, 300 sec: 3540.6). Total num frames: 3510272. Throughput: 0: 899.5. Samples: 876934. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-11-28 15:28:50,999][00190] Avg episode reward: [(0, '18.686')]
+[2024-11-28 15:28:51,020][02447] Saving new best policy, reward=18.686!
+[2024-11-28 15:28:54,758][02460] Updated weights for policy 0, policy_version 860 (0.0031)
+[2024-11-28 15:28:55,995][00190] Fps is (10 sec: 3276.5, 60 sec: 3481.5, 300 sec: 3540.6). Total num frames: 3522560. Throughput: 0: 883.4. Samples: 881268. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:28:55,998][00190] Avg episode reward: [(0, '18.720')]
+[2024-11-28 15:28:56,057][02447] Saving new best policy, reward=18.720!
+[2024-11-28 15:29:00,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 3547136. Throughput: 0: 880.1. Samples: 884246. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:29:00,999][00190] Avg episode reward: [(0, '16.948')]
+[2024-11-28 15:29:04,189][02460] Updated weights for policy 0, policy_version 870 (0.0029)
+[2024-11-28 15:29:05,994][00190] Fps is (10 sec: 4915.7, 60 sec: 3754.7, 300 sec: 3568.4). Total num frames: 3571712. Throughput: 0: 909.9. Samples: 891188. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-11-28 15:29:05,996][00190] Avg episode reward: [(0, '15.893')]
+[2024-11-28 15:29:11,000][00190] Fps is (10 sec: 3684.1, 60 sec: 3617.8, 300 sec: 3568.3). Total num frames: 3584000. Throughput: 0: 931.4. Samples: 896502. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-11-28 15:29:11,006][00190] Avg episode reward: [(0, '15.275')]
+[2024-11-28 15:29:15,386][02460] Updated weights for policy 0, policy_version 880 (0.0019)
+[2024-11-28 15:29:15,994][00190] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 3604480. Throughput: 0: 913.5. Samples: 898750. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-11-28 15:29:15,996][00190] Avg episode reward: [(0, '18.269')]
+[2024-11-28 15:29:20,994][00190] Fps is (10 sec: 4508.4, 60 sec: 3754.7, 300 sec: 3596.2). Total num frames: 3629056. Throughput: 0: 941.1. Samples: 905928. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-11-28 15:29:20,996][00190] Avg episode reward: [(0, '18.323')]
+[2024-11-28 15:29:21,004][02447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000886_3629056.pth...
+[2024-11-28 15:29:21,115][02447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000675_2764800.pth
+[2024-11-28 15:29:24,918][02460] Updated weights for policy 0, policy_version 890 (0.0013)
+[2024-11-28 15:29:25,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3582.3). Total num frames: 3645440. Throughput: 0: 989.6. Samples: 911906. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:29:26,001][00190] Avg episode reward: [(0, '19.604')]
+[2024-11-28 15:29:26,008][02447] Saving new best policy, reward=19.604!
+[2024-11-28 15:29:30,994][00190] Fps is (10 sec: 3686.3, 60 sec: 3822.9, 300 sec: 3610.1). Total num frames: 3665920. Throughput: 0: 967.9. Samples: 914014. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-11-28 15:29:30,998][00190] Avg episode reward: [(0, '20.254')]
+[2024-11-28 15:29:31,006][02447] Saving new best policy, reward=20.254!
+[2024-11-28 15:29:35,553][02460] Updated weights for policy 0, policy_version 900 (0.0022)
+[2024-11-28 15:29:35,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3610.0). Total num frames: 3686400. Throughput: 0: 965.8. Samples: 920396. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2024-11-28 15:29:36,000][00190] Avg episode reward: [(0, '18.427')]
+[2024-11-28 15:29:40,994][00190] Fps is (10 sec: 4096.1, 60 sec: 3959.5, 300 sec: 3610.0). Total num frames: 3706880. Throughput: 0: 1018.6. Samples: 927106. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:29:41,001][00190] Avg episode reward: [(0, '19.205')]
+[2024-11-28 15:29:45,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3623.9). Total num frames: 3723264. Throughput: 0: 998.7. Samples: 929188. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-11-28 15:29:46,000][00190] Avg episode reward: [(0, '19.846')]
+[2024-11-28 15:29:46,721][02460] Updated weights for policy 0, policy_version 910 (0.0021)
+[2024-11-28 15:29:50,994][00190] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3637.8). Total num frames: 3747840. Throughput: 0: 979.9. Samples: 935284. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:29:50,998][00190] Avg episode reward: [(0, '18.113')]
+[2024-11-28 15:29:55,371][02460] Updated weights for policy 0, policy_version 920 (0.0019)
+[2024-11-28 15:29:55,996][00190] Fps is (10 sec: 4504.6, 60 sec: 4095.9, 300 sec: 3637.8). Total num frames: 3768320. Throughput: 0: 1020.3. Samples: 942410. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-11-28 15:29:56,002][00190] Avg episode reward: [(0, '20.297')]
+[2024-11-28 15:29:56,004][02447] Saving new best policy, reward=20.297!
+[2024-11-28 15:30:00,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3651.8). Total num frames: 3784704. Throughput: 0: 1017.2. Samples: 944524. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:30:00,996][00190] Avg episode reward: [(0, '20.636')]
+[2024-11-28 15:30:01,002][02447] Saving new best policy, reward=20.636!
+[2024-11-28 15:30:05,994][00190] Fps is (10 sec: 3687.2, 60 sec: 3891.2, 300 sec: 3651.7). Total num frames: 3805184. Throughput: 0: 976.5. Samples: 949870. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:30:05,996][00190] Avg episode reward: [(0, '20.613')]
+[2024-11-28 15:30:06,606][02460] Updated weights for policy 0, policy_version 930 (0.0030)
+[2024-11-28 15:30:10,994][00190] Fps is (10 sec: 4096.0, 60 sec: 4028.2, 300 sec: 3651.7). Total num frames: 3825664. Throughput: 0: 1000.9. Samples: 956946. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2024-11-28 15:30:11,000][00190] Avg episode reward: [(0, '21.100')]
+[2024-11-28 15:30:11,012][02447] Saving new best policy, reward=21.100!
+[2024-11-28 15:30:15,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3665.6). Total num frames: 3842048. Throughput: 0: 1017.1. Samples: 959782. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2024-11-28 15:30:15,995][00190] Avg episode reward: [(0, '20.834')]
+[2024-11-28 15:30:17,641][02460] Updated weights for policy 0, policy_version 940 (0.0033)
+[2024-11-28 15:30:20,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3679.5). Total num frames: 3862528. Throughput: 0: 983.5. Samples: 964652. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-11-28 15:30:21,000][00190] Avg episode reward: [(0, '19.140')]
+[2024-11-28 15:30:25,994][00190] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3693.3). Total num frames: 3887104. Throughput: 0: 991.1. Samples: 971704. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2024-11-28 15:30:26,000][00190] Avg episode reward: [(0, '18.122')]
+[2024-11-28 15:30:26,352][02460] Updated weights for policy 0, policy_version 950 (0.0015)
+[2024-11-28 15:30:30,996][00190] Fps is (10 sec: 4095.1, 60 sec: 3959.3, 300 sec: 3679.4). Total num frames: 3903488. Throughput: 0: 1021.9. Samples: 975174. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2024-11-28 15:30:30,999][00190] Avg episode reward: [(0, '17.617')]
+[2024-11-28 15:30:35,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3707.2). Total num frames: 3923968. Throughput: 0: 980.8. Samples: 979422. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:30:35,995][00190] Avg episode reward: [(0, '18.338')]
+[2024-11-28 15:30:37,708][02460] Updated weights for policy 0, policy_version 960 (0.0020)
+[2024-11-28 15:30:40,994][00190] Fps is (10 sec: 4096.9, 60 sec: 3959.5, 300 sec: 3707.2). Total num frames: 3944448. Throughput: 0: 976.2. Samples: 986336. Policy #0 lag: (min: 0.0, avg: 0.2, max: 1.0)
+[2024-11-28 15:30:40,996][00190] Avg episode reward: [(0, '17.293')]
+[2024-11-28 15:30:45,994][00190] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3707.2). Total num frames: 3964928. Throughput: 0: 1005.3. Samples: 989764. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:30:45,997][00190] Avg episode reward: [(0, '18.259')]
+[2024-11-28 15:30:48,063][02460] Updated weights for policy 0, policy_version 970 (0.0013)
+[2024-11-28 15:30:50,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3721.1). Total num frames: 3981312. Throughput: 0: 990.7. Samples: 994452. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2024-11-28 15:30:50,998][00190] Avg episode reward: [(0, '18.173')]
+[2024-11-28 15:30:55,994][00190] Fps is (10 sec: 3686.4, 60 sec: 3891.3, 300 sec: 3721.1). Total num frames: 4001792. Throughput: 0: 970.9. Samples: 1000636. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2024-11-28 15:30:55,995][00190] Avg episode reward: [(0, '19.848')]
+[2024-11-28 15:30:56,192][02447] Stopping Batcher_0...
+[2024-11-28 15:30:56,193][02447] Loop batcher_evt_loop terminating...
+[2024-11-28 15:30:56,194][00190] Component Batcher_0 stopped!
+[2024-11-28 15:30:56,194][02447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
+[2024-11-28 15:30:56,200][00190] Component RolloutWorker_w4 process died already! Don't wait for it.
+[2024-11-28 15:30:56,256][02460] Weights refcount: 2 0
+[2024-11-28 15:30:56,259][00190] Component InferenceWorker_p0-w0 stopped!
+[2024-11-28 15:30:56,264][02460] Stopping InferenceWorker_p0-w0...
+[2024-11-28 15:30:56,268][02460] Loop inference_proc0-0_evt_loop terminating...
+[2024-11-28 15:30:56,327][02447] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000778_3186688.pth
+[2024-11-28 15:30:56,337][02447] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
+[2024-11-28 15:30:56,526][02447] Stopping LearnerWorker_p0...
+[2024-11-28 15:30:56,531][02447] Loop learner_proc0_evt_loop terminating...
+[2024-11-28 15:30:56,526][00190] Component LearnerWorker_p0 stopped!
+[2024-11-28 15:30:56,550][00190] Component RolloutWorker_w6 stopped!
+[2024-11-28 15:30:56,555][02465] Stopping RolloutWorker_w6...
+[2024-11-28 15:30:56,556][02465] Loop rollout_proc6_evt_loop terminating...
+[2024-11-28 15:30:56,568][00190] Component RolloutWorker_w2 stopped!
+[2024-11-28 15:30:56,574][02463] Stopping RolloutWorker_w2...
+[2024-11-28 15:30:56,575][02463] Loop rollout_proc2_evt_loop terminating...
+[2024-11-28 15:30:56,580][00190] Component RolloutWorker_w0 stopped!
+[2024-11-28 15:30:56,586][02462] Stopping RolloutWorker_w0...
+[2024-11-28 15:30:56,589][02462] Loop rollout_proc0_evt_loop terminating...
+[2024-11-28 15:30:56,687][02461] Stopping RolloutWorker_w1...
+[2024-11-28 15:30:56,689][02464] Stopping RolloutWorker_w5...
+[2024-11-28 15:30:56,692][02466] Stopping RolloutWorker_w3...
+[2024-11-28 15:30:56,687][00190] Component RolloutWorker_w1 stopped!
+[2024-11-28 15:30:56,695][00190] Component RolloutWorker_w5 stopped!
+[2024-11-28 15:30:56,692][02464] Loop rollout_proc5_evt_loop terminating...
+[2024-11-28 15:30:56,696][00190] Component RolloutWorker_w3 stopped!
+[2024-11-28 15:30:56,697][02461] Loop rollout_proc1_evt_loop terminating...
+[2024-11-28 15:30:56,693][02466] Loop rollout_proc3_evt_loop terminating...
+[2024-11-28 15:30:56,734][02472] Stopping RolloutWorker_w7...
+[2024-11-28 15:30:56,737][02472] Loop rollout_proc7_evt_loop terminating...
+[2024-11-28 15:30:56,734][00190] Component RolloutWorker_w7 stopped!
+[2024-11-28 15:30:56,739][00190] Waiting for process learner_proc0 to stop...
+[2024-11-28 15:30:58,151][00190] Waiting for process inference_proc0-0 to join...
+[2024-11-28 15:30:58,158][00190] Waiting for process rollout_proc0 to join...
+[2024-11-28 15:30:59,439][00190] Waiting for process rollout_proc1 to join...
+[2024-11-28 15:30:59,815][00190] Waiting for process rollout_proc2 to join...
+[2024-11-28 15:30:59,818][00190] Waiting for process rollout_proc3 to join...
+[2024-11-28 15:30:59,823][00190] Waiting for process rollout_proc4 to join...
+[2024-11-28 15:30:59,825][00190] Waiting for process rollout_proc5 to join...
+[2024-11-28 15:30:59,829][00190] Waiting for process rollout_proc6 to join...
+[2024-11-28 15:30:59,833][00190] Waiting for process rollout_proc7 to join...
+[2024-11-28 15:30:59,836][00190] Batcher 0 profile tree view:
+batching: 25.2262, releasing_batches: 0.0332
+[2024-11-28 15:30:59,837][00190] InferenceWorker_p0-w0 profile tree view:
+wait_policy: 0.0000
+ wait_policy_total: 473.1598
+update_model: 9.4146
+ weight_update: 0.0019
+one_step: 0.0058
+ handle_policy_step: 614.2813
+ deserialize: 15.8977, stack: 3.5815, obs_to_device_normalize: 131.6496, forward: 318.2260, send_messages: 26.4951
+ prepare_outputs: 87.3044
+ to_cpu: 51.5849
+[2024-11-28 15:30:59,839][00190] Learner 0 profile tree view:
+misc: 0.0050, prepare_batch: 13.4991
+train: 71.7457
+ epoch_init: 0.0057, minibatch_init: 0.0146, losses_postprocess: 0.6223, kl_divergence: 0.6428, after_optimizer: 32.7768
+ calculate_losses: 25.5064
+ losses_init: 0.0066, forward_head: 1.3090, bptt_initial: 16.9304, tail: 1.1035, advantages_returns: 0.3157, losses: 3.5157
+ bptt: 1.9314
+ bptt_forward_core: 1.8390
+ update: 11.5134
+ clip: 0.8799
+[2024-11-28 15:30:59,841][00190] RolloutWorker_w0 profile tree view:
+wait_for_trajectories: 0.4366, enqueue_policy_requests: 157.7637, env_step: 850.6162, overhead: 17.0426, complete_rollouts: 6.1278
+save_policy_outputs: 23.7303
+ split_output_tensors: 9.7522
+[2024-11-28 15:30:59,842][00190] RolloutWorker_w7 profile tree view:
+wait_for_trajectories: 0.4032, enqueue_policy_requests: 111.0568, env_step: 897.8814, overhead: 17.4672, complete_rollouts: 8.3675
+save_policy_outputs: 25.0691
+ split_output_tensors: 9.9739
+[2024-11-28 15:30:59,843][00190] Loop Runner_EvtLoop terminating...
+[2024-11-28 15:30:59,845][00190] Runner profile tree view:
+main_loop: 1173.5655
+[2024-11-28 15:30:59,846][00190] Collected {0: 4005888}, FPS: 3413.4
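The run summary above closes the 4M-frame training phase; the block below is an evaluation pass that reloads the saved config.json, lets the command line override saved keys ('num_workers'), and adds evaluation-only keys that never existed at training time ('no_render', 'save_video', 'max_num_episodes', ...). A rough sketch of that merge logic, with invented names, purely for illustration (not Sample Factory's actual code):

    import json

    def load_eval_cfg(path: str, overrides: dict, eval_defaults: dict) -> dict:
        with open(path) as f:
            cfg = json.load(f)  # saved training configuration
        for key, value in overrides.items():  # e.g. {'num_workers': 1}
            print(f"Overriding arg {key!r} with value {value} passed from command line")
            cfg[key] = value
        for key, value in eval_defaults.items():  # e.g. {'no_render': True}
            if key not in cfg:
                print(f"Adding new argument {key!r}={value} that is not in the saved config file!")
                cfg[key] = value
        return cfg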
+[2024-11-28 15:31:19,912][00190] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2024-11-28 15:31:19,914][00190] Overriding arg 'num_workers' with value 1 passed from command line
+[2024-11-28 15:31:19,916][00190] Adding new argument 'no_render'=True that is not in the saved config file!
+[2024-11-28 15:31:19,918][00190] Adding new argument 'save_video'=True that is not in the saved config file!
+[2024-11-28 15:31:19,920][00190] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2024-11-28 15:31:19,922][00190] Adding new argument 'video_name'=None that is not in the saved config file!
+[2024-11-28 15:31:19,923][00190] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+[2024-11-28 15:31:19,924][00190] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2024-11-28 15:31:19,925][00190] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+[2024-11-28 15:31:19,926][00190] Adding new argument 'hf_repository'=None that is not in the saved config file!
+[2024-11-28 15:31:19,927][00190] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2024-11-28 15:31:19,928][00190] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2024-11-28 15:31:19,930][00190] Adding new argument 'train_script'=None that is not in the saved config file!
+[2024-11-28 15:31:19,931][00190] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2024-11-28 15:31:19,932][00190] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2024-11-28 15:31:19,965][00190] Doom resolution: 160x120, resize resolution: (128, 72)
+[2024-11-28 15:31:19,968][00190] RunningMeanStd input shape: (3, 72, 128)
+[2024-11-28 15:31:19,970][00190] RunningMeanStd input shape: (1,)
+[2024-11-28 15:31:19,986][00190] ConvEncoder: input_channels=3
+[2024-11-28 15:31:20,096][00190] Conv encoder output size: 512
+[2024-11-28 15:31:20,099][00190] Policy head output size: 512
+[2024-11-28 15:31:20,283][00190] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
+[2024-11-28 15:31:21,059][00190] Num frames 100...
+[2024-11-28 15:31:21,183][00190] Num frames 200...
+[2024-11-28 15:31:21,303][00190] Num frames 300...
+[2024-11-28 15:31:21,431][00190] Num frames 400...
+[2024-11-28 15:31:21,551][00190] Num frames 500...
+[2024-11-28 15:31:21,674][00190] Num frames 600...
+[2024-11-28 15:31:21,802][00190] Num frames 700...
+[2024-11-28 15:31:21,921][00190] Num frames 800...
+[2024-11-28 15:31:22,095][00190] Avg episode rewards: #0: 21.960, true rewards: #0: 8.960
+[2024-11-28 15:31:22,096][00190] Avg episode reward: 21.960, avg true_objective: 8.960
+[2024-11-28 15:31:22,105][00190] Num frames 900...
+[2024-11-28 15:31:22,233][00190] Num frames 1000...
+[2024-11-28 15:31:22,359][00190] Num frames 1100...
+[2024-11-28 15:31:22,477][00190] Num frames 1200...
+[2024-11-28 15:31:22,599][00190] Num frames 1300...
+[2024-11-28 15:31:22,720][00190] Num frames 1400...
+[2024-11-28 15:31:22,850][00190] Num frames 1500...
+[2024-11-28 15:31:22,974][00190] Num frames 1600...
+[2024-11-28 15:31:23,026][00190] Avg episode rewards: #0: 19.000, true rewards: #0: 8.000
+[2024-11-28 15:31:23,027][00190] Avg episode reward: 19.000, avg true_objective: 8.000
+[2024-11-28 15:31:23,151][00190] Num frames 1700...
+[2024-11-28 15:31:23,293][00190] Num frames 1800...
+[2024-11-28 15:31:23,419][00190] Num frames 1900...
+[2024-11-28 15:31:23,563][00190] Num frames 2000...
+[2024-11-28 15:31:23,730][00190] Num frames 2100...
+[2024-11-28 15:31:23,907][00190] Num frames 2200...
+[2024-11-28 15:31:24,068][00190] Num frames 2300...
+[2024-11-28 15:31:24,234][00190] Num frames 2400...
+[2024-11-28 15:31:24,406][00190] Num frames 2500...
+[2024-11-28 15:31:24,565][00190] Num frames 2600...
+[2024-11-28 15:31:24,735][00190] Num frames 2700...
+[2024-11-28 15:31:24,912][00190] Num frames 2800...
+[2024-11-28 15:31:25,084][00190] Num frames 2900...
+[2024-11-28 15:31:25,263][00190] Num frames 3000...
+[2024-11-28 15:31:25,442][00190] Num frames 3100...
+[2024-11-28 15:31:25,624][00190] Num frames 3200...
+[2024-11-28 15:31:25,802][00190] Num frames 3300...
+[2024-11-28 15:31:25,989][00190] Num frames 3400...
+[2024-11-28 15:31:26,117][00190] Num frames 3500...
+[2024-11-28 15:31:26,241][00190] Num frames 3600...
+[2024-11-28 15:31:26,369][00190] Num frames 3700...
+[2024-11-28 15:31:26,421][00190] Avg episode rewards: #0: 30.333, true rewards: #0: 12.333
+[2024-11-28 15:31:26,422][00190] Avg episode reward: 30.333, avg true_objective: 12.333
+[2024-11-28 15:31:26,543][00190] Num frames 3800...
+[2024-11-28 15:31:26,662][00190] Num frames 3900...
+[2024-11-28 15:31:26,778][00190] Num frames 4000...
+[2024-11-28 15:31:26,899][00190] Num frames 4100...
+[2024-11-28 15:31:27,025][00190] Num frames 4200...
+[2024-11-28 15:31:27,145][00190] Num frames 4300...
+[2024-11-28 15:31:27,266][00190] Num frames 4400...
+[2024-11-28 15:31:27,391][00190] Num frames 4500...
+[2024-11-28 15:31:27,514][00190] Num frames 4600...
+[2024-11-28 15:31:27,633][00190] Num frames 4700...
+[2024-11-28 15:31:27,751][00190] Num frames 4800...
+[2024-11-28 15:31:27,876][00190] Num frames 4900...
+[2024-11-28 15:31:27,987][00190] Avg episode rewards: #0: 29.370, true rewards: #0: 12.370
+[2024-11-28 15:31:27,990][00190] Avg episode reward: 29.370, avg true_objective: 12.370
+[2024-11-28 15:31:28,057][00190] Num frames 5000...
+[2024-11-28 15:31:28,184][00190] Num frames 5100...
+[2024-11-28 15:31:28,303][00190] Num frames 5200...
+[2024-11-28 15:31:28,424][00190] Num frames 5300...
+[2024-11-28 15:31:28,540][00190] Num frames 5400...
+[2024-11-28 15:31:28,658][00190] Num frames 5500...
+[2024-11-28 15:31:28,779][00190] Num frames 5600...
+[2024-11-28 15:31:28,898][00190] Num frames 5700...
+[2024-11-28 15:31:29,015][00190] Num frames 5800...
+[2024-11-28 15:31:29,144][00190] Num frames 5900...
+[2024-11-28 15:31:29,265][00190] Num frames 6000...
+[2024-11-28 15:31:29,389][00190] Num frames 6100...
+[2024-11-28 15:31:29,511][00190] Num frames 6200...
+[2024-11-28 15:31:29,643][00190] Avg episode rewards: #0: 30.120, true rewards: #0: 12.520
+[2024-11-28 15:31:29,644][00190] Avg episode reward: 30.120, avg true_objective: 12.520
+[2024-11-28 15:31:29,699][00190] Num frames 6300...
+[2024-11-28 15:31:29,819][00190] Num frames 6400...
+[2024-11-28 15:31:29,938][00190] Num frames 6500...
+[2024-11-28 15:31:30,062][00190] Num frames 6600...
+[2024-11-28 15:31:30,190][00190] Num frames 6700...
+[2024-11-28 15:31:30,308][00190] Num frames 6800...
+[2024-11-28 15:31:30,432][00190] Num frames 6900...
+[2024-11-28 15:31:30,485][00190] Avg episode rewards: #0: 27.167, true rewards: #0: 11.500
+[2024-11-28 15:31:30,486][00190] Avg episode reward: 27.167, avg true_objective: 11.500
+[2024-11-28 15:31:30,604][00190] Num frames 7000...
+[2024-11-28 15:31:30,726][00190] Num frames 7100...
+[2024-11-28 15:31:30,848][00190] Num frames 7200...
+[2024-11-28 15:31:30,967][00190] Num frames 7300...
+[2024-11-28 15:31:31,093][00190] Num frames 7400...
+[2024-11-28 15:31:31,219][00190] Num frames 7500...
+[2024-11-28 15:31:31,343][00190] Num frames 7600...
+[2024-11-28 15:31:31,463][00190] Num frames 7700...
+[2024-11-28 15:31:31,592][00190] Num frames 7800...
+[2024-11-28 15:31:31,740][00190] Num frames 7900...
+[2024-11-28 15:31:31,865][00190] Num frames 8000...
+[2024-11-28 15:31:32,000][00190] Num frames 8100...
+[2024-11-28 15:31:32,130][00190] Num frames 8200...
+[2024-11-28 15:31:32,256][00190] Num frames 8300...
+[2024-11-28 15:31:32,405][00190] Avg episode rewards: #0: 29.253, true rewards: #0: 11.967
+[2024-11-28 15:31:32,408][00190] Avg episode reward: 29.253, avg true_objective: 11.967
+[2024-11-28 15:31:32,437][00190] Num frames 8400...
+[2024-11-28 15:31:32,557][00190] Num frames 8500...
+[2024-11-28 15:31:32,675][00190] Num frames 8600...
+[2024-11-28 15:31:32,795][00190] Num frames 8700...
+[2024-11-28 15:31:32,917][00190] Num frames 8800...
+[2024-11-28 15:31:33,037][00190] Num frames 8900...
+[2024-11-28 15:31:33,174][00190] Num frames 9000...
+[2024-11-28 15:31:33,299][00190] Num frames 9100...
+[2024-11-28 15:31:33,428][00190] Num frames 9200...
+[2024-11-28 15:31:33,548][00190] Num frames 9300...
+[2024-11-28 15:31:33,667][00190] Num frames 9400...
+[2024-11-28 15:31:33,837][00190] Avg episode rewards: #0: 28.496, true rewards: #0: 11.871
+[2024-11-28 15:31:33,839][00190] Avg episode reward: 28.496, avg true_objective: 11.871
+[2024-11-28 15:31:33,845][00190] Num frames 9500...
+[2024-11-28 15:31:33,967][00190] Num frames 9600...
+[2024-11-28 15:31:34,084][00190] Num frames 9700...
+[2024-11-28 15:31:34,216][00190] Num frames 9800...
+[2024-11-28 15:31:34,338][00190] Num frames 9900...
+[2024-11-28 15:31:34,456][00190] Avg episode rewards: #0: 26.503, true rewards: #0: 11.059
+[2024-11-28 15:31:34,458][00190] Avg episode reward: 26.503, avg true_objective: 11.059
+[2024-11-28 15:31:34,514][00190] Num frames 10000...
+[2024-11-28 15:31:34,636][00190] Num frames 10100...
+[2024-11-28 15:31:34,753][00190] Num frames 10200...
+[2024-11-28 15:31:34,870][00190] Num frames 10300...
+[2024-11-28 15:31:34,989][00190] Num frames 10400...
+[2024-11-28 15:31:35,106][00190] Num frames 10500...
+[2024-11-28 15:31:35,239][00190] Num frames 10600...
+[2024-11-28 15:31:35,402][00190] Avg episode rewards: #0: 25.189, true rewards: #0: 10.689
+[2024-11-28 15:31:35,404][00190] Avg episode reward: 25.189, avg true_objective: 10.689
+[2024-11-28 15:32:38,310][00190] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
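The second evaluation pass below repeats the same procedure with push_to_hub=True and an hf_repository target, so the checkpoint, config.json, and replay.mp4 end up in a Hugging Face model repo. Sample Factory drives this through huggingface_hub; a hedged sketch of what the upload step amounts to (repo id taken from the log below, call shape assumed, not SF's internals):

    from huggingface_hub import HfApi

    api = HfApi()
    # Upload the experiment directory (checkpoint, config.json, replay.mp4)
    # to the model repo named in the log. Assumes a prior `huggingface-cli login`.
    api.upload_folder(
        folder_path="/content/train_dir/default_experiment",
        repo_id="achrafib11/rl_vizdoom_health_gathering_supreme",
        repo_type="model",
    )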
+[2024-11-28 15:33:59,036][00190] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2024-11-28 15:33:59,038][00190] Overriding arg 'num_workers' with value 1 passed from command line
+[2024-11-28 15:33:59,039][00190] Adding new argument 'no_render'=True that is not in the saved config file!
+[2024-11-28 15:33:59,041][00190] Adding new argument 'save_video'=True that is not in the saved config file!
+[2024-11-28 15:33:59,043][00190] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2024-11-28 15:33:59,045][00190] Adding new argument 'video_name'=None that is not in the saved config file!
+[2024-11-28 15:33:59,046][00190] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2024-11-28 15:33:59,048][00190] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2024-11-28 15:33:59,050][00190] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2024-11-28 15:33:59,051][00190] Adding new argument 'hf_repository'='achrafib11/rl_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2024-11-28 15:33:59,052][00190] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2024-11-28 15:33:59,053][00190] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2024-11-28 15:33:59,054][00190] Adding new argument 'train_script'=None that is not in the saved config file!
+[2024-11-28 15:33:59,055][00190] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2024-11-28 15:33:59,056][00190] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2024-11-28 15:33:59,084][00190] RunningMeanStd input shape: (3, 72, 128)
+[2024-11-28 15:33:59,085][00190] RunningMeanStd input shape: (1,)
+[2024-11-28 15:33:59,099][00190] ConvEncoder: input_channels=3
+[2024-11-28 15:33:59,137][00190] Conv encoder output size: 512
+[2024-11-28 15:33:59,139][00190] Policy head output size: 512
+[2024-11-28 15:33:59,162][00190] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
+[2024-11-28 15:33:59,580][00190] Num frames 100...
+[2024-11-28 15:33:59,701][00190] Num frames 200...
+[2024-11-28 15:33:59,824][00190] Num frames 300...
+[2024-11-28 15:33:59,950][00190] Num frames 400...
+[2024-11-28 15:34:00,068][00190] Num frames 500...
+[2024-11-28 15:34:00,189][00190] Num frames 600...
+[2024-11-28 15:34:00,307][00190] Num frames 700...
+[2024-11-28 15:34:00,428][00190] Num frames 800...
+[2024-11-28 15:34:00,554][00190] Num frames 900...
+[2024-11-28 15:34:00,678][00190] Num frames 1000...
+[2024-11-28 15:34:00,800][00190] Num frames 1100...
+[2024-11-28 15:34:00,922][00190] Num frames 1200...
+[2024-11-28 15:34:01,046][00190] Num frames 1300...
+[2024-11-28 15:34:01,171][00190] Num frames 1400...
+[2024-11-28 15:34:01,277][00190] Avg episode rewards: #0: 32.400, true rewards: #0: 14.400
+[2024-11-28 15:34:01,278][00190] Avg episode reward: 32.400, avg true_objective: 14.400
+[2024-11-28 15:34:01,352][00190] Num frames 1500...
+[2024-11-28 15:34:01,473][00190] Num frames 1600...
+[2024-11-28 15:34:01,594][00190] Num frames 1700...
+[2024-11-28 15:34:01,714][00190] Num frames 1800...
+[2024-11-28 15:34:01,838][00190] Num frames 1900...
+[2024-11-28 15:34:01,956][00190] Avg episode rewards: #0: 19.760, true rewards: #0: 9.760
+[2024-11-28 15:34:01,958][00190] Avg episode reward: 19.760, avg true_objective: 9.760
+[2024-11-28 15:34:02,018][00190] Num frames 2000...
+[2024-11-28 15:34:02,144][00190] Num frames 2100...
+[2024-11-28 15:34:02,291][00190] Num frames 2200...
+[2024-11-28 15:34:02,472][00190] Num frames 2300...
+[2024-11-28 15:34:02,639][00190] Num frames 2400...
+[2024-11-28 15:34:02,806][00190] Num frames 2500...
+[2024-11-28 15:34:02,976][00190] Num frames 2600...
+[2024-11-28 15:34:03,139][00190] Num frames 2700...
+[2024-11-28 15:34:03,308][00190] Num frames 2800...
+[2024-11-28 15:34:03,466][00190] Num frames 2900...
+[2024-11-28 15:34:03,627][00190] Num frames 3000...
+[2024-11-28 15:34:03,751][00190] Avg episode rewards: #0: 20.467, true rewards: #0: 10.133
+[2024-11-28 15:34:03,753][00190] Avg episode reward: 20.467, avg true_objective: 10.133
+[2024-11-28 15:34:03,859][00190] Num frames 3100...
+[2024-11-28 15:34:04,028][00190] Num frames 3200...
+[2024-11-28 15:34:04,214][00190] Num frames 3300...
+[2024-11-28 15:34:04,393][00190] Num frames 3400...
+[2024-11-28 15:34:04,565][00190] Num frames 3500...
+[2024-11-28 15:34:04,735][00190] Num frames 3600...
+[2024-11-28 15:34:04,880][00190] Num frames 3700...
+[2024-11-28 15:34:05,026][00190] Avg episode rewards: #0: 19.440, true rewards: #0: 9.440
+[2024-11-28 15:34:05,028][00190] Avg episode reward: 19.440, avg true_objective: 9.440
+[2024-11-28 15:34:05,068][00190] Num frames 3800...
+[2024-11-28 15:34:05,203][00190] Num frames 3900...
+[2024-11-28 15:34:05,343][00190] Num frames 4000...
+[2024-11-28 15:34:05,472][00190] Num frames 4100...
+[2024-11-28 15:34:05,597][00190] Num frames 4200...
+[2024-11-28 15:34:05,722][00190] Num frames 4300...
+[2024-11-28 15:34:05,844][00190] Num frames 4400...
+[2024-11-28 15:34:05,965][00190] Num frames 4500...
+[2024-11-28 15:34:06,095][00190] Num frames 4600...
+[2024-11-28 15:34:06,224][00190] Num frames 4700...
+[2024-11-28 15:34:06,351][00190] Num frames 4800...
+[2024-11-28 15:34:06,474][00190] Num frames 4900...
+[2024-11-28 15:34:06,600][00190] Num frames 5000...
+[2024-11-28 15:34:06,724][00190] Num frames 5100...
+[2024-11-28 15:34:06,848][00190] Num frames 5200...
+[2024-11-28 15:34:06,969][00190] Num frames 5300...
+[2024-11-28 15:34:07,094][00190] Num frames 5400...
+[2024-11-28 15:34:07,234][00190] Num frames 5500...
+[2024-11-28 15:34:07,333][00190] Avg episode rewards: #0: 24.072, true rewards: #0: 11.072
+[2024-11-28 15:34:07,335][00190] Avg episode reward: 24.072, avg true_objective: 11.072
+[2024-11-28 15:34:07,415][00190] Num frames 5600...
+[2024-11-28 15:34:07,542][00190] Num frames 5700...
+[2024-11-28 15:34:07,668][00190] Num frames 5800...
+[2024-11-28 15:34:07,792][00190] Num frames 5900...
+[2024-11-28 15:34:07,917][00190] Num frames 6000...
+[2024-11-28 15:34:08,041][00190] Num frames 6100...
+[2024-11-28 15:34:08,181][00190] Num frames 6200...
+[2024-11-28 15:34:08,305][00190] Num frames 6300...
+[2024-11-28 15:34:08,427][00190] Num frames 6400...
+[2024-11-28 15:34:08,551][00190] Num frames 6500...
+[2024-11-28 15:34:08,675][00190] Num frames 6600...
+[2024-11-28 15:34:08,796][00190] Num frames 6700...
+[2024-11-28 15:34:08,923][00190] Num frames 6800...
+[2024-11-28 15:34:09,101][00190] Avg episode rewards: #0: 25.165, true rewards: #0: 11.498
+[2024-11-28 15:34:09,104][00190] Avg episode reward: 25.165, avg true_objective: 11.498
+[2024-11-28 15:34:09,108][00190] Num frames 6900...
+[2024-11-28 15:34:09,248][00190] Num frames 7000...
+[2024-11-28 15:34:09,371][00190] Num frames 7100...
+[2024-11-28 15:34:09,495][00190] Num frames 7200...
+[2024-11-28 15:34:09,623][00190] Num frames 7300...
+[2024-11-28 15:34:09,747][00190] Num frames 7400...
+[2024-11-28 15:34:09,871][00190] Num frames 7500...
+[2024-11-28 15:34:09,994][00190] Num frames 7600...
+[2024-11-28 15:34:10,119][00190] Num frames 7700...
+[2024-11-28 15:34:10,259][00190] Num frames 7800...
+[2024-11-28 15:34:10,379][00190] Num frames 7900...
+[2024-11-28 15:34:10,517][00190] Num frames 8000...
+[2024-11-28 15:34:10,654][00190] Num frames 8100...
+[2024-11-28 15:34:10,790][00190] Avg episode rewards: #0: 25.234, true rewards: #0: 11.663
+[2024-11-28 15:34:10,791][00190] Avg episode reward: 25.234, avg true_objective: 11.663
+[2024-11-28 15:34:10,838][00190] Num frames 8200...
+[2024-11-28 15:34:10,954][00190] Num frames 8300...
+[2024-11-28 15:34:11,077][00190] Num frames 8400...
+[2024-11-28 15:34:11,207][00190] Num frames 8500...
+[2024-11-28 15:34:11,337][00190] Num frames 8600...
+[2024-11-28 15:34:11,460][00190] Num frames 8700...
+[2024-11-28 15:34:11,587][00190] Num frames 8800...
+[2024-11-28 15:34:11,707][00190] Num frames 8900...
+[2024-11-28 15:34:11,875][00190] Avg episode rewards: #0: 23.995, true rewards: #0: 11.245
+[2024-11-28 15:34:11,877][00190] Avg episode reward: 23.995, avg true_objective: 11.245
+[2024-11-28 15:34:11,884][00190] Num frames 9000...
+[2024-11-28 15:34:12,004][00190] Num frames 9100...
+[2024-11-28 15:34:12,125][00190] Num frames 9200...
+[2024-11-28 15:34:12,277][00190] Num frames 9300...
+[2024-11-28 15:34:12,399][00190] Num frames 9400...
+[2024-11-28 15:34:12,520][00190] Num frames 9500...
+[2024-11-28 15:34:12,634][00190] Avg episode rewards: #0: 22.606, true rewards: #0: 10.606
+[2024-11-28 15:34:12,636][00190] Avg episode reward: 22.606, avg true_objective: 10.606
+[2024-11-28 15:34:12,703][00190] Num frames 9600...
+[2024-11-28 15:34:12,825][00190] Num frames 9700...
+[2024-11-28 15:34:12,949][00190] Num frames 9800...
+[2024-11-28 15:34:13,069][00190] Num frames 9900...
+[2024-11-28 15:34:13,200][00190] Num frames 10000...
+[2024-11-28 15:34:13,330][00190] Num frames 10100...
+[2024-11-28 15:34:13,451][00190] Num frames 10200...
+[2024-11-28 15:34:13,576][00190] Num frames 10300...
+[2024-11-28 15:34:13,701][00190] Num frames 10400...
+[2024-11-28 15:34:13,826][00190] Num frames 10500...
+[2024-11-28 15:34:13,948][00190] Num frames 10600...
+[2024-11-28 15:34:14,070][00190] Num frames 10700...
+[2024-11-28 15:34:14,202][00190] Num frames 10800...
+[2024-11-28 15:34:14,331][00190] Num frames 10900...
+[2024-11-28 15:34:14,455][00190] Num frames 11000...
+[2024-11-28 15:34:14,579][00190] Num frames 11100...
+[2024-11-28 15:34:14,652][00190] Avg episode rewards: #0: 24.113, true rewards: #0: 11.113
+[2024-11-28 15:34:14,654][00190] Avg episode reward: 24.113, avg true_objective: 11.113
+[2024-11-28 15:35:18,811][00190] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
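Each "Avg episode rewards" line in these evaluation blocks is a running mean over the episodes completed so far, reported both for the reward the agent was trained on and for the environment's true objective. A small illustrative reconstruction of that bookkeeping, with hypothetical names (not Sample Factory's internals):

    def running_averages(shaped_returns: list, true_returns: list) -> str:
        # Running means over all evaluation episodes finished so far.
        n = len(shaped_returns)
        return (f"Avg episode rewards: #0: {sum(shaped_returns) / n:.3f}, "
                f"true rewards: #0: {sum(true_returns) / n:.3f}")

For example, after the tenth and final episode of the push run above, the evaluator's running means were 24.113 and 11.113.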