diff --git "a/sf_log.txt" "b/sf_log.txt" --- "a/sf_log.txt" +++ "b/sf_log.txt" @@ -3587,3 +3587,1539 @@ main_loop: 35.2166 [2023-02-23 00:51:42,528][05631] Avg episode rewards: #0: 4.692, true rewards: #0: 4.192 [2023-02-23 00:51:42,532][05631] Avg episode reward: 4.692, avg true_objective: 4.192 [2023-02-23 00:52:02,869][05631] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2023-02-23 00:52:08,657][05631] The model has been pushed to https://huggingface.co/pittawat/rl_course_vizdoom_health_gathering_supreme +[2023-02-23 00:54:19,819][05631] Environment doom_basic already registered, overwriting... +[2023-02-23 00:54:19,822][05631] Environment doom_two_colors_easy already registered, overwriting... +[2023-02-23 00:54:19,825][05631] Environment doom_two_colors_hard already registered, overwriting... +[2023-02-23 00:54:19,827][05631] Environment doom_dm already registered, overwriting... +[2023-02-23 00:54:19,829][05631] Environment doom_dwango5 already registered, overwriting... +[2023-02-23 00:54:19,832][05631] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2023-02-23 00:54:19,833][05631] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2023-02-23 00:54:19,834][05631] Environment doom_my_way_home already registered, overwriting... +[2023-02-23 00:54:19,835][05631] Environment doom_deadly_corridor already registered, overwriting... +[2023-02-23 00:54:19,836][05631] Environment doom_defend_the_center already registered, overwriting... +[2023-02-23 00:54:19,838][05631] Environment doom_defend_the_line already registered, overwriting... +[2023-02-23 00:54:19,840][05631] Environment doom_health_gathering already registered, overwriting... +[2023-02-23 00:54:19,841][05631] Environment doom_health_gathering_supreme already registered, overwriting... +[2023-02-23 00:54:19,842][05631] Environment doom_battle already registered, overwriting... +[2023-02-23 00:54:19,844][05631] Environment doom_battle2 already registered, overwriting... +[2023-02-23 00:54:19,845][05631] Environment doom_duel_bots already registered, overwriting... +[2023-02-23 00:54:19,847][05631] Environment doom_deathmatch_bots already registered, overwriting... +[2023-02-23 00:54:19,848][05631] Environment doom_duel already registered, overwriting... +[2023-02-23 00:54:19,849][05631] Environment doom_deathmatch_full already registered, overwriting... +[2023-02-23 00:54:19,850][05631] Environment doom_benchmark already registered, overwriting... +[2023-02-23 00:54:19,852][05631] register_encoder_factory: +[2023-02-23 00:54:19,883][05631] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-23 00:54:19,884][05631] Overriding arg 'gamma' with value 0.96 passed from command line +[2023-02-23 00:54:19,886][05631] Overriding arg 'train_for_env_steps' with value 15000000 passed from command line +[2023-02-23 00:54:19,889][05631] Experiment dir /content/train_dir/default_experiment already exists! +[2023-02-23 00:54:19,890][05631] Resuming existing experiment from /content/train_dir/default_experiment... 
+[2023-02-23 00:54:19,891][05631] Weights and Biases integration disabled
+[2023-02-23 00:54:19,895][05631] Environment var CUDA_VISIBLE_DEVICES is 0
+
+[2023-02-23 00:54:22,147][05631] Starting experiment with the following configuration:
+help=False
+algo=APPO
+env=doom_health_gathering_supreme
+experiment=default_experiment
+train_dir=/content/train_dir
+restart_behavior=resume
+device=gpu
+seed=None
+num_policies=1
+async_rl=True
+serial_mode=False
+batched_sampling=False
+num_batches_to_accumulate=2
+worker_num_splits=2
+policy_workers_per_policy=1
+max_policy_lag=1000
+num_workers=8
+num_envs_per_worker=4
+batch_size=1024
+num_batches_per_epoch=1
+num_epochs=1
+rollout=32
+recurrence=32
+shuffle_minibatches=False
+gamma=0.96
+reward_scale=1.0
+reward_clip=1000.0
+value_bootstrap=False
+normalize_returns=True
+exploration_loss_coeff=0.001
+value_loss_coeff=0.5
+kl_loss_coeff=0.0
+exploration_loss=symmetric_kl
+gae_lambda=0.95
+ppo_clip_ratio=0.1
+ppo_clip_value=0.2
+with_vtrace=False
+vtrace_rho=1.0
+vtrace_c=1.0
+optimizer=adam
+adam_eps=1e-06
+adam_beta1=0.9
+adam_beta2=0.999
+max_grad_norm=4.0
+learning_rate=0.0001
+lr_schedule=constant
+lr_schedule_kl_threshold=0.008
+lr_adaptive_min=1e-06
+lr_adaptive_max=0.01
+obs_subtract_mean=0.0
+obs_scale=255.0
+normalize_input=True
+normalize_input_keys=None
+decorrelate_experience_max_seconds=0
+decorrelate_envs_on_one_worker=True
+actor_worker_gpus=[]
+set_workers_cpu_affinity=True
+force_envs_single_thread=False
+default_niceness=0
+log_to_file=True
+experiment_summaries_interval=10
+flush_summaries_interval=30
+stats_avg=100
+summaries_use_frameskip=True
+heartbeat_interval=20
+heartbeat_reporting_interval=600
+train_for_env_steps=15000000
+train_for_seconds=10000000000
+save_every_sec=120
+keep_checkpoints=2
+load_checkpoint_kind=latest
+save_milestones_sec=-1
+save_best_every_sec=5
+save_best_metric=reward
+save_best_after=100000
+benchmark=False
+encoder_mlp_layers=[512, 512]
+encoder_conv_architecture=convnet_simple
+encoder_conv_mlp_layers=[512]
+use_rnn=True
+rnn_size=512
+rnn_type=gru
+rnn_num_layers=1
+decoder_mlp_layers=[]
+nonlinearity=elu
+policy_initialization=orthogonal
+policy_init_gain=1.0
+actor_critic_share_weights=True
+adaptive_stddev=True
+continuous_tanh_scale=0.0
+initial_stddev=1.0
+use_env_info_cache=False
+env_gpu_actions=False
+env_gpu_observations=True
+env_frameskip=4
+env_framestack=1
+pixel_format=CHW
+use_record_episode_statistics=False
+with_wandb=False
+wandb_user=None
+wandb_project=sample_factory
+wandb_group=None
+wandb_job_type=SF
+wandb_tags=[]
+with_pbt=False
+pbt_mix_policies_in_one_env=True
+pbt_period_env_steps=5000000
+pbt_start_mutation=20000000
+pbt_replace_fraction=0.3
+pbt_mutation_rate=0.15
+pbt_replace_reward_gap=0.1
+pbt_replace_reward_gap_absolute=1e-06
+pbt_optimize_gamma=False
+pbt_target_objective=true_objective
+pbt_perturb_min=1.1
+pbt_perturb_max=1.5
+num_agents=-1
+num_humans=0
+num_bots=-1
+start_bot_difficulty=None
+timelimit=None
+res_w=128
+res_h=72
+wide_aspect_ratio=False
+eval_env_frameskip=1
+fps=35
+command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000
+cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000}
+git_hash=unknown
+git_repo_name=not a git repository
+[2023-02-23 00:54:22,151][05631] Saving configuration to /content/train_dir/default_experiment/config.json...
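The two "Overriding arg ..." entries above summarize Sample Factory's resume semantics: the config.json stored in the experiment dir is loaded first, and arguments passed explicitly on the command line take precedence over it. A minimal sketch of that merge (this is not Sample Factory's actual code; the path and the two override values are the ones logged above):

```python
import json

# Load the configuration saved by the previous run of this experiment.
with open("/content/train_dir/default_experiment/config.json") as f:
    cfg = json.load(f)

# Explicit CLI args win over stored values, which is why gamma and
# train_for_env_steps change here while everything else persists.
cfg.update({"gamma": 0.96, "train_for_env_steps": 15_000_000})
```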
+[2023-02-23 00:54:22,156][05631] Rollout worker 0 uses device cpu
+[2023-02-23 00:54:22,158][05631] Rollout worker 1 uses device cpu
+[2023-02-23 00:54:22,161][05631] Rollout worker 2 uses device cpu
+[2023-02-23 00:54:22,162][05631] Rollout worker 3 uses device cpu
+[2023-02-23 00:54:22,163][05631] Rollout worker 4 uses device cpu
+[2023-02-23 00:54:22,172][05631] Rollout worker 5 uses device cpu
+[2023-02-23 00:54:22,175][05631] Rollout worker 6 uses device cpu
+[2023-02-23 00:54:22,177][05631] Rollout worker 7 uses device cpu
+[2023-02-23 00:54:22,353][05631] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-02-23 00:54:22,356][05631] InferenceWorker_p0-w0: min num requests: 2
+[2023-02-23 00:54:22,397][05631] Starting all processes...
+[2023-02-23 00:54:22,400][05631] Starting process learner_proc0
+[2023-02-23 00:54:22,591][05631] Starting all processes...
+[2023-02-23 00:54:22,608][05631] Starting process inference_proc0-0
+[2023-02-23 00:54:22,609][05631] Starting process rollout_proc0
+[2023-02-23 00:54:22,612][05631] Starting process rollout_proc1
+[2023-02-23 00:54:22,760][05631] Starting process rollout_proc2
+[2023-02-23 00:54:22,761][05631] Starting process rollout_proc3
+[2023-02-23 00:54:22,762][05631] Starting process rollout_proc4
+[2023-02-23 00:54:22,762][05631] Starting process rollout_proc5
+[2023-02-23 00:54:22,762][05631] Starting process rollout_proc6
+[2023-02-23 00:54:22,762][05631] Starting process rollout_proc7
+[2023-02-23 00:54:31,419][45637] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-02-23 00:54:31,421][45637] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
+[2023-02-23 00:54:31,476][45637] Num visible devices: 1
+[2023-02-23 00:54:31,516][45637] Starting seed is not provided
+[2023-02-23 00:54:31,517][45637] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-02-23 00:54:31,518][45637] Initializing actor-critic model on device cuda:0
+[2023-02-23 00:54:31,519][45637] RunningMeanStd input shape: (3, 72, 128)
+[2023-02-23 00:54:31,521][45637] RunningMeanStd input shape: (1,)
+[2023-02-23 00:54:31,653][45637] ConvEncoder: input_channels=3
+[2023-02-23 00:54:32,511][45651] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-02-23 00:54:32,515][45651] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
+[2023-02-23 00:54:32,580][45637] Conv encoder output size: 512
+[2023-02-23 00:54:32,587][45637] Policy head output size: 512
+[2023-02-23 00:54:32,594][45651] Num visible devices: 1
+[2023-02-23 00:54:32,712][45637] Created Actor Critic model with architecture:
+[2023-02-23 00:54:32,716][45637] ActorCriticSharedWeights(
+  (obs_normalizer): ObservationNormalizer(
+    (running_mean_std): RunningMeanStdDictInPlace(
+      (running_mean_std): ModuleDict(
+        (obs): RunningMeanStdInPlace()
+      )
+    )
+  )
+  (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
+  (encoder): VizdoomEncoder(
+    (basic_encoder): ConvEncoder(
+      (enc): RecursiveScriptModule(
+        original_name=ConvEncoderImpl
+        (conv_head): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Conv2d)
+          (1): RecursiveScriptModule(original_name=ELU)
+          (2): RecursiveScriptModule(original_name=Conv2d)
+          (3): RecursiveScriptModule(original_name=ELU)
+          (4): RecursiveScriptModule(original_name=Conv2d)
+          (5): RecursiveScriptModule(original_name=ELU)
+        )
+        (mlp_layers): RecursiveScriptModule(
+          original_name=Sequential
+          (0): RecursiveScriptModule(original_name=Linear)
+          (1): RecursiveScriptModule(original_name=ELU)
+        )
+      )
+    )
+  )
+  (core): ModelCoreRNN(
+    (core): GRU(512, 512)
+  )
+  (decoder): MlpDecoder(
+    (mlp): Identity()
+  )
+  (critic_linear): Linear(in_features=512, out_features=1, bias=True)
+  (action_parameterization): ActionParameterizationDefault(
+    (distribution_linear): Linear(in_features=512, out_features=5, bias=True)
+  )
+)
+[2023-02-23 00:54:32,847][45652] Worker 0 uses CPU cores [0]
+[2023-02-23 00:54:33,106][45653] Worker 2 uses CPU cores [0]
+[2023-02-23 00:54:33,432][45660] Worker 1 uses CPU cores [1]
+[2023-02-23 00:54:33,794][45670] Worker 6 uses CPU cores [0]
+[2023-02-23 00:54:33,902][45662] Worker 3 uses CPU cores [1]
+[2023-02-23 00:54:34,056][45672] Worker 7 uses CPU cores [1]
+[2023-02-23 00:54:34,067][45664] Worker 4 uses CPU cores [0]
+[2023-02-23 00:54:34,145][45674] Worker 5 uses CPU cores [1]
+[2023-02-23 00:54:36,782][45637] Using optimizer
+[2023-02-23 00:54:36,783][45637] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002447_10022912.pth...
+[2023-02-23 00:54:36,825][45637] Loading model from checkpoint
+[2023-02-23 00:54:36,832][45637] Loaded experiment state at self.train_step=2447, self.env_steps=10022912
+[2023-02-23 00:54:36,833][45637] Initialized policy 0 weights for model version 2447
+[2023-02-23 00:54:36,842][45637] Using GPUs [0] for process 0 (actually maps to GPUs [0])
+[2023-02-23 00:54:36,853][45637] LearnerWorker_p0 finished initialization!
+[2023-02-23 00:54:37,027][45651] RunningMeanStd input shape: (3, 72, 128)
+[2023-02-23 00:54:37,029][45651] RunningMeanStd input shape: (1,)
+[2023-02-23 00:54:37,047][45651] ConvEncoder: input_channels=3
+[2023-02-23 00:54:37,203][45651] Conv encoder output size: 512
+[2023-02-23 00:54:37,205][45651] Policy head output size: 512
+[2023-02-23 00:54:39,653][05631] Inference worker 0-0 is ready!
+[2023-02-23 00:54:39,655][05631] All inference workers are ready! Signal rollout workers to start!
+[2023-02-23 00:54:39,750][45664] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-23 00:54:39,754][45670] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-23 00:54:39,752][45653] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-23 00:54:39,754][45652] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-23 00:54:39,759][45674] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-23 00:54:39,762][45662] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-23 00:54:39,768][45672] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-23 00:54:39,761][45660] Doom resolution: 160x120, resize resolution: (128, 72)
+[2023-02-23 00:54:39,895][05631] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 10022912. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-02-23 00:54:40,581][45670] Decorrelating experience for 0 frames...
+[2023-02-23 00:54:40,585][45653] Decorrelating experience for 0 frames...
+[2023-02-23 00:54:40,964][45653] Decorrelating experience for 32 frames...
+[2023-02-23 00:54:41,036][45662] Decorrelating experience for 0 frames...
+[2023-02-23 00:54:41,039][45672] Decorrelating experience for 0 frames...
+[2023-02-23 00:54:41,041][45674] Decorrelating experience for 0 frames...
+[2023-02-23 00:54:41,708][45653] Decorrelating experience for 64 frames...
+[2023-02-23 00:54:41,761][45670] Decorrelating experience for 32 frames...
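The ActorCriticSharedWeights printout above can be approximated in plain PyTorch as below. This is a hedged reconstruction: the log confirms three Conv2d+ELU pairs, a 512-unit encoder output, a GRU(512, 512) core, and 1-unit value / 5-unit action heads, but the channel counts, kernels, and strides (32/64/128, 8/4/3, 4/2/2) are assumed from Sample Factory's "convnet_simple" architecture named in the config, not stated in the printout itself.

```python
import torch
import torch.nn as nn

class DoomActorCritic(nn.Module):
    def __init__(self, num_actions: int = 5, hidden: int = 512):
        super().__init__()
        # conv_head: three Conv2d+ELU pairs, as in the printout above.
        self.conv_head = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=8, stride=4), nn.ELU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ELU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=2), nn.ELU(),
        )
        # A 3x72x128 observation shrinks to a 128x3x6 map (2304 features),
        # which the Linear+ELU projects to 512 ("Conv encoder output size: 512").
        self.mlp_layers = nn.Sequential(nn.Flatten(), nn.Linear(2304, hidden), nn.ELU())
        self.core = nn.GRU(hidden, hidden)                          # ModelCoreRNN
        self.critic_linear = nn.Linear(hidden, 1)                   # value head
        self.distribution_linear = nn.Linear(hidden, num_actions)   # action logits

    def forward(self, obs, rnn_state=None):
        x = self.mlp_layers(self.conv_head(obs / 255.0))    # obs_scale=255.0 from config
        x, rnn_state = self.core(x.unsqueeze(0), rnn_state)  # sequence length of 1
        x = x.squeeze(0)
        return self.distribution_linear(x), self.critic_linear(x), rnn_state

logits, value, state = DoomActorCritic()(torch.zeros(1, 3, 72, 128))
```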
+[2023-02-23 00:54:41,967][45660] Decorrelating experience for 0 frames...
+[2023-02-23 00:54:42,343][05631] Heartbeat connected on Batcher_0
+[2023-02-23 00:54:42,350][05631] Heartbeat connected on LearnerWorker_p0
+[2023-02-23 00:54:42,369][45674] Decorrelating experience for 32 frames...
+[2023-02-23 00:54:42,376][45662] Decorrelating experience for 32 frames...
+[2023-02-23 00:54:42,384][45672] Decorrelating experience for 32 frames...
+[2023-02-23 00:54:42,383][05631] Heartbeat connected on InferenceWorker_p0-w0
+[2023-02-23 00:54:42,431][45670] Decorrelating experience for 64 frames...
+[2023-02-23 00:54:42,949][45664] Decorrelating experience for 0 frames...
+[2023-02-23 00:54:43,386][45660] Decorrelating experience for 32 frames...
+[2023-02-23 00:54:43,521][45653] Decorrelating experience for 96 frames...
+[2023-02-23 00:54:43,664][45670] Decorrelating experience for 96 frames...
+[2023-02-23 00:54:43,713][05631] Heartbeat connected on RolloutWorker_w2
+[2023-02-23 00:54:43,951][05631] Heartbeat connected on RolloutWorker_w6
+[2023-02-23 00:54:43,969][45674] Decorrelating experience for 64 frames...
+[2023-02-23 00:54:43,972][45662] Decorrelating experience for 64 frames...
+[2023-02-23 00:54:43,975][45672] Decorrelating experience for 64 frames...
+[2023-02-23 00:54:44,217][45664] Decorrelating experience for 32 frames...
+[2023-02-23 00:54:44,895][05631] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 10022912. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-02-23 00:54:45,587][45660] Decorrelating experience for 64 frames...
+[2023-02-23 00:54:45,723][45672] Decorrelating experience for 96 frames...
+[2023-02-23 00:54:45,737][45662] Decorrelating experience for 96 frames...
+[2023-02-23 00:54:45,740][45674] Decorrelating experience for 96 frames...
+[2023-02-23 00:54:45,899][45664] Decorrelating experience for 64 frames...
+[2023-02-23 00:54:46,056][05631] Heartbeat connected on RolloutWorker_w3
+[2023-02-23 00:54:46,061][05631] Heartbeat connected on RolloutWorker_w7
+[2023-02-23 00:54:46,066][05631] Heartbeat connected on RolloutWorker_w5
+[2023-02-23 00:54:48,065][45660] Decorrelating experience for 96 frames...
+[2023-02-23 00:54:49,137][05631] Heartbeat connected on RolloutWorker_w1
+[2023-02-23 00:54:49,569][45652] Decorrelating experience for 0 frames...
+[2023-02-23 00:54:49,895][05631] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 10022912. Throughput: 0: 191.0. Samples: 1910. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
+[2023-02-23 00:54:49,904][05631] Avg episode reward: [(0, '2.787')]
+[2023-02-23 00:54:50,044][45664] Decorrelating experience for 96 frames...
+[2023-02-23 00:54:50,400][45637] Signal inference workers to stop experience collection...
+[2023-02-23 00:54:50,437][45651] InferenceWorker_p0-w0: stopping experience collection
+[2023-02-23 00:54:50,762][05631] Heartbeat connected on RolloutWorker_w4
+[2023-02-23 00:54:51,399][45652] Decorrelating experience for 32 frames...
+[2023-02-23 00:54:51,728][45637] Signal inference workers to resume experience collection...
+[2023-02-23 00:54:51,728][45651] InferenceWorker_p0-w0: resuming experience collection
+[2023-02-23 00:54:54,895][05631] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 10035200. Throughput: 0: 219.2. Samples: 3288. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
+[2023-02-23 00:54:54,900][05631] Avg episode reward: [(0, '3.027')]
+[2023-02-23 00:54:55,514][45652] Decorrelating experience for 64 frames...
+[2023-02-23 00:54:58,813][45652] Decorrelating experience for 96 frames...
+[2023-02-23 00:54:59,251][05631] Heartbeat connected on RolloutWorker_w0
+[2023-02-23 00:54:59,895][05631] Fps is (10 sec: 2457.6, 60 sec: 1228.8, 300 sec: 1228.8). Total num frames: 10047488. Throughput: 0: 256.6. Samples: 5132. Policy #0 lag: (min: 0.0, avg: 0.1, max: 1.0)
+[2023-02-23 00:54:59,902][05631] Avg episode reward: [(0, '3.913')]
+[2023-02-23 00:55:03,567][45651] Updated weights for policy 0, policy_version 2457 (0.0654)
+[2023-02-23 00:55:04,895][05631] Fps is (10 sec: 3276.7, 60 sec: 1802.2, 300 sec: 1802.2). Total num frames: 10067968. Throughput: 0: 433.6. Samples: 10840. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2023-02-23 00:55:04,898][05631] Avg episode reward: [(0, '4.593')]
+[2023-02-23 00:55:09,895][05631] Fps is (10 sec: 3686.4, 60 sec: 2048.0, 300 sec: 2048.0). Total num frames: 10084352. Throughput: 0: 551.1. Samples: 16534. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-23 00:55:09,898][05631] Avg episode reward: [(0, '4.883')]
+[2023-02-23 00:55:14,895][05631] Fps is (10 sec: 2867.2, 60 sec: 2106.5, 300 sec: 2106.5). Total num frames: 10096640. Throughput: 0: 527.7. Samples: 18470. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 00:55:14,903][05631] Avg episode reward: [(0, '4.908')]
+[2023-02-23 00:55:16,754][45651] Updated weights for policy 0, policy_version 2467 (0.0030)
+[2023-02-23 00:55:19,895][05631] Fps is (10 sec: 2867.2, 60 sec: 2252.8, 300 sec: 2252.8). Total num frames: 10113024. Throughput: 0: 558.4. Samples: 22336. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 00:55:19,902][05631] Avg episode reward: [(0, '4.924')]
+[2023-02-23 00:55:24,895][05631] Fps is (10 sec: 3686.4, 60 sec: 2457.6, 300 sec: 2457.6). Total num frames: 10133504. Throughput: 0: 629.9. Samples: 28344. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-23 00:55:24,897][05631] Avg episode reward: [(0, '4.619')]
+[2023-02-23 00:55:27,532][45651] Updated weights for policy 0, policy_version 2477 (0.0027)
+[2023-02-23 00:55:29,895][05631] Fps is (10 sec: 4096.0, 60 sec: 2621.4, 300 sec: 2621.4). Total num frames: 10153984. Throughput: 0: 698.1. Samples: 31416. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 00:55:29,898][05631] Avg episode reward: [(0, '4.531')]
+[2023-02-23 00:55:34,895][05631] Fps is (10 sec: 3276.8, 60 sec: 2606.5, 300 sec: 2606.5). Total num frames: 10166272. Throughput: 0: 756.6. Samples: 35958. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 00:55:34,906][05631] Avg episode reward: [(0, '4.622')]
+[2023-02-23 00:55:39,896][05631] Fps is (10 sec: 2457.3, 60 sec: 2594.1, 300 sec: 2594.1). Total num frames: 10178560. Throughput: 0: 814.6. Samples: 39948. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-23 00:55:39,899][05631] Avg episode reward: [(0, '4.828')]
+[2023-02-23 00:55:41,409][45651] Updated weights for policy 0, policy_version 2487 (0.0034)
+[2023-02-23 00:55:44,895][05631] Fps is (10 sec: 3276.8, 60 sec: 2935.5, 300 sec: 2709.7). Total num frames: 10199040. Throughput: 0: 837.3. Samples: 42810. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 00:55:44,898][05631] Avg episode reward: [(0, '4.735')]
+[2023-02-23 00:55:49,895][05631] Fps is (10 sec: 4096.5, 60 sec: 3276.8, 300 sec: 2808.7). Total num frames: 10219520. Throughput: 0: 848.6. Samples: 49028. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 00:55:49,898][05631] Avg episode reward: [(0, '4.727')]
+[2023-02-23 00:55:51,857][45651] Updated weights for policy 0, policy_version 2497 (0.0015)
+[2023-02-23 00:55:54,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 2785.3). Total num frames: 10231808. Throughput: 0: 825.7. Samples: 53690. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 00:55:54,898][05631] Avg episode reward: [(0, '4.731')]
+[2023-02-23 00:55:59,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 2816.0). Total num frames: 10248192. Throughput: 0: 825.6. Samples: 55620. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 00:55:59,898][05631] Avg episode reward: [(0, '4.853')]
+[2023-02-23 00:56:04,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 2843.1). Total num frames: 10264576. Throughput: 0: 846.6. Samples: 60432. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 00:56:04,898][05631] Avg episode reward: [(0, '4.780')]
+[2023-02-23 00:56:05,187][45651] Updated weights for policy 0, policy_version 2507 (0.0020)
+[2023-02-23 00:56:09,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 2912.7). Total num frames: 10285056. Throughput: 0: 852.0. Samples: 66686. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 00:56:09,902][05631] Avg episode reward: [(0, '4.848')]
+[2023-02-23 00:56:14,895][05631] Fps is (10 sec: 3686.3, 60 sec: 3413.3, 300 sec: 2931.9). Total num frames: 10301440. Throughput: 0: 841.4. Samples: 69280. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 00:56:14,899][05631] Avg episode reward: [(0, '4.763')]
+[2023-02-23 00:56:17,335][45651] Updated weights for policy 0, policy_version 2517 (0.0028)
+[2023-02-23 00:56:19,897][05631] Fps is (10 sec: 2866.7, 60 sec: 3345.0, 300 sec: 2908.1). Total num frames: 10313728. Throughput: 0: 827.7. Samples: 73204. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-23 00:56:19,900][05631] Avg episode reward: [(0, '4.814')]
+[2023-02-23 00:56:19,915][45637] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002518_10313728.pth...
+[2023-02-23 00:56:20,104][45637] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002445_10014720.pth
+[2023-02-23 00:56:24,895][05631] Fps is (10 sec: 2867.3, 60 sec: 3276.8, 300 sec: 2925.7). Total num frames: 10330112. Throughput: 0: 844.7. Samples: 77960. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 00:56:24,897][05631] Avg episode reward: [(0, '4.862')]
+[2023-02-23 00:56:29,237][45651] Updated weights for policy 0, policy_version 2527 (0.0033)
+[2023-02-23 00:56:29,895][05631] Fps is (10 sec: 3687.1, 60 sec: 3276.8, 300 sec: 2978.9). Total num frames: 10350592. Throughput: 0: 848.4. Samples: 80988. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 00:56:29,898][05631] Avg episode reward: [(0, '4.880')]
+[2023-02-23 00:56:34,900][05631] Fps is (10 sec: 3684.7, 60 sec: 3344.8, 300 sec: 2991.7). Total num frames: 10366976. Throughput: 0: 836.6. Samples: 86678. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 00:56:34,902][05631] Avg episode reward: [(0, '4.507')]
+[2023-02-23 00:56:39,903][05631] Fps is (10 sec: 3274.1, 60 sec: 3412.9, 300 sec: 3003.5). Total num frames: 10383360. Throughput: 0: 821.3. Samples: 90656. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 00:56:39,907][05631] Avg episode reward: [(0, '4.497')]
+[2023-02-23 00:56:42,485][45651] Updated weights for policy 0, policy_version 2537 (0.0020)
+[2023-02-23 00:56:44,895][05631] Fps is (10 sec: 3278.3, 60 sec: 3345.1, 300 sec: 3014.7). Total num frames: 10399744. Throughput: 0: 822.2. Samples: 92620. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
+[2023-02-23 00:56:44,902][05631] Avg episode reward: [(0, '4.669')]
+[2023-02-23 00:56:49,895][05631] Fps is (10 sec: 3689.5, 60 sec: 3345.1, 300 sec: 3056.2). Total num frames: 10420224. Throughput: 0: 850.7. Samples: 98712. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-23 00:56:49,902][05631] Avg episode reward: [(0, '4.774')]
+[2023-02-23 00:56:52,595][45651] Updated weights for policy 0, policy_version 2547 (0.0013)
+[2023-02-23 00:56:54,895][05631] Fps is (10 sec: 3686.3, 60 sec: 3413.3, 300 sec: 3064.4). Total num frames: 10436608. Throughput: 0: 838.9. Samples: 104436. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 00:56:54,898][05631] Avg episode reward: [(0, '4.482')]
+[2023-02-23 00:56:59,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3042.7). Total num frames: 10448896. Throughput: 0: 823.1. Samples: 106320. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-23 00:56:59,904][05631] Avg episode reward: [(0, '4.475')]
+[2023-02-23 00:57:04,896][05631] Fps is (10 sec: 2867.0, 60 sec: 3345.0, 300 sec: 3050.8). Total num frames: 10465280. Throughput: 0: 822.2. Samples: 110200. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-23 00:57:04,899][05631] Avg episode reward: [(0, '4.676')]
+[2023-02-23 00:57:06,618][45651] Updated weights for policy 0, policy_version 2557 (0.0020)
+[2023-02-23 00:57:09,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3085.7). Total num frames: 10485760. Throughput: 0: 850.8. Samples: 116248. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 00:57:09,902][05631] Avg episode reward: [(0, '4.709')]
+[2023-02-23 00:57:14,895][05631] Fps is (10 sec: 3686.7, 60 sec: 3345.1, 300 sec: 3091.8). Total num frames: 10502144. Throughput: 0: 851.7. Samples: 119314. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 00:57:14,900][05631] Avg episode reward: [(0, '4.603')]
+[2023-02-23 00:57:18,298][45651] Updated weights for policy 0, policy_version 2567 (0.0020)
+[2023-02-23 00:57:19,898][05631] Fps is (10 sec: 2866.5, 60 sec: 3345.0, 300 sec: 3072.0). Total num frames: 10514432. Throughput: 0: 822.2. Samples: 123674. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 00:57:19,903][05631] Avg episode reward: [(0, '4.601')]
+[2023-02-23 00:57:24,895][05631] Fps is (10 sec: 2867.1, 60 sec: 3345.0, 300 sec: 3078.2). Total num frames: 10530816. Throughput: 0: 820.0. Samples: 127548. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 00:57:24,898][05631] Avg episode reward: [(0, '4.578')]
+[2023-02-23 00:57:29,895][05631] Fps is (10 sec: 3687.3, 60 sec: 3345.1, 300 sec: 3108.1). Total num frames: 10551296. Throughput: 0: 842.8. Samples: 130544. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 00:57:29,904][05631] Avg episode reward: [(0, '4.698')]
+[2023-02-23 00:57:30,882][45651] Updated weights for policy 0, policy_version 2577 (0.0031)
+[2023-02-23 00:57:34,895][05631] Fps is (10 sec: 4096.2, 60 sec: 3413.6, 300 sec: 3136.4). Total num frames: 10571776. Throughput: 0: 842.9. Samples: 136642. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 00:57:34,898][05631] Avg episode reward: [(0, '4.803')]
+[2023-02-23 00:57:39,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3345.5, 300 sec: 3117.5). Total num frames: 10584064. Throughput: 0: 814.7. Samples: 141096. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 00:57:39,900][05631] Avg episode reward: [(0, '4.791')]
+[2023-02-23 00:57:44,034][45651] Updated weights for policy 0, policy_version 2587 (0.0025)
+[2023-02-23 00:57:44,895][05631] Fps is (10 sec: 2457.6, 60 sec: 3276.8, 300 sec: 3099.7). Total num frames: 10596352. Throughput: 0: 815.3. Samples: 143008. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 00:57:44,899][05631] Avg episode reward: [(0, '4.920')]
+[2023-02-23 00:57:49,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 3125.9). Total num frames: 10616832. Throughput: 0: 842.1. Samples: 148092. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 00:57:49,898][05631] Avg episode reward: [(0, '5.085')]
+[2023-02-23 00:57:54,539][45651] Updated weights for policy 0, policy_version 2597 (0.0016)
+[2023-02-23 00:57:54,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3345.1, 300 sec: 3150.8). Total num frames: 10637312. Throughput: 0: 846.8. Samples: 154356. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-23 00:57:54,903][05631] Avg episode reward: [(0, '4.804')]
+[2023-02-23 00:57:59,901][05631] Fps is (10 sec: 3274.7, 60 sec: 3344.7, 300 sec: 3133.3). Total num frames: 10649600. Throughput: 0: 830.9. Samples: 156708. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 00:57:59,905][05631] Avg episode reward: [(0, '4.581')]
+[2023-02-23 00:58:04,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3136.9). Total num frames: 10665984. Throughput: 0: 818.8. Samples: 160520. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 00:58:04,898][05631] Avg episode reward: [(0, '4.692')]
+[2023-02-23 00:58:08,407][45651] Updated weights for policy 0, policy_version 2607 (0.0031)
+[2023-02-23 00:58:09,895][05631] Fps is (10 sec: 3278.9, 60 sec: 3276.8, 300 sec: 3140.3). Total num frames: 10682368. Throughput: 0: 850.8. Samples: 165832. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 00:58:09,904][05631] Avg episode reward: [(0, '4.884')]
+[2023-02-23 00:58:14,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3162.5). Total num frames: 10702848. Throughput: 0: 852.1. Samples: 168890. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 00:58:14,897][05631] Avg episode reward: [(0, '4.921')]
+[2023-02-23 00:58:19,350][45651] Updated weights for policy 0, policy_version 2617 (0.0013)
+[2023-02-23 00:58:19,895][05631] Fps is (10 sec: 3686.3, 60 sec: 3413.5, 300 sec: 3165.1). Total num frames: 10719232. Throughput: 0: 838.7. Samples: 174386. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 00:58:19,899][05631] Avg episode reward: [(0, '4.714')]
+[2023-02-23 00:58:19,911][45637] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002617_10719232.pth...
+[2023-02-23 00:58:20,116][45637] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002447_10022912.pth
+[2023-02-23 00:58:24,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3149.4). Total num frames: 10731520. Throughput: 0: 826.2. Samples: 178274. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+[2023-02-23 00:58:24,902][05631] Avg episode reward: [(0, '4.534')]
+[2023-02-23 00:58:29,895][05631] Fps is (10 sec: 2867.3, 60 sec: 3276.8, 300 sec: 3152.1). Total num frames: 10747904. Throughput: 0: 832.3. Samples: 180462. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 00:58:29,898][05631] Avg episode reward: [(0, '4.472')]
+[2023-02-23 00:58:32,043][45651] Updated weights for policy 0, policy_version 2627 (0.0013)
+[2023-02-23 00:58:34,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3276.8, 300 sec: 3172.2). Total num frames: 10768384. Throughput: 0: 857.9. Samples: 186696. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 00:58:34,903][05631] Avg episode reward: [(0, '4.644')]
+[2023-02-23 00:58:39,897][05631] Fps is (10 sec: 4095.2, 60 sec: 3413.2, 300 sec: 3191.4). Total num frames: 10788864. Throughput: 0: 837.3. Samples: 192036. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 00:58:39,905][05631] Avg episode reward: [(0, '4.651')]
+[2023-02-23 00:58:44,281][45651] Updated weights for policy 0, policy_version 2637 (0.0014)
+[2023-02-23 00:58:44,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3176.5). Total num frames: 10801152. Throughput: 0: 829.3. Samples: 194020. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 00:58:44,904][05631] Avg episode reward: [(0, '4.730')]
+[2023-02-23 00:58:49,895][05631] Fps is (10 sec: 2867.8, 60 sec: 3345.1, 300 sec: 3178.5). Total num frames: 10817536. Throughput: 0: 838.8. Samples: 198268. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 00:58:49,897][05631] Avg episode reward: [(0, '5.068')]
+[2023-02-23 00:58:54,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3196.5). Total num frames: 10838016. Throughput: 0: 857.2. Samples: 204404. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 00:58:54,898][05631] Avg episode reward: [(0, '5.087')]
+[2023-02-23 00:58:55,545][45651] Updated weights for policy 0, policy_version 2647 (0.0021)
+[2023-02-23 00:58:59,898][05631] Fps is (10 sec: 3685.4, 60 sec: 3413.5, 300 sec: 3198.0). Total num frames: 10854400. Throughput: 0: 858.9. Samples: 207542. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 00:58:59,902][05631] Avg episode reward: [(0, '4.912')]
+[2023-02-23 00:59:04,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3199.5). Total num frames: 10870784. Throughput: 0: 829.3. Samples: 211706. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 00:59:04,900][05631] Avg episode reward: [(0, '4.666')]
+[2023-02-23 00:59:09,375][45651] Updated weights for policy 0, policy_version 2657 (0.0023)
+[2023-02-23 00:59:09,895][05631] Fps is (10 sec: 2868.0, 60 sec: 3345.1, 300 sec: 3185.8). Total num frames: 10883072. Throughput: 0: 837.0. Samples: 215940. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-23 00:59:09,903][05631] Avg episode reward: [(0, '4.553')]
+[2023-02-23 00:59:14,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3202.3). Total num frames: 10903552. Throughput: 0: 857.7. Samples: 219060. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-23 00:59:14,901][05631] Avg episode reward: [(0, '4.504')]
+[2023-02-23 00:59:19,232][45651] Updated weights for policy 0, policy_version 2667 (0.0038)
+[2023-02-23 00:59:19,901][05631] Fps is (10 sec: 4093.4, 60 sec: 3413.0, 300 sec: 3218.2). Total num frames: 10924032. Throughput: 0: 859.9. Samples: 225396. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 00:59:19,904][05631] Avg episode reward: [(0, '4.636')]
+[2023-02-23 00:59:24,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3204.9). Total num frames: 10936320. Throughput: 0: 830.3. Samples: 229398. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 00:59:24,900][05631] Avg episode reward: [(0, '4.745')]
+[2023-02-23 00:59:29,895][05631] Fps is (10 sec: 2459.2, 60 sec: 3345.1, 300 sec: 3192.1). Total num frames: 10948608. Throughput: 0: 827.3. Samples: 231248. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 00:59:29,901][05631] Avg episode reward: [(0, '4.467')]
+[2023-02-23 00:59:33,285][45651] Updated weights for policy 0, policy_version 2677 (0.0033)
+[2023-02-23 00:59:34,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3207.4). Total num frames: 10969088. Throughput: 0: 847.7. Samples: 236416. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 00:59:34,904][05631] Avg episode reward: [(0, '4.595')]
+[2023-02-23 00:59:39,897][05631] Fps is (10 sec: 4095.4, 60 sec: 3345.1, 300 sec: 3276.8). Total num frames: 10989568. Throughput: 0: 851.6. Samples: 242726. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-23 00:59:39,904][05631] Avg episode reward: [(0, '4.756')]
+[2023-02-23 00:59:44,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3318.5). Total num frames: 11001856. Throughput: 0: 824.9. Samples: 244662. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 00:59:44,900][05631] Avg episode reward: [(0, '4.875')]
+[2023-02-23 00:59:45,305][45651] Updated weights for policy 0, policy_version 2687 (0.0017)
+[2023-02-23 00:59:49,895][05631] Fps is (10 sec: 2458.0, 60 sec: 3276.8, 300 sec: 3318.5). Total num frames: 11014144. Throughput: 0: 818.7. Samples: 248546. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 00:59:49,899][05631] Avg episode reward: [(0, '4.856')]
+[2023-02-23 00:59:54,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3360.1). Total num frames: 11038720. Throughput: 0: 850.3. Samples: 254202. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 00:59:54,897][05631] Avg episode reward: [(0, '4.858')]
+[2023-02-23 00:59:56,934][45651] Updated weights for policy 0, policy_version 2697 (0.0013)
+[2023-02-23 00:59:59,895][05631] Fps is (10 sec: 4505.6, 60 sec: 3413.5, 300 sec: 3360.1). Total num frames: 11059200. Throughput: 0: 850.9. Samples: 257352. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 00:59:59,899][05631] Avg episode reward: [(0, '4.786')]
+[2023-02-23 01:00:04,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3346.2). Total num frames: 11071488. Throughput: 0: 819.4. Samples: 262264. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:00:04,898][05631] Avg episode reward: [(0, '4.617')]
+[2023-02-23 01:00:09,897][05631] Fps is (10 sec: 2457.2, 60 sec: 3345.0, 300 sec: 3346.2). Total num frames: 11083776. Throughput: 0: 819.7. Samples: 266286. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:00:09,906][05631] Avg episode reward: [(0, '4.651')]
+[2023-02-23 01:00:10,279][45651] Updated weights for policy 0, policy_version 2707 (0.0020)
+[2023-02-23 01:00:14,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3360.1). Total num frames: 11104256. Throughput: 0: 837.0. Samples: 268914. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-23 01:00:14,900][05631] Avg episode reward: [(0, '4.596')]
+[2023-02-23 01:00:19,895][05631] Fps is (10 sec: 4096.6, 60 sec: 3345.4, 300 sec: 3360.1). Total num frames: 11124736. Throughput: 0: 860.4. Samples: 275136. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:00:19,903][05631] Avg episode reward: [(0, '4.642')]
+[2023-02-23 01:00:19,917][45637] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002716_11124736.pth...
+[2023-02-23 01:00:20,080][45637] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002518_10313728.pth
+[2023-02-23 01:00:20,708][45651] Updated weights for policy 0, policy_version 2717 (0.0014)
+[2023-02-23 01:00:24,898][05631] Fps is (10 sec: 3275.7, 60 sec: 3344.9, 300 sec: 3332.3). Total num frames: 11137024. Throughput: 0: 825.5. Samples: 279874. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 01:00:24,901][05631] Avg episode reward: [(0, '4.615')]
+[2023-02-23 01:00:29,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3346.2). Total num frames: 11153408. Throughput: 0: 826.8. Samples: 281866. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:00:29,898][05631] Avg episode reward: [(0, '4.647')]
+[2023-02-23 01:00:34,604][45651] Updated weights for policy 0, policy_version 2727 (0.0024)
+[2023-02-23 01:00:34,895][05631] Fps is (10 sec: 3277.8, 60 sec: 3345.1, 300 sec: 3360.1). Total num frames: 11169792. Throughput: 0: 839.7. Samples: 286334. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 01:00:34,898][05631] Avg episode reward: [(0, '4.733')]
+[2023-02-23 01:00:39,895][05631] Fps is (10 sec: 3686.5, 60 sec: 3345.2, 300 sec: 3360.1). Total num frames: 11190272. Throughput: 0: 854.0. Samples: 292632. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:00:39,898][05631] Avg episode reward: [(0, '4.762')]
+[2023-02-23 01:00:44,897][05631] Fps is (10 sec: 3685.8, 60 sec: 3413.2, 300 sec: 3346.2). Total num frames: 11206656. Throughput: 0: 847.3. Samples: 295480. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:00:44,903][05631] Avg episode reward: [(0, '4.719')]
+[2023-02-23 01:00:45,871][45651] Updated weights for policy 0, policy_version 2737 (0.0032)
+[2023-02-23 01:00:49,897][05631] Fps is (10 sec: 2866.5, 60 sec: 3413.2, 300 sec: 3346.2). Total num frames: 11218944. Throughput: 0: 824.6. Samples: 299374. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-23 01:00:49,904][05631] Avg episode reward: [(0, '4.637')]
+[2023-02-23 01:00:54,896][05631] Fps is (10 sec: 2867.5, 60 sec: 3276.8, 300 sec: 3346.2). Total num frames: 11235328. Throughput: 0: 844.2. Samples: 304274. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 01:00:54,899][05631] Avg episode reward: [(0, '4.677')]
+[2023-02-23 01:00:58,096][45651] Updated weights for policy 0, policy_version 2747 (0.0013)
+[2023-02-23 01:00:59,895][05631] Fps is (10 sec: 3687.2, 60 sec: 3276.8, 300 sec: 3360.1). Total num frames: 11255808. Throughput: 0: 855.7. Samples: 307420. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 01:00:59,903][05631] Avg episode reward: [(0, '4.830')]
+[2023-02-23 01:01:04,897][05631] Fps is (10 sec: 3685.8, 60 sec: 3344.9, 300 sec: 3346.2). Total num frames: 11272192. Throughput: 0: 843.7. Samples: 313104. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:01:04,900][05631] Avg episode reward: [(0, '4.840')]
+[2023-02-23 01:01:09,895][05631] Fps is (10 sec: 2867.1, 60 sec: 3345.1, 300 sec: 3332.3). Total num frames: 11284480. Throughput: 0: 828.4. Samples: 317148. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 01:01:09,898][05631] Avg episode reward: [(0, '4.770')]
+[2023-02-23 01:01:11,463][45651] Updated weights for policy 0, policy_version 2757 (0.0043)
+[2023-02-23 01:01:14,895][05631] Fps is (10 sec: 2867.8, 60 sec: 3276.8, 300 sec: 3346.2). Total num frames: 11300864. Throughput: 0: 826.7. Samples: 319066. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-02-23 01:01:14,899][05631] Avg episode reward: [(0, '4.703')]
+[2023-02-23 01:01:19,895][05631] Fps is (10 sec: 3686.5, 60 sec: 3276.8, 300 sec: 3360.1). Total num frames: 11321344. Throughput: 0: 860.7. Samples: 325066. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 01:01:19,900][05631] Avg episode reward: [(0, '4.784')]
+[2023-02-23 01:01:21,790][45651] Updated weights for policy 0, policy_version 2767 (0.0017)
+[2023-02-23 01:01:24,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3413.5, 300 sec: 3360.1). Total num frames: 11341824. Throughput: 0: 851.3. Samples: 330942. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:01:24,904][05631] Avg episode reward: [(0, '4.672')]
+[2023-02-23 01:01:29,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3346.3). Total num frames: 11354112. Throughput: 0: 831.9. Samples: 332914. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:01:29,897][05631] Avg episode reward: [(0, '4.539')]
+[2023-02-23 01:01:34,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3346.3). Total num frames: 11370496. Throughput: 0: 833.2. Samples: 336864. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 01:01:34,903][05631] Avg episode reward: [(0, '4.577')]
+[2023-02-23 01:01:35,597][45651] Updated weights for policy 0, policy_version 2777 (0.0013)
+[2023-02-23 01:01:39,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3360.1). Total num frames: 11390976. Throughput: 0: 858.8. Samples: 342918. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 01:01:39,902][05631] Avg episode reward: [(0, '4.821')]
+[2023-02-23 01:01:44,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3413.4, 300 sec: 3360.1). Total num frames: 11411456. Throughput: 0: 858.8. Samples: 346066. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 01:01:44,898][05631] Avg episode reward: [(0, '4.698')]
+[2023-02-23 01:01:46,031][45651] Updated weights for policy 0, policy_version 2787 (0.0013)
+[2023-02-23 01:01:49,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3413.5, 300 sec: 3346.2). Total num frames: 11423744. Throughput: 0: 833.9. Samples: 350628. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 01:01:49,898][05631] Avg episode reward: [(0, '4.570')]
+[2023-02-23 01:01:54,895][05631] Fps is (10 sec: 2457.5, 60 sec: 3345.1, 300 sec: 3346.2). Total num frames: 11436032. Throughput: 0: 831.9. Samples: 354584. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+[2023-02-23 01:01:54,904][05631] Avg episode reward: [(0, '4.489')]
+[2023-02-23 01:01:59,243][45651] Updated weights for policy 0, policy_version 2797 (0.0017)
+[2023-02-23 01:01:59,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3360.1). Total num frames: 11456512. Throughput: 0: 853.5. Samples: 357474. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:01:59,902][05631] Avg episode reward: [(0, '4.342')]
+[2023-02-23 01:02:04,895][05631] Fps is (10 sec: 4096.1, 60 sec: 3413.5, 300 sec: 3360.1). Total num frames: 11476992. Throughput: 0: 859.9. Samples: 363762. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-23 01:02:04,898][05631] Avg episode reward: [(0, '4.529')]
+[2023-02-23 01:02:09,896][05631] Fps is (10 sec: 3686.2, 60 sec: 3481.6, 300 sec: 3360.1). Total num frames: 11493376. Throughput: 0: 828.3. Samples: 368218. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 01:02:09,899][05631] Avg episode reward: [(0, '4.388')]
+[2023-02-23 01:02:11,359][45651] Updated weights for policy 0, policy_version 2807 (0.0019)
+[2023-02-23 01:02:14,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3360.1). Total num frames: 11505664. Throughput: 0: 828.4. Samples: 370190. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:02:14,897][05631] Avg episode reward: [(0, '4.362')]
+[2023-02-23 01:02:19,895][05631] Fps is (10 sec: 3276.9, 60 sec: 3413.3, 300 sec: 3374.0). Total num frames: 11526144. Throughput: 0: 855.6. Samples: 375366. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:02:19,901][05631] Avg episode reward: [(0, '4.558')]
+[2023-02-23 01:02:19,917][45637] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002814_11526144.pth...
+[2023-02-23 01:02:20,064][45637] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002617_10719232.pth
+[2023-02-23 01:02:22,779][45651] Updated weights for policy 0, policy_version 2817 (0.0020)
+[2023-02-23 01:02:24,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 3374.0). Total num frames: 11546624. Throughput: 0: 856.6. Samples: 381466. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 01:02:24,902][05631] Avg episode reward: [(0, '4.469')]
+[2023-02-23 01:02:29,895][05631] Fps is (10 sec: 3276.9, 60 sec: 3413.3, 300 sec: 3346.2). Total num frames: 11558912. Throughput: 0: 839.2. Samples: 383832. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:02:29,901][05631] Avg episode reward: [(0, '4.539')]
+[2023-02-23 01:02:34,895][05631] Fps is (10 sec: 2457.6, 60 sec: 3345.1, 300 sec: 3346.2). Total num frames: 11571200. Throughput: 0: 824.6. Samples: 387734. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 01:02:34,898][05631] Avg episode reward: [(0, '4.643')]
+[2023-02-23 01:02:36,713][45651] Updated weights for policy 0, policy_version 2827 (0.0026)
+[2023-02-23 01:02:39,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3374.0). Total num frames: 11591680. Throughput: 0: 847.9. Samples: 392740. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:02:39,898][05631] Avg episode reward: [(0, '4.634')]
+[2023-02-23 01:02:44,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3345.1, 300 sec: 3374.0). Total num frames: 11612160. Throughput: 0: 853.1. Samples: 395862. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:02:44,899][05631] Avg episode reward: [(0, '4.616')]
+[2023-02-23 01:02:46,528][45651] Updated weights for policy 0, policy_version 2837 (0.0015)
+[2023-02-23 01:02:49,900][05631] Fps is (10 sec: 3684.6, 60 sec: 3413.0, 300 sec: 3360.0). Total num frames: 11628544. Throughput: 0: 840.3. Samples: 401582. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 01:02:49,903][05631] Avg episode reward: [(0, '4.600')]
+[2023-02-23 01:02:54,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3360.2). Total num frames: 11640832. Throughput: 0: 828.6. Samples: 405504. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-23 01:02:54,902][05631] Avg episode reward: [(0, '4.534')]
+[2023-02-23 01:02:59,895][05631] Fps is (10 sec: 2868.6, 60 sec: 3345.0, 300 sec: 3360.1). Total num frames: 11657216. Throughput: 0: 831.1. Samples: 407592. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-23 01:02:59,908][05631] Avg episode reward: [(0, '4.532')]
+[2023-02-23 01:03:00,189][45651] Updated weights for policy 0, policy_version 2847 (0.0018)
+[2023-02-23 01:03:04,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3374.0). Total num frames: 11677696. Throughput: 0: 855.7. Samples: 413874. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:03:04,904][05631] Avg episode reward: [(0, '4.745')]
+[2023-02-23 01:03:09,896][05631] Fps is (10 sec: 4095.6, 60 sec: 3413.3, 300 sec: 3374.0). Total num frames: 11698176. Throughput: 0: 839.2. Samples: 419232. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:03:09,907][05631] Avg episode reward: [(0, '4.788')]
+[2023-02-23 01:03:11,512][45651] Updated weights for policy 0, policy_version 2857 (0.0020)
+[2023-02-23 01:03:14,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3360.1). Total num frames: 11710464. Throughput: 0: 830.9. Samples: 421224. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:03:14,901][05631] Avg episode reward: [(0, '4.857')]
+[2023-02-23 01:03:19,895][05631] Fps is (10 sec: 2867.6, 60 sec: 3345.1, 300 sec: 3374.0). Total num frames: 11726848. Throughput: 0: 837.6. Samples: 425426. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:03:19,898][05631] Avg episode reward: [(0, '4.875')]
+[2023-02-23 01:03:23,691][45651] Updated weights for policy 0, policy_version 2867 (0.0019)
+[2023-02-23 01:03:24,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3387.9). Total num frames: 11747328. Throughput: 0: 864.4. Samples: 431640. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:03:24,898][05631] Avg episode reward: [(0, '4.924')]
+[2023-02-23 01:03:29,897][05631] Fps is (10 sec: 3685.8, 60 sec: 3413.2, 300 sec: 3374.0). Total num frames: 11763712. Throughput: 0: 866.8. Samples: 434868. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:03:29,902][05631] Avg episode reward: [(0, '5.046')]
+[2023-02-23 01:03:34,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3360.1). Total num frames: 11780096. Throughput: 0: 832.3. Samples: 439032. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-23 01:03:34,898][05631] Avg episode reward: [(0, '5.050')]
+[2023-02-23 01:03:36,605][45651] Updated weights for policy 0, policy_version 2877 (0.0014)
+[2023-02-23 01:03:39,895][05631] Fps is (10 sec: 2867.7, 60 sec: 3345.1, 300 sec: 3360.1). Total num frames: 11792384. Throughput: 0: 839.5. Samples: 443282. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:03:39,898][05631] Avg episode reward: [(0, '5.342')]
+[2023-02-23 01:03:44,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3374.0). Total num frames: 11812864. Throughput: 0: 863.5. Samples: 446450. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 01:03:44,902][05631] Avg episode reward: [(0, '5.616')]
+[2023-02-23 01:03:44,908][45637] Saving new best policy, reward=5.616!
+[2023-02-23 01:03:47,080][45651] Updated weights for policy 0, policy_version 2887 (0.0024)
+[2023-02-23 01:03:49,895][05631] Fps is (10 sec: 4095.9, 60 sec: 3413.6, 300 sec: 3374.0). Total num frames: 11833344. Throughput: 0: 862.2. Samples: 452674. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-23 01:03:49,902][05631] Avg episode reward: [(0, '5.501')]
+[2023-02-23 01:03:54,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3360.1). Total num frames: 11845632. Throughput: 0: 835.4. Samples: 456826. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:03:54,903][05631] Avg episode reward: [(0, '5.344')]
+[2023-02-23 01:03:59,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3360.1). Total num frames: 11862016. Throughput: 0: 836.2. Samples: 458852. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:03:59,897][05631] Avg episode reward: [(0, '5.217')]
+[2023-02-23 01:04:00,791][45651] Updated weights for policy 0, policy_version 2897 (0.0033)
+[2023-02-23 01:04:04,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3387.9). Total num frames: 11882496. Throughput: 0: 866.1. Samples: 464400. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-23 01:04:04,902][05631] Avg episode reward: [(0, '5.303')]
+[2023-02-23 01:04:09,900][05631] Fps is (10 sec: 4094.9, 60 sec: 3413.2, 300 sec: 3387.8). Total num frames: 11902976. Throughput: 0: 870.2. Samples: 470800. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 01:04:09,904][05631] Avg episode reward: [(0, '5.586')]
+[2023-02-23 01:04:10,841][45651] Updated weights for policy 0, policy_version 2907 (0.0013)
+[2023-02-23 01:04:14,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3360.2). Total num frames: 11915264. Throughput: 0: 844.0. Samples: 472848. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:04:14,901][05631] Avg episode reward: [(0, '5.629')]
+[2023-02-23 01:04:14,909][45637] Saving new best policy, reward=5.629!
+[2023-02-23 01:04:19,895][05631] Fps is (10 sec: 2458.3, 60 sec: 3345.1, 300 sec: 3360.1). Total num frames: 11927552. Throughput: 0: 838.3. Samples: 476756. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:04:19,898][05631] Avg episode reward: [(0, '5.828')]
+[2023-02-23 01:04:19,916][45637] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002912_11927552.pth...
+[2023-02-23 01:04:20,164][45637] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002716_11124736.pth
+[2023-02-23 01:04:20,188][45637] Saving new best policy, reward=5.828!
+[2023-02-23 01:04:24,270][45651] Updated weights for policy 0, policy_version 2917 (0.0023)
+[2023-02-23 01:04:24,895][05631] Fps is (10 sec: 3276.9, 60 sec: 3345.1, 300 sec: 3387.9). Total num frames: 11948032. Throughput: 0: 866.8. Samples: 482288. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
+[2023-02-23 01:04:24,898][05631] Avg episode reward: [(0, '5.835')]
+[2023-02-23 01:04:24,902][45637] Saving new best policy, reward=5.835!
+[2023-02-23 01:04:29,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3413.4, 300 sec: 3387.9). Total num frames: 11968512. Throughput: 0: 857.6. Samples: 485044. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-23 01:04:29,903][05631] Avg episode reward: [(0, '6.089')]
+[2023-02-23 01:04:29,915][45637] Saving new best policy, reward=6.089!
+[2023-02-23 01:04:34,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3360.1). Total num frames: 11980800. Throughput: 0: 820.9. Samples: 489616. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:04:34,900][05631] Avg episode reward: [(0, '6.125')]
+[2023-02-23 01:04:34,903][45637] Saving new best policy, reward=6.125!
+[2023-02-23 01:04:37,466][45651] Updated weights for policy 0, policy_version 2927 (0.0018)
+[2023-02-23 01:04:39,895][05631] Fps is (10 sec: 2457.6, 60 sec: 3345.1, 300 sec: 3360.1). Total num frames: 11993088. Throughput: 0: 814.0. Samples: 493458. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:04:39,898][05631] Avg episode reward: [(0, '5.904')]
+[2023-02-23 01:04:44,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3387.9). Total num frames: 12013568. Throughput: 0: 824.9. Samples: 495974. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:04:44,898][05631] Avg episode reward: [(0, '6.066')]
+[2023-02-23 01:04:48,763][45651] Updated weights for policy 0, policy_version 2937 (0.0016)
+[2023-02-23 01:04:49,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3345.1, 300 sec: 3374.0). Total num frames: 12034048. Throughput: 0: 843.7. Samples: 502366. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:04:49,898][05631] Avg episode reward: [(0, '6.302')]
+[2023-02-23 01:04:49,910][45637] Saving new best policy, reward=6.302!
+[2023-02-23 01:04:54,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3360.1). Total num frames: 12050432. Throughput: 0: 815.5. Samples: 507494. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:04:54,902][05631] Avg episode reward: [(0, '6.597')]
+[2023-02-23 01:04:54,906][45637] Saving new best policy, reward=6.597!
+[2023-02-23 01:04:59,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3360.1). Total num frames: 12062720. Throughput: 0: 813.8. Samples: 509468. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:04:59,907][05631] Avg episode reward: [(0, '6.749')]
+[2023-02-23 01:04:59,919][45637] Saving new best policy, reward=6.749!
+[2023-02-23 01:05:02,360][45651] Updated weights for policy 0, policy_version 2947 (0.0020)
+[2023-02-23 01:05:04,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3276.8, 300 sec: 3374.0). Total num frames: 12079104. Throughput: 0: 825.2. Samples: 513892. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:05:04,903][05631] Avg episode reward: [(0, '6.564')]
+[2023-02-23 01:05:09,895][05631] Fps is (10 sec: 3686.5, 60 sec: 3277.0, 300 sec: 3374.0). Total num frames: 12099584. Throughput: 0: 844.4. Samples: 520286. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:05:09,903][05631] Avg episode reward: [(0, '7.006')]
+[2023-02-23 01:05:09,914][45637] Saving new best policy, reward=7.006!
+[2023-02-23 01:05:12,011][45651] Updated weights for policy 0, policy_version 2957 (0.0018)
+[2023-02-23 01:05:14,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3360.1). Total num frames: 12115968. Throughput: 0: 847.9. Samples: 523200. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:05:14,901][05631] Avg episode reward: [(0, '6.761')]
+[2023-02-23 01:05:19,896][05631] Fps is (10 sec: 3276.6, 60 sec: 3413.3, 300 sec: 3374.0). Total num frames: 12132352. Throughput: 0: 834.5. Samples: 527170. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
+[2023-02-23 01:05:19,902][05631] Avg episode reward: [(0, '6.529')]
+[2023-02-23 01:05:24,897][05631] Fps is (10 sec: 3276.3, 60 sec: 3345.0, 300 sec: 3374.0). Total num frames: 12148736. Throughput: 0: 855.2. Samples: 531944. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
+[2023-02-23 01:05:24,903][05631] Avg episode reward: [(0, '6.062')]
+[2023-02-23 01:05:25,769][45651] Updated weights for policy 0, policy_version 2967 (0.0013)
+[2023-02-23 01:05:29,895][05631] Fps is (10 sec: 3686.5, 60 sec: 3345.1, 300 sec: 3387.9). Total num frames: 12169216. Throughput: 0: 872.1. Samples: 535220. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 01:05:29,898][05631] Avg episode reward: [(0, '5.930')]
+[2023-02-23 01:05:34,895][05631] Fps is (10 sec: 3687.0, 60 sec: 3413.3, 300 sec: 3374.0). Total num frames: 12185600. Throughput: 0: 864.0. Samples: 541244. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 01:05:34,900][05631] Avg episode reward: [(0, '5.824')]
+[2023-02-23 01:05:36,978][45651] Updated weights for policy 0, policy_version 2977 (0.0023)
+[2023-02-23 01:05:39,895][05631] Fps is (10 sec: 3276.9, 60 sec: 3481.6, 300 sec: 3374.0). Total num frames: 12201984. Throughput: 0: 838.9. Samples: 545246. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 01:05:39,901][05631] Avg episode reward: [(0, '5.892')]
+[2023-02-23 01:05:44,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3374.0). Total num frames: 12214272. Throughput: 0: 840.6. Samples: 547294. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 01:05:44,902][05631] Avg episode reward: [(0, '5.849')]
+[2023-02-23 01:05:48,954][45651] Updated weights for policy 0, policy_version 2987 (0.0017)
+[2023-02-23 01:05:49,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3387.9). Total num frames: 12234752. Throughput: 0: 877.6. Samples: 553382. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:05:49,897][05631] Avg episode reward: [(0, '5.986')]
+[2023-02-23 01:05:54,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 3387.9). Total num frames: 12255232. Throughput: 0: 866.7. Samples: 559288. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 01:05:54,901][05631] Avg episode reward: [(0, '6.156')]
+[2023-02-23 01:05:59,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3387.9). Total num frames: 12271616. Throughput: 0: 845.7. Samples: 561258. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:05:59,898][05631] Avg episode reward: [(0, '6.426')]
+[2023-02-23 01:06:01,587][45651] Updated weights for policy 0, policy_version 2997 (0.0019)
+[2023-02-23 01:06:04,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3387.9). Total num frames: 12283904. Throughput: 0: 847.8. Samples: 565322. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:06:04,898][05631] Avg episode reward: [(0, '6.678')]
+[2023-02-23 01:06:09,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3401.8). Total num frames: 12304384. Throughput: 0: 874.3. Samples: 571286. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:06:09,898][05631] Avg episode reward: [(0, '6.951')]
+[2023-02-23 01:06:12,362][45651] Updated weights for policy 0, policy_version 3007 (0.0020)
+[2023-02-23 01:06:14,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3401.8). Total num frames: 12324864. Throughput: 0: 868.7. Samples: 574310. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
+[2023-02-23 01:06:14,901][05631] Avg episode reward: [(0, '6.966')]
+[2023-02-23 01:06:19,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3413.4, 300 sec: 3374.0). Total num frames: 12337152. Throughput: 0: 842.2. Samples: 579144. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:06:19,904][05631] Avg episode reward: [(0, '6.444')]
+[2023-02-23 01:06:19,915][45637] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003012_12337152.pth...
+[2023-02-23 01:06:20,190][45637] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002814_11526144.pth
+[2023-02-23 01:06:24,895][05631] Fps is (10 sec: 2457.6, 60 sec: 3345.1, 300 sec: 3374.0). Total num frames: 12349440. Throughput: 0: 840.2. Samples: 583054. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:06:24,902][05631] Avg episode reward: [(0, '6.206')]
+[2023-02-23 01:06:26,156][45651] Updated weights for policy 0, policy_version 3017 (0.0022)
+[2023-02-23 01:06:29,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3387.9). Total num frames: 12369920. Throughput: 0: 859.2. Samples: 585956. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:06:29,901][05631] Avg episode reward: [(0, '6.142')]
+[2023-02-23 01:06:34,895][05631] Fps is (10 sec: 4505.6, 60 sec: 3481.6, 300 sec: 3401.8). Total num frames: 12394496. Throughput: 0: 865.6. Samples: 592334. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:06:34,902][05631] Avg episode reward: [(0, '6.648')]
+[2023-02-23 01:06:35,942][45651] Updated weights for policy 0, policy_version 3027 (0.0015)
+[2023-02-23 01:06:39,895][05631] Fps is (10 sec: 3686.5, 60 sec: 3413.3, 300 sec: 3374.0). Total num frames: 12406784. Throughput: 0: 839.0. Samples: 597044. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 01:06:39,899][05631] Avg episode reward: [(0, '6.811')]
+[2023-02-23 01:06:44,896][05631] Fps is (10 sec: 2457.4, 60 sec: 3413.3, 300 sec: 3374.0). Total num frames: 12419072. Throughput: 0: 838.7. Samples: 599002. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
+[2023-02-23 01:06:44,907][05631] Avg episode reward: [(0, '6.693')]
+[2023-02-23 01:06:49,224][45651] Updated weights for policy 0, policy_version 3037 (0.0047)
+[2023-02-23 01:06:49,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3401.8). Total num frames: 12439552. Throughput: 0: 864.9. Samples: 604244. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
+[2023-02-23 01:06:49,900][05631] Avg episode reward: [(0, '6.716')]
+[2023-02-23 01:06:54,895][05631] Fps is (10 sec: 4505.9, 60 sec: 3481.6, 300 sec: 3415.6). Total num frames: 12464128. Throughput: 0: 874.9. Samples: 610658. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
+[2023-02-23 01:06:54,902][05631] Avg episode reward: [(0, '7.022')]
+[2023-02-23 01:06:54,905][45637] Saving new best policy, reward=7.022!
+[2023-02-23 01:06:59,896][05631] Fps is (10 sec: 3686.2, 60 sec: 3413.3, 300 sec: 3387.9). Total num frames: 12476416. Throughput: 0: 862.4. Samples: 613118. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
+[2023-02-23 01:06:59,899][05631] Avg episode reward: [(0, '7.130')]
+[2023-02-23 01:06:59,921][45637] Saving new best policy, reward=7.130!
+[2023-02-23 01:07:00,611][45651] Updated weights for policy 0, policy_version 3047 (0.0013)
+[2023-02-23 01:07:04,897][05631] Fps is (10 sec: 2866.6, 60 sec: 3481.5, 300 sec: 3387.9). Total num frames: 12492800. Throughput: 0: 846.6. Samples: 617244.
Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-23 01:07:04,900][05631] Avg episode reward: [(0, '7.680')] +[2023-02-23 01:07:04,906][45637] Saving new best policy, reward=7.680! +[2023-02-23 01:07:09,895][05631] Fps is (10 sec: 3276.9, 60 sec: 3413.3, 300 sec: 3401.8). Total num frames: 12509184. Throughput: 0: 873.8. Samples: 622376. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-23 01:07:09,900][05631] Avg episode reward: [(0, '7.844')] +[2023-02-23 01:07:09,916][45637] Saving new best policy, reward=7.844! +[2023-02-23 01:07:12,387][45651] Updated weights for policy 0, policy_version 3057 (0.0016) +[2023-02-23 01:07:14,895][05631] Fps is (10 sec: 3687.2, 60 sec: 3413.3, 300 sec: 3401.8). Total num frames: 12529664. Throughput: 0: 877.9. Samples: 625460. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:07:14,898][05631] Avg episode reward: [(0, '8.428')] +[2023-02-23 01:07:14,901][45637] Saving new best policy, reward=8.428! +[2023-02-23 01:07:19,900][05631] Fps is (10 sec: 3684.8, 60 sec: 3481.3, 300 sec: 3387.8). Total num frames: 12546048. Throughput: 0: 859.8. Samples: 631028. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:07:19,906][05631] Avg episode reward: [(0, '8.101')] +[2023-02-23 01:07:24,899][05631] Fps is (10 sec: 2866.0, 60 sec: 3481.4, 300 sec: 3387.8). Total num frames: 12558336. Throughput: 0: 845.1. Samples: 635078. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:07:24,902][05631] Avg episode reward: [(0, '8.047')] +[2023-02-23 01:07:25,298][45651] Updated weights for policy 0, policy_version 3067 (0.0019) +[2023-02-23 01:07:29,897][05631] Fps is (10 sec: 3277.5, 60 sec: 3481.5, 300 sec: 3415.6). Total num frames: 12578816. Throughput: 0: 848.9. Samples: 637204. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:07:29,904][05631] Avg episode reward: [(0, '7.663')] +[2023-02-23 01:07:34,895][05631] Fps is (10 sec: 4097.8, 60 sec: 3413.3, 300 sec: 3415.6). Total num frames: 12599296. Throughput: 0: 875.3. Samples: 643634. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:07:34,901][05631] Avg episode reward: [(0, '7.817')] +[2023-02-23 01:07:35,568][45651] Updated weights for policy 0, policy_version 3077 (0.0013) +[2023-02-23 01:07:39,895][05631] Fps is (10 sec: 3687.2, 60 sec: 3481.6, 300 sec: 3401.8). Total num frames: 12615680. Throughput: 0: 856.8. Samples: 649212. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:07:39,898][05631] Avg episode reward: [(0, '8.678')] +[2023-02-23 01:07:39,906][45637] Saving new best policy, reward=8.678! +[2023-02-23 01:07:44,898][05631] Fps is (10 sec: 2866.5, 60 sec: 3481.5, 300 sec: 3387.9). Total num frames: 12627968. Throughput: 0: 845.1. Samples: 651150. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:07:44,904][05631] Avg episode reward: [(0, '9.092')] +[2023-02-23 01:07:44,907][45637] Saving new best policy, reward=9.092! +[2023-02-23 01:07:49,266][45651] Updated weights for policy 0, policy_version 3087 (0.0031) +[2023-02-23 01:07:49,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3401.8). Total num frames: 12644352. Throughput: 0: 844.3. Samples: 655234. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:07:49,898][05631] Avg episode reward: [(0, '9.950')] +[2023-02-23 01:07:49,906][45637] Saving new best policy, reward=9.950! +[2023-02-23 01:07:54,895][05631] Fps is (10 sec: 3687.4, 60 sec: 3345.1, 300 sec: 3415.7). Total num frames: 12664832. Throughput: 0: 870.3. Samples: 661540. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:07:54,897][05631] Avg episode reward: [(0, '9.684')] +[2023-02-23 01:07:59,184][45651] Updated weights for policy 0, policy_version 3097 (0.0015) +[2023-02-23 01:07:59,900][05631] Fps is (10 sec: 4093.8, 60 sec: 3481.3, 300 sec: 3415.6). Total num frames: 12685312. Throughput: 0: 874.9. Samples: 664834. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:07:59,907][05631] Avg episode reward: [(0, '10.341')] +[2023-02-23 01:07:59,920][45637] Saving new best policy, reward=10.341! +[2023-02-23 01:08:04,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3413.5, 300 sec: 3387.9). Total num frames: 12697600. Throughput: 0: 847.3. Samples: 669152. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:08:04,901][05631] Avg episode reward: [(0, '10.326')] +[2023-02-23 01:08:09,895][05631] Fps is (10 sec: 2868.7, 60 sec: 3413.3, 300 sec: 3401.8). Total num frames: 12713984. Throughput: 0: 851.5. Samples: 673390. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:08:09,904][05631] Avg episode reward: [(0, '10.086')] +[2023-02-23 01:08:12,637][45651] Updated weights for policy 0, policy_version 3107 (0.0029) +[2023-02-23 01:08:14,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3415.6). Total num frames: 12734464. Throughput: 0: 873.8. Samples: 676524. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-23 01:08:14,897][05631] Avg episode reward: [(0, '10.051')] +[2023-02-23 01:08:19,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3481.9, 300 sec: 3415.6). Total num frames: 12754944. Throughput: 0: 875.3. Samples: 683022. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:08:19,902][05631] Avg episode reward: [(0, '10.714')] +[2023-02-23 01:08:19,919][45637] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003114_12754944.pth... +[2023-02-23 01:08:20,139][45637] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002912_11927552.pth +[2023-02-23 01:08:20,165][45637] Saving new best policy, reward=10.714! +[2023-02-23 01:08:24,021][45651] Updated weights for policy 0, policy_version 3117 (0.0024) +[2023-02-23 01:08:24,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3481.8, 300 sec: 3401.8). Total num frames: 12767232. Throughput: 0: 842.6. Samples: 687128. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:08:24,903][05631] Avg episode reward: [(0, '10.647')] +[2023-02-23 01:08:29,895][05631] Fps is (10 sec: 2457.6, 60 sec: 3345.2, 300 sec: 3387.9). Total num frames: 12779520. Throughput: 0: 846.3. Samples: 689232. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-23 01:08:29,900][05631] Avg episode reward: [(0, '10.786')] +[2023-02-23 01:08:29,925][45637] Saving new best policy, reward=10.786! +[2023-02-23 01:08:34,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3415.6). Total num frames: 12800000. Throughput: 0: 873.2. Samples: 694530. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:08:34,898][05631] Avg episode reward: [(0, '11.436')] +[2023-02-23 01:08:34,969][45637] Saving new best policy, reward=11.436! +[2023-02-23 01:08:36,076][45651] Updated weights for policy 0, policy_version 3127 (0.0017) +[2023-02-23 01:08:39,895][05631] Fps is (10 sec: 4505.7, 60 sec: 3481.6, 300 sec: 3429.5). Total num frames: 12824576. Throughput: 0: 874.1. Samples: 700876. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:08:39,897][05631] Avg episode reward: [(0, '11.847')] +[2023-02-23 01:08:39,912][45637] Saving new best policy, reward=11.847! +[2023-02-23 01:08:44,896][05631] Fps is (10 sec: 3686.0, 60 sec: 3481.7, 300 sec: 3401.7). Total num frames: 12836864. Throughput: 0: 844.7. Samples: 702842. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:08:44,902][05631] Avg episode reward: [(0, '13.454')] +[2023-02-23 01:08:44,905][45637] Saving new best policy, reward=13.454! +[2023-02-23 01:08:49,166][45651] Updated weights for policy 0, policy_version 3137 (0.0021) +[2023-02-23 01:08:49,896][05631] Fps is (10 sec: 2457.4, 60 sec: 3413.3, 300 sec: 3401.8). Total num frames: 12849152. Throughput: 0: 836.4. Samples: 706792. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-23 01:08:49,899][05631] Avg episode reward: [(0, '13.977')] +[2023-02-23 01:08:49,909][45637] Saving new best policy, reward=13.977! +[2023-02-23 01:08:54,895][05631] Fps is (10 sec: 3277.2, 60 sec: 3413.3, 300 sec: 3415.6). Total num frames: 12869632. Throughput: 0: 863.2. Samples: 712234. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:08:54,898][05631] Avg episode reward: [(0, '15.807')] +[2023-02-23 01:08:54,903][45637] Saving new best policy, reward=15.807! +[2023-02-23 01:08:59,610][45651] Updated weights for policy 0, policy_version 3147 (0.0014) +[2023-02-23 01:08:59,896][05631] Fps is (10 sec: 4096.1, 60 sec: 3413.6, 300 sec: 3415.6). Total num frames: 12890112. Throughput: 0: 863.8. Samples: 715396. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-23 01:08:59,900][05631] Avg episode reward: [(0, '16.315')] +[2023-02-23 01:08:59,916][45637] Saving new best policy, reward=16.315! +[2023-02-23 01:09:04,899][05631] Fps is (10 sec: 3275.7, 60 sec: 3413.1, 300 sec: 3387.9). Total num frames: 12902400. Throughput: 0: 833.6. Samples: 720538. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:09:04,901][05631] Avg episode reward: [(0, '18.288')] +[2023-02-23 01:09:04,997][45637] Saving new best policy, reward=18.288! +[2023-02-23 01:09:09,895][05631] Fps is (10 sec: 2867.4, 60 sec: 3413.3, 300 sec: 3401.8). Total num frames: 12918784. Throughput: 0: 831.2. Samples: 724534. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:09:09,898][05631] Avg episode reward: [(0, '18.817')] +[2023-02-23 01:09:09,907][45637] Saving new best policy, reward=18.817! +[2023-02-23 01:09:13,324][45651] Updated weights for policy 0, policy_version 3157 (0.0029) +[2023-02-23 01:09:14,895][05631] Fps is (10 sec: 3278.0, 60 sec: 3345.1, 300 sec: 3415.6). Total num frames: 12935168. Throughput: 0: 835.9. Samples: 726846. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:09:14,900][05631] Avg episode reward: [(0, '18.814')] +[2023-02-23 01:09:19,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3415.6). Total num frames: 12955648. Throughput: 0: 861.4. Samples: 733294. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:09:19,898][05631] Avg episode reward: [(0, '18.579')] +[2023-02-23 01:09:23,718][45651] Updated weights for policy 0, policy_version 3167 (0.0015) +[2023-02-23 01:09:24,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3401.8). Total num frames: 12972032. Throughput: 0: 837.8. Samples: 738576. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:09:24,902][05631] Avg episode reward: [(0, '19.201')] +[2023-02-23 01:09:24,909][45637] Saving new best policy, reward=19.201! 
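A note on reading the telemetry above: each status line from the runner (PID 05631) reports frame throughput over 10 s, 60 s, and 300 s windows, the cumulative frame count, sampler throughput, and the policy lag, i.e. roughly how many policy versions old the weights were that produced the sampled actions. The interleaved lines from PID 45651 are the inference worker picking up refreshed weights, and PID 45637 is the learner saving improved policies. Below is a minimal sketch, with illustrative names rather than Sample Factory internals, of how windowed FPS figures like these can be derived from a single cumulative frame counter:

import time
from collections import deque

class FpsTracker:
    """Track frames-per-second over several trailing windows (illustrative)."""

    def __init__(self, windows=(10, 60, 300)):
        self.windows = windows
        self.history = deque()  # (timestamp, cumulative_frames) samples

    def record(self, total_frames, now=None):
        now = time.time() if now is None else now
        self.history.append((now, total_frames))
        # keep only as much history as the largest window needs
        while self.history and now - self.history[0][0] > max(self.windows):
            self.history.popleft()

    def fps(self):
        now, frames = self.history[-1]
        result = {}
        for w in self.windows:
            # oldest sample still inside this window
            past = next(((t, f) for t, f in self.history if now - t <= w), None)
            if past is None or now == past[0]:
                result[w] = 0.0  # not enough history yet
            else:
                result[w] = (frames - past[1]) / (now - past[0])
        return result

Calling tracker.record(total_frames) at each report interval and printing tracker.fps() would reproduce the (10 sec / 60 sec / 300 sec) triple seen in every status line above.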
+[2023-02-23 01:09:29,896][05631] Fps is (10 sec: 2866.8, 60 sec: 3413.3, 300 sec: 3401.7). Total num frames: 12984320. Throughput: 0: 835.1. Samples: 740420. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:09:29,906][05631] Avg episode reward: [(0, '18.018')] +[2023-02-23 01:09:34,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 13004800. Throughput: 0: 843.6. Samples: 744752. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-23 01:09:34,897][05631] Avg episode reward: [(0, '17.767')] +[2023-02-23 01:09:36,633][45651] Updated weights for policy 0, policy_version 3177 (0.0022) +[2023-02-23 01:09:39,895][05631] Fps is (10 sec: 4096.6, 60 sec: 3345.1, 300 sec: 3429.5). Total num frames: 13025280. Throughput: 0: 865.9. Samples: 751200. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-23 01:09:39,900][05631] Avg episode reward: [(0, '18.498')] +[2023-02-23 01:09:44,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3413.4, 300 sec: 3415.6). Total num frames: 13041664. Throughput: 0: 863.7. Samples: 754264. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:09:44,898][05631] Avg episode reward: [(0, '19.606')] +[2023-02-23 01:09:44,902][45637] Saving new best policy, reward=19.606! +[2023-02-23 01:09:48,603][45651] Updated weights for policy 0, policy_version 3187 (0.0021) +[2023-02-23 01:09:49,895][05631] Fps is (10 sec: 2867.1, 60 sec: 3413.4, 300 sec: 3401.8). Total num frames: 13053952. Throughput: 0: 839.9. Samples: 758332. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:09:49,902][05631] Avg episode reward: [(0, '19.044')] +[2023-02-23 01:09:54,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3415.6). Total num frames: 13070336. Throughput: 0: 856.6. Samples: 763082. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:09:54,902][05631] Avg episode reward: [(0, '19.874')] +[2023-02-23 01:09:54,990][45637] Saving new best policy, reward=19.874! +[2023-02-23 01:09:59,866][45651] Updated weights for policy 0, policy_version 3197 (0.0024) +[2023-02-23 01:09:59,895][05631] Fps is (10 sec: 4096.1, 60 sec: 3413.4, 300 sec: 3443.4). Total num frames: 13094912. Throughput: 0: 874.7. Samples: 766206. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-23 01:09:59,898][05631] Avg episode reward: [(0, '22.045')] +[2023-02-23 01:09:59,917][45637] Saving new best policy, reward=22.045! +[2023-02-23 01:10:04,897][05631] Fps is (10 sec: 4095.4, 60 sec: 3481.7, 300 sec: 3429.5). Total num frames: 13111296. Throughput: 0: 866.6. Samples: 772292. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:10:04,902][05631] Avg episode reward: [(0, '22.070')] +[2023-02-23 01:10:04,906][45637] Saving new best policy, reward=22.070! +[2023-02-23 01:10:09,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3415.6). Total num frames: 13123584. Throughput: 0: 837.5. Samples: 776262. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:10:09,898][05631] Avg episode reward: [(0, '22.871')] +[2023-02-23 01:10:09,912][45637] Saving new best policy, reward=22.871! +[2023-02-23 01:10:13,613][45651] Updated weights for policy 0, policy_version 3207 (0.0020) +[2023-02-23 01:10:14,895][05631] Fps is (10 sec: 2867.6, 60 sec: 3413.3, 300 sec: 3415.7). Total num frames: 13139968. Throughput: 0: 839.8. Samples: 778208. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:10:14,902][05631] Avg episode reward: [(0, '23.593')] +[2023-02-23 01:10:14,905][45637] Saving new best policy, reward=23.593! +[2023-02-23 01:10:19,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 13160448. Throughput: 0: 870.4. Samples: 783920. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:10:19,897][05631] Avg episode reward: [(0, '24.125')] +[2023-02-23 01:10:19,920][45637] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003213_13160448.pth... +[2023-02-23 01:10:20,090][45637] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003012_12337152.pth +[2023-02-23 01:10:20,102][45637] Saving new best policy, reward=24.125! +[2023-02-23 01:10:23,671][45651] Updated weights for policy 0, policy_version 3217 (0.0025) +[2023-02-23 01:10:24,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3415.7). Total num frames: 13176832. Throughput: 0: 860.6. Samples: 789926. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:10:24,908][05631] Avg episode reward: [(0, '24.725')] +[2023-02-23 01:10:24,943][45637] Saving new best policy, reward=24.725! +[2023-02-23 01:10:29,897][05631] Fps is (10 sec: 3276.1, 60 sec: 3481.6, 300 sec: 3415.6). Total num frames: 13193216. Throughput: 0: 835.6. Samples: 791868. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-23 01:10:29,904][05631] Avg episode reward: [(0, '24.600')] +[2023-02-23 01:10:34,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3401.8). Total num frames: 13205504. Throughput: 0: 835.9. Samples: 795948. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:10:34,898][05631] Avg episode reward: [(0, '24.329')] +[2023-02-23 01:10:37,271][45651] Updated weights for policy 0, policy_version 3227 (0.0025) +[2023-02-23 01:10:39,895][05631] Fps is (10 sec: 3277.5, 60 sec: 3345.1, 300 sec: 3429.5). Total num frames: 13225984. Throughput: 0: 864.1. Samples: 801966. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:10:39,902][05631] Avg episode reward: [(0, '24.278')] +[2023-02-23 01:10:44,895][05631] Fps is (10 sec: 4505.6, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 13250560. Throughput: 0: 865.9. Samples: 805172. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:10:44,903][05631] Avg episode reward: [(0, '23.675')] +[2023-02-23 01:10:47,937][45651] Updated weights for policy 0, policy_version 3237 (0.0057) +[2023-02-23 01:10:49,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3415.6). Total num frames: 13262848. Throughput: 0: 838.4. Samples: 810020. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:10:49,902][05631] Avg episode reward: [(0, '24.430')] +[2023-02-23 01:10:54,895][05631] Fps is (10 sec: 2457.6, 60 sec: 3413.3, 300 sec: 3401.8). Total num frames: 13275136. Throughput: 0: 839.5. Samples: 814038. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-23 01:10:54,905][05631] Avg episode reward: [(0, '24.569')] +[2023-02-23 01:10:59,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3429.5). Total num frames: 13295616. Throughput: 0: 862.8. Samples: 817032. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:10:59,898][05631] Avg episode reward: [(0, '23.907')] +[2023-02-23 01:11:00,034][45651] Updated weights for policy 0, policy_version 3247 (0.0024) +[2023-02-23 01:11:04,895][05631] Fps is (10 sec: 4505.7, 60 sec: 3481.7, 300 sec: 3443.4). 
Total num frames: 13320192. Throughput: 0: 879.6. Samples: 823504. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2023-02-23 01:11:04,897][05631] Avg episode reward: [(0, '24.881')] +[2023-02-23 01:11:04,905][45637] Saving new best policy, reward=24.881! +[2023-02-23 01:11:09,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3415.6). Total num frames: 13332480. Throughput: 0: 847.1. Samples: 828046. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:11:09,898][05631] Avg episode reward: [(0, '25.242')] +[2023-02-23 01:11:09,919][45637] Saving new best policy, reward=25.242! +[2023-02-23 01:11:12,994][45651] Updated weights for policy 0, policy_version 3257 (0.0012) +[2023-02-23 01:11:14,897][05631] Fps is (10 sec: 2457.0, 60 sec: 3413.2, 300 sec: 3415.6). Total num frames: 13344768. Throughput: 0: 846.6. Samples: 829964. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:11:14,900][05631] Avg episode reward: [(0, '25.806')] +[2023-02-23 01:11:14,903][45637] Saving new best policy, reward=25.806! +[2023-02-23 01:11:19,898][05631] Fps is (10 sec: 2867.0, 60 sec: 3345.0, 300 sec: 3429.5). Total num frames: 13361152. Throughput: 0: 867.5. Samples: 834984. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-23 01:11:19,901][05631] Avg episode reward: [(0, '25.705')] +[2023-02-23 01:11:23,720][45651] Updated weights for policy 0, policy_version 3267 (0.0013) +[2023-02-23 01:11:24,895][05631] Fps is (10 sec: 4096.8, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 13385728. Throughput: 0: 874.7. Samples: 841326. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:11:24,901][05631] Avg episode reward: [(0, '24.753')] +[2023-02-23 01:11:29,895][05631] Fps is (10 sec: 3686.7, 60 sec: 3413.5, 300 sec: 3401.8). Total num frames: 13398016. Throughput: 0: 861.4. Samples: 843934. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-23 01:11:29,903][05631] Avg episode reward: [(0, '25.665')] +[2023-02-23 01:11:34,896][05631] Fps is (10 sec: 2867.1, 60 sec: 3481.6, 300 sec: 3415.6). Total num frames: 13414400. Throughput: 0: 845.4. Samples: 848062. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-23 01:11:34,903][05631] Avg episode reward: [(0, '25.659')] +[2023-02-23 01:11:37,075][45651] Updated weights for policy 0, policy_version 3277 (0.0019) +[2023-02-23 01:11:39,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 13430784. Throughput: 0: 871.9. Samples: 853272. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:11:39,898][05631] Avg episode reward: [(0, '25.147')] +[2023-02-23 01:11:44,895][05631] Fps is (10 sec: 4096.2, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 13455360. Throughput: 0: 877.0. Samples: 856496. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-23 01:11:44,898][05631] Avg episode reward: [(0, '25.970')] +[2023-02-23 01:11:44,902][45637] Saving new best policy, reward=25.970! +[2023-02-23 01:11:46,587][45651] Updated weights for policy 0, policy_version 3287 (0.0020) +[2023-02-23 01:11:49,900][05631] Fps is (10 sec: 4093.8, 60 sec: 3481.3, 300 sec: 3415.6). Total num frames: 13471744. Throughput: 0: 856.5. Samples: 862052. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:11:49,903][05631] Avg episode reward: [(0, '26.362')] +[2023-02-23 01:11:49,922][45637] Saving new best policy, reward=26.362! +[2023-02-23 01:11:54,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3415.7). Total num frames: 13484032. Throughput: 0: 843.0. Samples: 865980. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:11:54,901][05631] Avg episode reward: [(0, '25.805')] +[2023-02-23 01:11:59,895][05631] Fps is (10 sec: 2868.7, 60 sec: 3413.3, 300 sec: 3415.7). Total num frames: 13500416. Throughput: 0: 849.7. Samples: 868198. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:11:59,897][05631] Avg episode reward: [(0, '26.384')] +[2023-02-23 01:11:59,909][45637] Saving new best policy, reward=26.384! +[2023-02-23 01:12:00,454][45651] Updated weights for policy 0, policy_version 3297 (0.0037) +[2023-02-23 01:12:04,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3429.5). Total num frames: 13520896. Throughput: 0: 878.9. Samples: 874534. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:12:04,897][05631] Avg episode reward: [(0, '25.979')] +[2023-02-23 01:12:09,900][05631] Fps is (10 sec: 3684.5, 60 sec: 3413.0, 300 sec: 3415.6). Total num frames: 13537280. Throughput: 0: 858.0. Samples: 879940. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:12:09,903][05631] Avg episode reward: [(0, '25.287')] +[2023-02-23 01:12:11,565][45651] Updated weights for policy 0, policy_version 3307 (0.0013) +[2023-02-23 01:12:14,896][05631] Fps is (10 sec: 3276.4, 60 sec: 3481.7, 300 sec: 3415.7). Total num frames: 13553664. Throughput: 0: 846.1. Samples: 882010. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:12:14,901][05631] Avg episode reward: [(0, '24.655')] +[2023-02-23 01:12:19,895][05631] Fps is (10 sec: 3278.6, 60 sec: 3481.6, 300 sec: 3429.6). Total num frames: 13570048. Throughput: 0: 850.3. Samples: 886326. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:12:19,904][05631] Avg episode reward: [(0, '24.571')] +[2023-02-23 01:12:19,915][45637] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003313_13570048.pth... +[2023-02-23 01:12:20,102][45637] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003114_12754944.pth +[2023-02-23 01:12:23,583][45651] Updated weights for policy 0, policy_version 3317 (0.0034) +[2023-02-23 01:12:24,895][05631] Fps is (10 sec: 3686.8, 60 sec: 3413.3, 300 sec: 3429.6). Total num frames: 13590528. Throughput: 0: 873.6. Samples: 892586. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:12:24,907][05631] Avg episode reward: [(0, '22.477')] +[2023-02-23 01:12:29,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3415.6). Total num frames: 13606912. Throughput: 0: 873.0. Samples: 895780. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:12:29,903][05631] Avg episode reward: [(0, '22.109')] +[2023-02-23 01:12:34,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3415.6). Total num frames: 13623296. Throughput: 0: 840.4. Samples: 899864. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:12:34,901][05631] Avg episode reward: [(0, '21.858')] +[2023-02-23 01:12:36,210][45651] Updated weights for policy 0, policy_version 3327 (0.0015) +[2023-02-23 01:12:39,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3415.7). Total num frames: 13635584. Throughput: 0: 852.7. Samples: 904350. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:12:39,897][05631] Avg episode reward: [(0, '24.157')] +[2023-02-23 01:12:44,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 13660160. Throughput: 0: 875.8. Samples: 907610. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:12:44,902][05631] Avg episode reward: [(0, '23.659')] +[2023-02-23 01:12:46,831][45651] Updated weights for policy 0, policy_version 3337 (0.0018) +[2023-02-23 01:12:49,895][05631] Fps is (10 sec: 4095.8, 60 sec: 3413.6, 300 sec: 3429.5). Total num frames: 13676544. Throughput: 0: 874.8. Samples: 913900. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:12:49,903][05631] Avg episode reward: [(0, '24.030')] +[2023-02-23 01:12:54,898][05631] Fps is (10 sec: 2866.3, 60 sec: 3413.1, 300 sec: 3401.8). Total num frames: 13688832. Throughput: 0: 844.8. Samples: 917954. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:12:54,901][05631] Avg episode reward: [(0, '24.643')] +[2023-02-23 01:12:59,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3415.6). Total num frames: 13705216. Throughput: 0: 843.9. Samples: 919984. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:12:59,898][05631] Avg episode reward: [(0, '26.372')] +[2023-02-23 01:13:00,374][45651] Updated weights for policy 0, policy_version 3347 (0.0023) +[2023-02-23 01:13:04,897][05631] Fps is (10 sec: 3686.9, 60 sec: 3413.2, 300 sec: 3429.5). Total num frames: 13725696. Throughput: 0: 875.9. Samples: 925742. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:13:04,900][05631] Avg episode reward: [(0, '27.486')] +[2023-02-23 01:13:04,905][45637] Saving new best policy, reward=27.486! +[2023-02-23 01:13:09,787][45651] Updated weights for policy 0, policy_version 3357 (0.0013) +[2023-02-23 01:13:09,895][05631] Fps is (10 sec: 4505.7, 60 sec: 3550.2, 300 sec: 3443.4). Total num frames: 13750272. Throughput: 0: 879.8. Samples: 932176. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:13:09,901][05631] Avg episode reward: [(0, '27.197')] +[2023-02-23 01:13:14,897][05631] Fps is (10 sec: 3686.2, 60 sec: 3481.5, 300 sec: 3415.6). Total num frames: 13762560. Throughput: 0: 854.6. Samples: 934240. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:13:14,900][05631] Avg episode reward: [(0, '26.746')] +[2023-02-23 01:13:19,898][05631] Fps is (10 sec: 2457.0, 60 sec: 3413.2, 300 sec: 3415.6). Total num frames: 13774848. Throughput: 0: 852.4. Samples: 938222. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:13:19,908][05631] Avg episode reward: [(0, '28.616')] +[2023-02-23 01:13:19,923][45637] Saving new best policy, reward=28.616! +[2023-02-23 01:13:23,491][45651] Updated weights for policy 0, policy_version 3367 (0.0018) +[2023-02-23 01:13:24,895][05631] Fps is (10 sec: 3277.6, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 13795328. Throughput: 0: 880.3. Samples: 943964. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-23 01:13:24,899][05631] Avg episode reward: [(0, '29.309')] +[2023-02-23 01:13:24,906][45637] Saving new best policy, reward=29.309! +[2023-02-23 01:13:29,895][05631] Fps is (10 sec: 4097.0, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 13815808. Throughput: 0: 877.4. Samples: 947094. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-23 01:13:29,902][05631] Avg episode reward: [(0, '28.912')] +[2023-02-23 01:13:34,721][45651] Updated weights for policy 0, policy_version 3377 (0.0014) +[2023-02-23 01:13:34,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3415.6). Total num frames: 13832192. Throughput: 0: 851.3. Samples: 952210. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:13:34,898][05631] Avg episode reward: [(0, '28.374')] +[2023-02-23 01:13:39,897][05631] Fps is (10 sec: 2866.6, 60 sec: 3481.5, 300 sec: 3415.6). Total num frames: 13844480. Throughput: 0: 849.4. Samples: 956176. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:13:39,905][05631] Avg episode reward: [(0, '29.335')] +[2023-02-23 01:13:39,920][45637] Saving new best policy, reward=29.335! +[2023-02-23 01:13:44,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 13864960. Throughput: 0: 859.2. Samples: 958648. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:13:44,898][05631] Avg episode reward: [(0, '29.085')] +[2023-02-23 01:13:46,994][45651] Updated weights for policy 0, policy_version 3387 (0.0019) +[2023-02-23 01:13:49,895][05631] Fps is (10 sec: 4096.9, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 13885440. Throughput: 0: 872.9. Samples: 965022. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-23 01:13:49,897][05631] Avg episode reward: [(0, '28.127')] +[2023-02-23 01:13:54,906][05631] Fps is (10 sec: 3273.2, 60 sec: 3481.1, 300 sec: 3415.5). Total num frames: 13897728. Throughput: 0: 842.1. Samples: 970080. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:13:54,915][05631] Avg episode reward: [(0, '26.479')] +[2023-02-23 01:13:59,719][45651] Updated weights for policy 0, policy_version 3397 (0.0013) +[2023-02-23 01:13:59,897][05631] Fps is (10 sec: 2866.8, 60 sec: 3481.5, 300 sec: 3429.6). Total num frames: 13914112. Throughput: 0: 840.0. Samples: 972040. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:13:59,906][05631] Avg episode reward: [(0, '25.509')] +[2023-02-23 01:14:04,895][05631] Fps is (10 sec: 3280.4, 60 sec: 3413.4, 300 sec: 3429.5). Total num frames: 13930496. Throughput: 0: 856.0. Samples: 976740. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-23 01:14:04,898][05631] Avg episode reward: [(0, '24.994')] +[2023-02-23 01:14:09,895][05631] Fps is (10 sec: 3687.0, 60 sec: 3345.1, 300 sec: 3443.4). Total num frames: 13950976. Throughput: 0: 871.4. Samples: 983176. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:14:09,898][05631] Avg episode reward: [(0, '23.322')] +[2023-02-23 01:14:10,160][45651] Updated weights for policy 0, policy_version 3407 (0.0020) +[2023-02-23 01:14:14,896][05631] Fps is (10 sec: 3685.9, 60 sec: 3413.4, 300 sec: 3429.5). Total num frames: 13967360. Throughput: 0: 867.4. Samples: 986128. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:14:14,899][05631] Avg episode reward: [(0, '23.329')] +[2023-02-23 01:14:19,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3481.7, 300 sec: 3429.5). Total num frames: 13983744. Throughput: 0: 842.7. Samples: 990132. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:14:19,897][05631] Avg episode reward: [(0, '23.106')] +[2023-02-23 01:14:19,918][45637] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003414_13983744.pth... +[2023-02-23 01:14:20,127][45637] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003213_13160448.pth +[2023-02-23 01:14:23,926][45651] Updated weights for policy 0, policy_version 3417 (0.0016) +[2023-02-23 01:14:24,895][05631] Fps is (10 sec: 3277.1, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 14000128. Throughput: 0: 857.5. Samples: 994764. 
Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-23 01:14:24,898][05631] Avg episode reward: [(0, '23.673')] +[2023-02-23 01:14:29,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 14020608. Throughput: 0: 871.4. Samples: 997862. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:14:29,903][05631] Avg episode reward: [(0, '25.290')] +[2023-02-23 01:14:34,251][45651] Updated weights for policy 0, policy_version 3427 (0.0014) +[2023-02-23 01:14:34,895][05631] Fps is (10 sec: 3686.5, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 14036992. Throughput: 0: 858.6. Samples: 1003660. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:14:34,902][05631] Avg episode reward: [(0, '25.524')] +[2023-02-23 01:14:39,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3413.5, 300 sec: 3415.6). Total num frames: 14049280. Throughput: 0: 835.0. Samples: 1007648. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-23 01:14:39,898][05631] Avg episode reward: [(0, '26.169')] +[2023-02-23 01:14:44,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3429.5). Total num frames: 14065664. Throughput: 0: 834.2. Samples: 1009576. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-23 01:14:44,897][05631] Avg episode reward: [(0, '26.559')] +[2023-02-23 01:14:47,454][45651] Updated weights for policy 0, policy_version 3437 (0.0029) +[2023-02-23 01:14:49,895][05631] Fps is (10 sec: 3686.5, 60 sec: 3345.1, 300 sec: 3443.4). Total num frames: 14086144. Throughput: 0: 863.0. Samples: 1015576. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:14:49,898][05631] Avg episode reward: [(0, '26.514')] +[2023-02-23 01:14:54,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3482.2, 300 sec: 3429.5). Total num frames: 14106624. Throughput: 0: 855.2. Samples: 1021658. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:14:54,901][05631] Avg episode reward: [(0, '26.384')] +[2023-02-23 01:14:58,876][45651] Updated weights for policy 0, policy_version 3447 (0.0013) +[2023-02-23 01:14:59,895][05631] Fps is (10 sec: 3276.7, 60 sec: 3413.4, 300 sec: 3415.7). Total num frames: 14118912. Throughput: 0: 834.4. Samples: 1023676. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2023-02-23 01:14:59,899][05631] Avg episode reward: [(0, '26.063')] +[2023-02-23 01:15:04,895][05631] Fps is (10 sec: 2457.6, 60 sec: 3345.1, 300 sec: 3415.6). Total num frames: 14131200. Throughput: 0: 835.2. Samples: 1027716. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:15:04,909][05631] Avg episode reward: [(0, '26.568')] +[2023-02-23 01:15:09,895][05631] Fps is (10 sec: 3686.5, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 14155776. Throughput: 0: 867.8. Samples: 1033816. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:15:09,897][05631] Avg episode reward: [(0, '26.731')] +[2023-02-23 01:15:10,714][45651] Updated weights for policy 0, policy_version 3457 (0.0018) +[2023-02-23 01:15:14,898][05631] Fps is (10 sec: 4504.1, 60 sec: 3481.5, 300 sec: 3443.4). Total num frames: 14176256. Throughput: 0: 870.1. Samples: 1037018. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:15:14,900][05631] Avg episode reward: [(0, '27.516')] +[2023-02-23 01:15:19,896][05631] Fps is (10 sec: 3276.7, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 14188544. Throughput: 0: 847.9. Samples: 1041814. 
Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-23 01:15:19,899][05631] Avg episode reward: [(0, '26.904')] +[2023-02-23 01:15:23,590][45651] Updated weights for policy 0, policy_version 3467 (0.0030) +[2023-02-23 01:15:24,895][05631] Fps is (10 sec: 2458.4, 60 sec: 3345.1, 300 sec: 3415.7). Total num frames: 14200832. Throughput: 0: 848.1. Samples: 1045812. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-23 01:15:24,902][05631] Avg episode reward: [(0, '26.383')] +[2023-02-23 01:15:29,895][05631] Fps is (10 sec: 3276.9, 60 sec: 3345.1, 300 sec: 3443.4). Total num frames: 14221312. Throughput: 0: 868.0. Samples: 1048636. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:15:29,906][05631] Avg episode reward: [(0, '26.338')] +[2023-02-23 01:15:34,105][45651] Updated weights for policy 0, policy_version 3477 (0.0028) +[2023-02-23 01:15:34,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 14241792. Throughput: 0: 875.5. Samples: 1054972. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-23 01:15:34,898][05631] Avg episode reward: [(0, '25.812')] +[2023-02-23 01:15:39,898][05631] Fps is (10 sec: 3685.2, 60 sec: 3481.4, 300 sec: 3415.6). Total num frames: 14258176. Throughput: 0: 848.1. Samples: 1059826. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-23 01:15:39,901][05631] Avg episode reward: [(0, '25.314')] +[2023-02-23 01:15:44,896][05631] Fps is (10 sec: 2867.1, 60 sec: 3413.3, 300 sec: 3415.6). Total num frames: 14270464. Throughput: 0: 846.8. Samples: 1061784. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-23 01:15:44,909][05631] Avg episode reward: [(0, '24.355')] +[2023-02-23 01:15:47,579][45651] Updated weights for policy 0, policy_version 3487 (0.0012) +[2023-02-23 01:15:49,895][05631] Fps is (10 sec: 3277.9, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 14290944. Throughput: 0: 864.2. Samples: 1066606. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-23 01:15:49,902][05631] Avg episode reward: [(0, '23.775')] +[2023-02-23 01:15:54,895][05631] Fps is (10 sec: 4096.1, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 14311424. Throughput: 0: 874.9. Samples: 1073186. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:15:54,898][05631] Avg episode reward: [(0, '23.428')] +[2023-02-23 01:15:57,628][45651] Updated weights for policy 0, policy_version 3497 (0.0013) +[2023-02-23 01:15:59,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3415.6). Total num frames: 14327808. Throughput: 0: 862.8. Samples: 1075840. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:15:59,898][05631] Avg episode reward: [(0, '23.884')] +[2023-02-23 01:16:04,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3415.6). Total num frames: 14340096. Throughput: 0: 845.6. Samples: 1079866. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:16:04,898][05631] Avg episode reward: [(0, '23.815')] +[2023-02-23 01:16:09,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 14360576. Throughput: 0: 870.1. Samples: 1084966. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:16:09,898][05631] Avg episode reward: [(0, '23.374')] +[2023-02-23 01:16:10,784][45651] Updated weights for policy 0, policy_version 3507 (0.0020) +[2023-02-23 01:16:14,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3413.5, 300 sec: 3457.3). Total num frames: 14381056. Throughput: 0: 877.8. Samples: 1088138. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:16:14,898][05631] Avg episode reward: [(0, '23.445')] +[2023-02-23 01:16:19,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3481.6, 300 sec: 3429.5). Total num frames: 14397440. Throughput: 0: 866.1. Samples: 1093946. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:16:19,900][05631] Avg episode reward: [(0, '24.174')] +[2023-02-23 01:16:19,910][45637] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003515_14397440.pth... +[2023-02-23 01:16:20,136][45637] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003313_13570048.pth +[2023-02-23 01:16:22,403][45651] Updated weights for policy 0, policy_version 3517 (0.0016) +[2023-02-23 01:16:24,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3429.5). Total num frames: 14409728. Throughput: 0: 842.6. Samples: 1097740. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:16:24,909][05631] Avg episode reward: [(0, '24.328')] +[2023-02-23 01:16:29,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 14426112. Throughput: 0: 844.3. Samples: 1099776. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:16:29,898][05631] Avg episode reward: [(0, '24.616')] +[2023-02-23 01:16:34,160][45651] Updated weights for policy 0, policy_version 3527 (0.0020) +[2023-02-23 01:16:34,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 14446592. Throughput: 0: 875.1. Samples: 1105984. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:16:34,897][05631] Avg episode reward: [(0, '24.564')] +[2023-02-23 01:16:39,896][05631] Fps is (10 sec: 4095.9, 60 sec: 3481.8, 300 sec: 3429.5). Total num frames: 14467072. Throughput: 0: 854.8. Samples: 1111654. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-23 01:16:39,901][05631] Avg episode reward: [(0, '24.356')] +[2023-02-23 01:16:44,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3415.7). Total num frames: 14479360. Throughput: 0: 840.8. Samples: 1113676. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-23 01:16:44,899][05631] Avg episode reward: [(0, '25.001')] +[2023-02-23 01:16:47,088][45651] Updated weights for policy 0, policy_version 3537 (0.0015) +[2023-02-23 01:16:49,895][05631] Fps is (10 sec: 2867.3, 60 sec: 3413.3, 300 sec: 3429.5). Total num frames: 14495744. Throughput: 0: 842.0. Samples: 1117758. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:16:49,898][05631] Avg episode reward: [(0, '25.580')] +[2023-02-23 01:16:54,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 14516224. Throughput: 0: 867.9. Samples: 1124020. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:16:54,903][05631] Avg episode reward: [(0, '26.539')] +[2023-02-23 01:16:57,383][45651] Updated weights for policy 0, policy_version 3547 (0.0019) +[2023-02-23 01:16:59,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 14536704. Throughput: 0: 870.0. Samples: 1127286. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:16:59,901][05631] Avg episode reward: [(0, '26.424')] +[2023-02-23 01:17:04,898][05631] Fps is (10 sec: 3276.0, 60 sec: 3481.4, 300 sec: 3429.6). Total num frames: 14548992. Throughput: 0: 839.5. Samples: 1131724. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:17:04,901][05631] Avg episode reward: [(0, '27.096')] +[2023-02-23 01:17:09,895][05631] Fps is (10 sec: 2457.6, 60 sec: 3345.1, 300 sec: 3415.7). Total num frames: 14561280. Throughput: 0: 844.6. Samples: 1135746. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:17:09,903][05631] Avg episode reward: [(0, '26.816')] +[2023-02-23 01:17:11,108][45651] Updated weights for policy 0, policy_version 3557 (0.0025) +[2023-02-23 01:17:14,895][05631] Fps is (10 sec: 3277.6, 60 sec: 3345.1, 300 sec: 3429.5). Total num frames: 14581760. Throughput: 0: 869.7. Samples: 1138914. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:17:14,898][05631] Avg episode reward: [(0, '29.013')] +[2023-02-23 01:17:19,895][05631] Fps is (10 sec: 4505.6, 60 sec: 3481.6, 300 sec: 3443.4). Total num frames: 14606336. Throughput: 0: 875.4. Samples: 1145376. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:17:19,902][05631] Avg episode reward: [(0, '28.396')] +[2023-02-23 01:17:21,305][45651] Updated weights for policy 0, policy_version 3567 (0.0016) +[2023-02-23 01:17:24,896][05631] Fps is (10 sec: 3686.0, 60 sec: 3481.5, 300 sec: 3429.5). Total num frames: 14618624. Throughput: 0: 846.1. Samples: 1149730. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:17:24,905][05631] Avg episode reward: [(0, '28.227')] +[2023-02-23 01:17:29,895][05631] Fps is (10 sec: 2457.6, 60 sec: 3413.3, 300 sec: 3415.6). Total num frames: 14630912. Throughput: 0: 844.6. Samples: 1151682. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:17:29,899][05631] Avg episode reward: [(0, '28.876')] +[2023-02-23 01:17:34,496][45651] Updated weights for policy 0, policy_version 3577 (0.0026) +[2023-02-23 01:17:34,895][05631] Fps is (10 sec: 3277.1, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 14651392. Throughput: 0: 868.5. Samples: 1156840. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:17:34,898][05631] Avg episode reward: [(0, '29.874')] +[2023-02-23 01:17:34,901][45637] Saving new best policy, reward=29.874! +[2023-02-23 01:17:39,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3413.4, 300 sec: 3429.5). Total num frames: 14671872. Throughput: 0: 871.2. Samples: 1163224. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:17:39,899][05631] Avg episode reward: [(0, '29.521')] +[2023-02-23 01:17:44,895][05631] Fps is (10 sec: 3686.3, 60 sec: 3481.6, 300 sec: 3429.5). Total num frames: 14688256. Throughput: 0: 853.3. Samples: 1165686. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:17:44,899][05631] Avg episode reward: [(0, '27.351')] +[2023-02-23 01:17:46,015][45651] Updated weights for policy 0, policy_version 3587 (0.0018) +[2023-02-23 01:17:49,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3429.6). Total num frames: 14700544. Throughput: 0: 845.2. Samples: 1169756. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:17:49,902][05631] Avg episode reward: [(0, '27.131')] +[2023-02-23 01:17:54,897][05631] Fps is (10 sec: 2866.8, 60 sec: 3345.0, 300 sec: 3429.5). Total num frames: 14716928. Throughput: 0: 871.5. Samples: 1174964. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:17:54,900][05631] Avg episode reward: [(0, '27.021')] +[2023-02-23 01:17:57,834][45651] Updated weights for policy 0, policy_version 3597 (0.0034) +[2023-02-23 01:17:59,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 3443.4). Total num frames: 14741504. 
Throughput: 0: 871.1. Samples: 1178112. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:17:59,899][05631] Avg episode reward: [(0, '26.287')] +[2023-02-23 01:18:04,895][05631] Fps is (10 sec: 3687.0, 60 sec: 3413.5, 300 sec: 3401.8). Total num frames: 14753792. Throughput: 0: 846.6. Samples: 1183472. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:18:04,903][05631] Avg episode reward: [(0, '26.139')] +[2023-02-23 01:18:09,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3415.7). Total num frames: 14770176. Throughput: 0: 839.5. Samples: 1187506. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:18:09,901][05631] Avg episode reward: [(0, '26.193')] +[2023-02-23 01:18:10,926][45651] Updated weights for policy 0, policy_version 3607 (0.0025) +[2023-02-23 01:18:14,895][05631] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3429.6). Total num frames: 14786560. Throughput: 0: 843.8. Samples: 1189652. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:18:14,898][05631] Avg episode reward: [(0, '26.528')] +[2023-02-23 01:18:19,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3429.5). Total num frames: 14807040. Throughput: 0: 870.5. Samples: 1196012. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-23 01:18:19,904][05631] Avg episode reward: [(0, '27.337')] +[2023-02-23 01:18:19,919][45637] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003615_14807040.pth... +[2023-02-23 01:18:20,024][45637] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003414_13983744.pth +[2023-02-23 01:18:21,323][45651] Updated weights for policy 0, policy_version 3617 (0.0022) +[2023-02-23 01:18:24,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3413.4, 300 sec: 3415.6). Total num frames: 14823424. Throughput: 0: 844.5. Samples: 1201226. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-23 01:18:24,902][05631] Avg episode reward: [(0, '27.728')] +[2023-02-23 01:18:29,897][05631] Fps is (10 sec: 2866.5, 60 sec: 3413.2, 300 sec: 3401.7). Total num frames: 14835712. Throughput: 0: 833.1. Samples: 1203176. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:18:29,903][05631] Avg episode reward: [(0, '28.032')] +[2023-02-23 01:18:34,895][05631] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3415.7). Total num frames: 14852096. Throughput: 0: 838.1. Samples: 1207472. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:18:34,898][05631] Avg episode reward: [(0, '27.593')] +[2023-02-23 01:18:35,158][45651] Updated weights for policy 0, policy_version 3627 (0.0017) +[2023-02-23 01:18:39,898][05631] Fps is (10 sec: 4095.7, 60 sec: 3413.2, 300 sec: 3429.5). Total num frames: 14876672. Throughput: 0: 866.5. Samples: 1213956. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:18:39,902][05631] Avg episode reward: [(0, '27.846')] +[2023-02-23 01:18:44,895][05631] Fps is (10 sec: 4096.0, 60 sec: 3413.4, 300 sec: 3415.7). Total num frames: 14893056. Throughput: 0: 865.6. Samples: 1217064. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:18:44,904][05631] Avg episode reward: [(0, '25.499')] +[2023-02-23 01:18:45,421][45651] Updated weights for policy 0, policy_version 3637 (0.0014) +[2023-02-23 01:18:49,895][05631] Fps is (10 sec: 2868.0, 60 sec: 3413.3, 300 sec: 3415.8). Total num frames: 14905344. Throughput: 0: 838.3. Samples: 1221196. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-23 01:18:49,902][05631] Avg episode reward: [(0, '26.467')] +[2023-02-23 01:18:54,895][05631] Fps is (10 sec: 2867.1, 60 sec: 3413.4, 300 sec: 3415.7). Total num frames: 14921728. Throughput: 0: 843.8. Samples: 1225476. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-23 01:18:54,898][05631] Avg episode reward: [(0, '26.901')] +[2023-02-23 01:18:58,375][45651] Updated weights for policy 0, policy_version 3647 (0.0024) +[2023-02-23 01:18:59,895][05631] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3429.5). Total num frames: 14942208. Throughput: 0: 866.2. Samples: 1228630. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-23 01:18:59,904][05631] Avg episode reward: [(0, '27.805')] +[2023-02-23 01:19:04,895][05631] Fps is (10 sec: 4096.1, 60 sec: 3481.6, 300 sec: 3429.5). Total num frames: 14962688. Throughput: 0: 864.1. Samples: 1234898. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:19:04,898][05631] Avg episode reward: [(0, '28.057')] +[2023-02-23 01:19:09,897][05631] Fps is (10 sec: 3276.3, 60 sec: 3413.2, 300 sec: 3415.6). Total num frames: 14974976. Throughput: 0: 836.0. Samples: 1238846. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-23 01:19:09,905][05631] Avg episode reward: [(0, '28.742')] +[2023-02-23 01:19:10,968][45651] Updated weights for policy 0, policy_version 3657 (0.0013) +[2023-02-23 01:19:14,895][05631] Fps is (10 sec: 2457.6, 60 sec: 3345.1, 300 sec: 3401.8). Total num frames: 14987264. Throughput: 0: 836.1. Samples: 1240800. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-23 01:19:14,900][05631] Avg episode reward: [(0, '28.278')] +[2023-02-23 01:19:18,863][05631] Component Batcher_0 stopped! +[2023-02-23 01:19:18,862][45637] Stopping Batcher_0... +[2023-02-23 01:19:18,866][45637] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003664_15007744.pth... +[2023-02-23 01:19:18,868][45637] Loop batcher_evt_loop terminating... +[2023-02-23 01:19:18,921][45651] Weights refcount: 2 0 +[2023-02-23 01:19:18,925][05631] Component InferenceWorker_p0-w0 stopped! +[2023-02-23 01:19:18,924][45651] Stopping InferenceWorker_p0-w0... +[2023-02-23 01:19:18,936][45651] Loop inference_proc0-0_evt_loop terminating... +[2023-02-23 01:19:19,021][45637] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003515_14397440.pth +[2023-02-23 01:19:19,032][45637] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003664_15007744.pth... +[2023-02-23 01:19:19,137][45637] Stopping LearnerWorker_p0... +[2023-02-23 01:19:19,138][45637] Loop learner_proc0_evt_loop terminating... +[2023-02-23 01:19:19,145][05631] Component LearnerWorker_p0 stopped! +[2023-02-23 01:19:19,178][05631] Component RolloutWorker_w0 stopped! +[2023-02-23 01:19:19,191][05631] Component RolloutWorker_w7 stopped! +[2023-02-23 01:19:19,199][45664] Stopping RolloutWorker_w4... +[2023-02-23 01:19:19,199][05631] Component RolloutWorker_w4 stopped! +[2023-02-23 01:19:19,209][45653] Stopping RolloutWorker_w2... +[2023-02-23 01:19:19,189][45652] Stopping RolloutWorker_w0... +[2023-02-23 01:19:19,210][45652] Loop rollout_proc0_evt_loop terminating... +[2023-02-23 01:19:19,210][45664] Loop rollout_proc4_evt_loop terminating... +[2023-02-23 01:19:19,209][05631] Component RolloutWorker_w2 stopped! +[2023-02-23 01:19:19,194][45672] Stopping RolloutWorker_w7... +[2023-02-23 01:19:19,218][45670] Stopping RolloutWorker_w6... 
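Before the remaining worker loops terminate below, note the checkpoint lifecycle visible throughout this run: the learner (PID 45637) writes a checkpoint roughly every two minutes and immediately removes an older one, so only the newest couple of files survive; it keeps a separate snapshot whenever the average reward sets a new best; and it performs one final save (checkpoint_000003664_15007744.pth) during shutdown. A rough sketch of that rotation logic, assuming the checkpoint_<version>_<env_steps>.pth naming seen in these paths (torch.save stands in for whatever serialization the learner actually uses):

import glob
import os

import torch

def save_with_rotation(state, checkpoint_dir, policy_version, env_steps, keep=2):
    """Write a new checkpoint, then delete all but the newest `keep` files."""
    name = f"checkpoint_{policy_version:09d}_{env_steps}.pth"
    torch.save(state, os.path.join(checkpoint_dir, name))
    # lexicographic sort works because the version field is zero-padded
    checkpoints = sorted(glob.glob(os.path.join(checkpoint_dir, "checkpoint_*.pth")))
    for old in checkpoints[:-keep]:
        os.remove(old)

def maybe_save_best(state, checkpoint_dir, avg_reward, best_reward):
    """Keep a separate snapshot whenever the average reward improves."""
    if avg_reward > best_reward:
        torch.save(state, os.path.join(checkpoint_dir, "best.pth"))  # filename illustrative
        return avg_reward
    return best_reward

Zero-padding the version field keeps a plain lexicographic sort in checkpoint order, which is presumably why the filenames above pad it to nine digits.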
+[2023-02-23 01:19:19,213][45653] Loop rollout_proc2_evt_loop terminating... +[2023-02-23 01:19:19,219][45670] Loop rollout_proc6_evt_loop terminating... +[2023-02-23 01:19:19,218][05631] Component RolloutWorker_w6 stopped! +[2023-02-23 01:19:19,226][45662] Stopping RolloutWorker_w3... +[2023-02-23 01:19:19,227][45662] Loop rollout_proc3_evt_loop terminating... +[2023-02-23 01:19:19,226][05631] Component RolloutWorker_w3 stopped! +[2023-02-23 01:19:19,215][45672] Loop rollout_proc7_evt_loop terminating... +[2023-02-23 01:19:19,253][05631] Component RolloutWorker_w1 stopped! +[2023-02-23 01:19:19,255][45660] Stopping RolloutWorker_w1... +[2023-02-23 01:19:19,259][05631] Component RolloutWorker_w5 stopped! +[2023-02-23 01:19:19,261][05631] Waiting for process learner_proc0 to stop... +[2023-02-23 01:19:19,268][45674] Stopping RolloutWorker_w5... +[2023-02-23 01:19:19,271][45660] Loop rollout_proc1_evt_loop terminating... +[2023-02-23 01:19:19,276][45674] Loop rollout_proc5_evt_loop terminating... +[2023-02-23 01:19:21,785][05631] Waiting for process inference_proc0-0 to join... +[2023-02-23 01:19:21,871][05631] Waiting for process rollout_proc0 to join... +[2023-02-23 01:19:21,878][05631] Waiting for process rollout_proc1 to join... +[2023-02-23 01:19:22,078][05631] Waiting for process rollout_proc2 to join... +[2023-02-23 01:19:22,079][05631] Waiting for process rollout_proc3 to join... +[2023-02-23 01:19:22,081][05631] Waiting for process rollout_proc4 to join... +[2023-02-23 01:19:22,082][05631] Waiting for process rollout_proc5 to join... +[2023-02-23 01:19:22,084][05631] Waiting for process rollout_proc6 to join... +[2023-02-23 01:19:22,086][05631] Waiting for process rollout_proc7 to join... +[2023-02-23 01:19:22,087][05631] Batcher 0 profile tree view: +batching: 33.6766, releasing_batches: 0.0367 +[2023-02-23 01:19:22,089][05631] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0033 + wait_policy_total: 706.1641 +update_model: 10.8087 + weight_update: 0.0030 +one_step: 0.0029 + handle_policy_step: 706.2869 + deserialize: 19.9182, stack: 4.0569, obs_to_device_normalize: 153.0930, forward: 346.6872, send_messages: 34.5198 + prepare_outputs: 112.4429 + to_cpu: 69.9142 +[2023-02-23 01:19:22,091][05631] Learner 0 profile tree view: +misc: 0.0073, prepare_batch: 20.8492 +train: 101.1410 + epoch_init: 0.0073, minibatch_init: 0.0148, losses_postprocess: 0.7208, kl_divergence: 0.7410, after_optimizer: 3.5961 + calculate_losses: 34.2226 + losses_init: 0.0099, forward_head: 2.2717, bptt_initial: 22.4144, tail: 1.5801, advantages_returns: 0.4399, losses: 4.0439 + bptt: 3.0091 + bptt_forward_core: 2.8539 + update: 60.9162 + clip: 1.8641 +[2023-02-23 01:19:22,092][05631] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.5163, enqueue_policy_requests: 197.2211, env_step: 1104.3054, overhead: 30.0232, complete_rollouts: 9.9141 +save_policy_outputs: 28.0545 + split_output_tensors: 13.4964 +[2023-02-23 01:19:22,094][05631] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.4448, enqueue_policy_requests: 202.0074, env_step: 1109.5958, overhead: 30.4165, complete_rollouts: 9.5866 +save_policy_outputs: 28.6057 + split_output_tensors: 13.7530 +[2023-02-23 01:19:22,096][05631] Loop Runner_EvtLoop terminating... 
+[2023-02-23 01:19:22,098][05631] Runner profile tree view: +main_loop: 1499.7011 +[2023-02-23 01:19:22,100][05631] Collected {0: 15007744}, FPS: 3323.9 +[2023-02-23 01:19:22,155][05631] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-23 01:19:22,158][05631] Overriding arg 'num_workers' with value 1 passed from command line +[2023-02-23 01:19:22,159][05631] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-02-23 01:19:22,160][05631] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-02-23 01:19:22,162][05631] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-02-23 01:19:22,164][05631] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-02-23 01:19:22,166][05631] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2023-02-23 01:19:22,168][05631] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-02-23 01:19:22,177][05631] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2023-02-23 01:19:22,179][05631] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2023-02-23 01:19:22,180][05631] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-02-23 01:19:22,183][05631] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-02-23 01:19:22,186][05631] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-02-23 01:19:22,187][05631] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-02-23 01:19:22,194][05631] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-02-23 01:19:22,220][05631] RunningMeanStd input shape: (3, 72, 128) +[2023-02-23 01:19:22,223][05631] RunningMeanStd input shape: (1,) +[2023-02-23 01:19:22,237][05631] ConvEncoder: input_channels=3 +[2023-02-23 01:19:22,293][05631] Conv encoder output size: 512 +[2023-02-23 01:19:22,296][05631] Policy head output size: 512 +[2023-02-23 01:19:22,325][05631] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003664_15007744.pth... +[2023-02-23 01:19:22,833][05631] Num frames 100... +[2023-02-23 01:19:22,949][05631] Num frames 200... +[2023-02-23 01:19:23,063][05631] Num frames 300... +[2023-02-23 01:19:23,194][05631] Num frames 400... +[2023-02-23 01:19:23,306][05631] Num frames 500... +[2023-02-23 01:19:23,425][05631] Num frames 600... +[2023-02-23 01:19:23,538][05631] Num frames 700... +[2023-02-23 01:19:23,656][05631] Num frames 800... +[2023-02-23 01:19:23,793][05631] Num frames 900... +[2023-02-23 01:19:23,907][05631] Num frames 1000... +[2023-02-23 01:19:24,029][05631] Num frames 1100... +[2023-02-23 01:19:24,140][05631] Num frames 1200... +[2023-02-23 01:19:24,273][05631] Num frames 1300... +[2023-02-23 01:19:24,445][05631] Num frames 1400... +[2023-02-23 01:19:24,628][05631] Num frames 1500... +[2023-02-23 01:19:24,802][05631] Num frames 1600... +[2023-02-23 01:19:24,967][05631] Num frames 1700... +[2023-02-23 01:19:25,080][05631] Avg episode rewards: #0: 51.329, true rewards: #0: 17.330 +[2023-02-23 01:19:25,086][05631] Avg episode reward: 51.329, avg true_objective: 17.330 +[2023-02-23 01:19:25,208][05631] Num frames 1800... +[2023-02-23 01:19:25,379][05631] Num frames 1900... 
+[2023-02-23 01:19:25,565][05631] Num frames 2000... +[2023-02-23 01:19:25,730][05631] Num frames 2100... +[2023-02-23 01:19:25,902][05631] Num frames 2200... +[2023-02-23 01:19:26,065][05631] Num frames 2300... +[2023-02-23 01:19:26,244][05631] Num frames 2400... +[2023-02-23 01:19:26,410][05631] Num frames 2500... +[2023-02-23 01:19:26,574][05631] Num frames 2600... +[2023-02-23 01:19:26,755][05631] Num frames 2700... +[2023-02-23 01:19:26,965][05631] Avg episode rewards: #0: 38.445, true rewards: #0: 13.945 +[2023-02-23 01:19:26,967][05631] Avg episode reward: 38.445, avg true_objective: 13.945 +[2023-02-23 01:19:26,988][05631] Num frames 2800... +[2023-02-23 01:19:27,160][05631] Num frames 2900... +[2023-02-23 01:19:27,321][05631] Num frames 3000... +[2023-02-23 01:19:27,481][05631] Num frames 3100... +[2023-02-23 01:19:27,649][05631] Num frames 3200... +[2023-02-23 01:19:27,814][05631] Num frames 3300... +[2023-02-23 01:19:27,980][05631] Num frames 3400... +[2023-02-23 01:19:28,092][05631] Num frames 3500... +[2023-02-23 01:19:28,207][05631] Num frames 3600... +[2023-02-23 01:19:28,291][05631] Avg episode rewards: #0: 31.737, true rewards: #0: 12.070 +[2023-02-23 01:19:28,294][05631] Avg episode reward: 31.737, avg true_objective: 12.070 +[2023-02-23 01:19:28,389][05631] Num frames 3700... +[2023-02-23 01:19:28,502][05631] Num frames 3800... +[2023-02-23 01:19:28,621][05631] Num frames 3900... +[2023-02-23 01:19:28,735][05631] Num frames 4000... +[2023-02-23 01:19:28,858][05631] Num frames 4100... +[2023-02-23 01:19:28,978][05631] Num frames 4200... +[2023-02-23 01:19:29,091][05631] Num frames 4300... +[2023-02-23 01:19:29,211][05631] Num frames 4400... +[2023-02-23 01:19:29,370][05631] Avg episode rewards: #0: 28.712, true rewards: #0: 11.212 +[2023-02-23 01:19:29,371][05631] Avg episode reward: 28.712, avg true_objective: 11.212 +[2023-02-23 01:19:29,396][05631] Num frames 4500... +[2023-02-23 01:19:29,520][05631] Num frames 4600... +[2023-02-23 01:19:29,639][05631] Num frames 4700... +[2023-02-23 01:19:29,763][05631] Num frames 4800... +[2023-02-23 01:19:29,879][05631] Num frames 4900... +[2023-02-23 01:19:30,012][05631] Num frames 5000... +[2023-02-23 01:19:30,138][05631] Num frames 5100... +[2023-02-23 01:19:30,255][05631] Num frames 5200... +[2023-02-23 01:19:30,380][05631] Num frames 5300... +[2023-02-23 01:19:30,496][05631] Num frames 5400... +[2023-02-23 01:19:30,617][05631] Num frames 5500... +[2023-02-23 01:19:30,737][05631] Num frames 5600... +[2023-02-23 01:19:30,856][05631] Num frames 5700... +[2023-02-23 01:19:30,983][05631] Num frames 5800... +[2023-02-23 01:19:31,102][05631] Num frames 5900... +[2023-02-23 01:19:31,220][05631] Num frames 6000... +[2023-02-23 01:19:31,350][05631] Num frames 6100... +[2023-02-23 01:19:31,468][05631] Num frames 6200... +[2023-02-23 01:19:31,585][05631] Num frames 6300... +[2023-02-23 01:19:31,705][05631] Num frames 6400... +[2023-02-23 01:19:31,825][05631] Num frames 6500... +[2023-02-23 01:19:31,985][05631] Avg episode rewards: #0: 33.770, true rewards: #0: 13.170 +[2023-02-23 01:19:31,987][05631] Avg episode reward: 33.770, avg true_objective: 13.170 +[2023-02-23 01:19:32,010][05631] Num frames 6600... +[2023-02-23 01:19:32,137][05631] Num frames 6700... +[2023-02-23 01:19:32,267][05631] Num frames 6800... +[2023-02-23 01:19:32,386][05631] Num frames 6900... +[2023-02-23 01:19:32,506][05631] Num frames 7000... +[2023-02-23 01:19:32,633][05631] Num frames 7100... +[2023-02-23 01:19:32,751][05631] Num frames 7200... 
+[2023-02-23 01:19:32,871][05631] Num frames 7300... +[2023-02-23 01:19:33,002][05631] Num frames 7400... +[2023-02-23 01:19:33,151][05631] Avg episode rewards: #0: 31.968, true rewards: #0: 12.468 +[2023-02-23 01:19:33,153][05631] Avg episode reward: 31.968, avg true_objective: 12.468 +[2023-02-23 01:19:33,181][05631] Num frames 7500... +[2023-02-23 01:19:33,307][05631] Num frames 7600... +[2023-02-23 01:19:33,434][05631] Num frames 7700... +[2023-02-23 01:19:33,550][05631] Num frames 7800... +[2023-02-23 01:19:33,667][05631] Num frames 7900... +[2023-02-23 01:19:33,783][05631] Num frames 8000... +[2023-02-23 01:19:33,900][05631] Num frames 8100... +[2023-02-23 01:19:34,018][05631] Num frames 8200... +[2023-02-23 01:19:34,132][05631] Num frames 8300... +[2023-02-23 01:19:34,257][05631] Num frames 8400... +[2023-02-23 01:19:34,375][05631] Num frames 8500... +[2023-02-23 01:19:34,491][05631] Num frames 8600... +[2023-02-23 01:19:34,611][05631] Num frames 8700... +[2023-02-23 01:19:34,729][05631] Num frames 8800... +[2023-02-23 01:19:34,844][05631] Num frames 8900... +[2023-02-23 01:19:34,966][05631] Num frames 9000... +[2023-02-23 01:19:35,090][05631] Num frames 9100... +[2023-02-23 01:19:35,209][05631] Num frames 9200... +[2023-02-23 01:19:35,341][05631] Num frames 9300... +[2023-02-23 01:19:35,458][05631] Num frames 9400... +[2023-02-23 01:19:35,578][05631] Num frames 9500... +[2023-02-23 01:19:35,732][05631] Avg episode rewards: #0: 36.115, true rewards: #0: 13.687 +[2023-02-23 01:19:35,735][05631] Avg episode reward: 36.115, avg true_objective: 13.687 +[2023-02-23 01:19:35,768][05631] Num frames 9600... +[2023-02-23 01:19:35,895][05631] Num frames 9700... +[2023-02-23 01:19:36,024][05631] Num frames 9800... +[2023-02-23 01:19:36,141][05631] Num frames 9900... +[2023-02-23 01:19:36,263][05631] Num frames 10000... +[2023-02-23 01:19:36,382][05631] Num frames 10100... +[2023-02-23 01:19:36,506][05631] Num frames 10200... +[2023-02-23 01:19:36,624][05631] Num frames 10300... +[2023-02-23 01:19:36,683][05631] Avg episode rewards: #0: 33.501, true rewards: #0: 12.876 +[2023-02-23 01:19:36,684][05631] Avg episode reward: 33.501, avg true_objective: 12.876 +[2023-02-23 01:19:36,813][05631] Num frames 10400... +[2023-02-23 01:19:36,942][05631] Num frames 10500... +[2023-02-23 01:19:37,067][05631] Num frames 10600... +[2023-02-23 01:19:37,192][05631] Num frames 10700... +[2023-02-23 01:19:37,321][05631] Num frames 10800... +[2023-02-23 01:19:37,438][05631] Num frames 10900... +[2023-02-23 01:19:37,554][05631] Num frames 11000... +[2023-02-23 01:19:37,671][05631] Num frames 11100... +[2023-02-23 01:19:37,784][05631] Num frames 11200... +[2023-02-23 01:19:37,906][05631] Num frames 11300... +[2023-02-23 01:19:38,070][05631] Num frames 11400... +[2023-02-23 01:19:38,239][05631] Num frames 11500... +[2023-02-23 01:19:38,434][05631] Avg episode rewards: #0: 33.201, true rewards: #0: 12.868 +[2023-02-23 01:19:38,440][05631] Avg episode reward: 33.201, avg true_objective: 12.868 +[2023-02-23 01:19:38,478][05631] Num frames 11600... +[2023-02-23 01:19:38,647][05631] Num frames 11700... +[2023-02-23 01:19:38,808][05631] Num frames 11800... +[2023-02-23 01:19:38,967][05631] Num frames 11900... +[2023-02-23 01:19:39,138][05631] Num frames 12000... +[2023-02-23 01:19:39,302][05631] Num frames 12100... +[2023-02-23 01:19:39,462][05631] Num frames 12200... +[2023-02-23 01:19:39,621][05631] Num frames 12300... +[2023-02-23 01:19:39,783][05631] Num frames 12400... 
+[2023-02-23 01:19:39,946][05631] Num frames 12500... +[2023-02-23 01:19:40,106][05631] Num frames 12600... +[2023-02-23 01:19:40,196][05631] Avg episode rewards: #0: 32.418, true rewards: #0: 12.618 +[2023-02-23 01:19:40,198][05631] Avg episode reward: 32.418, avg true_objective: 12.618 +[2023-02-23 01:21:02,859][05631] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2023-02-23 01:21:03,307][05631] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-23 01:21:03,315][05631] Overriding arg 'num_workers' with value 1 passed from command line +[2023-02-23 01:21:03,319][05631] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-02-23 01:21:03,323][05631] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-02-23 01:21:03,327][05631] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-02-23 01:21:03,329][05631] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-02-23 01:21:03,331][05631] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2023-02-23 01:21:03,333][05631] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-02-23 01:21:03,335][05631] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2023-02-23 01:21:03,337][05631] Adding new argument 'hf_repository'='pittawat/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2023-02-23 01:21:03,339][05631] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-02-23 01:21:03,341][05631] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-02-23 01:21:03,343][05631] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-02-23 01:21:03,345][05631] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-02-23 01:21:03,347][05631] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-02-23 01:21:03,376][05631] RunningMeanStd input shape: (3, 72, 128) +[2023-02-23 01:21:03,378][05631] RunningMeanStd input shape: (1,) +[2023-02-23 01:21:03,395][05631] ConvEncoder: input_channels=3 +[2023-02-23 01:21:03,455][05631] Conv encoder output size: 512 +[2023-02-23 01:21:03,458][05631] Policy head output size: 512 +[2023-02-23 01:21:03,484][05631] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003664_15007744.pth... +[2023-02-23 01:21:04,167][05631] Num frames 100... +[2023-02-23 01:21:04,328][05631] Num frames 200... +[2023-02-23 01:21:04,487][05631] Num frames 300... +[2023-02-23 01:21:04,639][05631] Num frames 400... +[2023-02-23 01:21:04,805][05631] Num frames 500... +[2023-02-23 01:21:04,956][05631] Num frames 600... +[2023-02-23 01:21:05,112][05631] Num frames 700... +[2023-02-23 01:21:05,285][05631] Num frames 800... +[2023-02-23 01:21:05,446][05631] Num frames 900... +[2023-02-23 01:21:05,617][05631] Num frames 1000... +[2023-02-23 01:21:05,813][05631] Num frames 1100... +[2023-02-23 01:21:05,982][05631] Num frames 1200... +[2023-02-23 01:21:06,148][05631] Num frames 1300... +[2023-02-23 01:21:06,305][05631] Num frames 1400... 
+[2023-02-23 01:21:06,407][05631] Avg episode rewards: #0: 34.180, true rewards: #0: 14.180 +[2023-02-23 01:21:06,410][05631] Avg episode reward: 34.180, avg true_objective: 14.180 +[2023-02-23 01:21:06,600][05631] Num frames 1500... +[2023-02-23 01:21:06,782][05631] Num frames 1600... +[2023-02-23 01:21:06,943][05631] Num frames 1700... +[2023-02-23 01:21:07,148][05631] Num frames 1800... +[2023-02-23 01:21:07,338][05631] Num frames 1900... +[2023-02-23 01:21:07,525][05631] Num frames 2000... +[2023-02-23 01:21:07,724][05631] Num frames 2100... +[2023-02-23 01:21:07,920][05631] Num frames 2200... +[2023-02-23 01:21:08,095][05631] Num frames 2300... +[2023-02-23 01:21:08,277][05631] Num frames 2400... +[2023-02-23 01:21:08,456][05631] Num frames 2500... +[2023-02-23 01:21:08,665][05631] Num frames 2600... +[2023-02-23 01:21:08,874][05631] Num frames 2700... +[2023-02-23 01:21:09,066][05631] Num frames 2800... +[2023-02-23 01:21:09,262][05631] Num frames 2900... +[2023-02-23 01:21:09,460][05631] Num frames 3000... +[2023-02-23 01:21:09,671][05631] Num frames 3100... +[2023-02-23 01:21:09,892][05631] Num frames 3200... +[2023-02-23 01:21:10,103][05631] Num frames 3300... +[2023-02-23 01:21:10,309][05631] Num frames 3400... +[2023-02-23 01:21:10,495][05631] Num frames 3500... +[2023-02-23 01:21:10,606][05631] Avg episode rewards: #0: 45.589, true rewards: #0: 17.590 +[2023-02-23 01:21:10,608][05631] Avg episode reward: 45.589, avg true_objective: 17.590 +[2023-02-23 01:21:10,793][05631] Num frames 3600... +[2023-02-23 01:21:11,001][05631] Num frames 3700... +[2023-02-23 01:21:11,217][05631] Num frames 3800... +[2023-02-23 01:21:11,413][05631] Num frames 3900... +[2023-02-23 01:21:11,633][05631] Num frames 4000... +[2023-02-23 01:21:11,820][05631] Num frames 4100... +[2023-02-23 01:21:11,984][05631] Num frames 4200... +[2023-02-23 01:21:12,147][05631] Num frames 4300... +[2023-02-23 01:21:12,303][05631] Num frames 4400... +[2023-02-23 01:21:12,469][05631] Num frames 4500... +[2023-02-23 01:21:12,630][05631] Num frames 4600... +[2023-02-23 01:21:12,799][05631] Num frames 4700... +[2023-02-23 01:21:12,970][05631] Avg episode rewards: #0: 39.553, true rewards: #0: 15.887 +[2023-02-23 01:21:12,973][05631] Avg episode reward: 39.553, avg true_objective: 15.887 +[2023-02-23 01:21:13,029][05631] Num frames 4800... +[2023-02-23 01:21:13,179][05631] Num frames 4900... +[2023-02-23 01:21:13,295][05631] Num frames 5000... +[2023-02-23 01:21:13,414][05631] Num frames 5100... +[2023-02-23 01:21:13,532][05631] Num frames 5200... +[2023-02-23 01:21:13,646][05631] Num frames 5300... +[2023-02-23 01:21:13,757][05631] Num frames 5400... +[2023-02-23 01:21:13,869][05631] Num frames 5500... +[2023-02-23 01:21:13,982][05631] Num frames 5600... +[2023-02-23 01:21:14,093][05631] Num frames 5700... +[2023-02-23 01:21:14,214][05631] Num frames 5800... +[2023-02-23 01:21:14,330][05631] Num frames 5900... +[2023-02-23 01:21:14,448][05631] Num frames 6000... +[2023-02-23 01:21:14,563][05631] Avg episode rewards: #0: 37.615, true rewards: #0: 15.115 +[2023-02-23 01:21:14,565][05631] Avg episode reward: 37.615, avg true_objective: 15.115 +[2023-02-23 01:21:14,631][05631] Num frames 6100... +[2023-02-23 01:21:14,748][05631] Num frames 6200... +[2023-02-23 01:21:14,868][05631] Num frames 6300... +[2023-02-23 01:21:14,989][05631] Num frames 6400... +[2023-02-23 01:21:15,113][05631] Num frames 6500... +[2023-02-23 01:21:15,240][05631] Num frames 6600... +[2023-02-23 01:21:15,362][05631] Num frames 6700... 
+[2023-02-23 01:21:15,490][05631] Num frames 6800... +[2023-02-23 01:21:15,609][05631] Num frames 6900... +[2023-02-23 01:21:15,725][05631] Num frames 7000... +[2023-02-23 01:21:15,839][05631] Num frames 7100... +[2023-02-23 01:21:15,955][05631] Num frames 7200... +[2023-02-23 01:21:16,077][05631] Avg episode rewards: #0: 35.892, true rewards: #0: 14.492 +[2023-02-23 01:21:16,079][05631] Avg episode reward: 35.892, avg true_objective: 14.492 +[2023-02-23 01:21:16,149][05631] Num frames 7300... +[2023-02-23 01:21:16,280][05631] Num frames 7400... +[2023-02-23 01:21:16,406][05631] Num frames 7500... +[2023-02-23 01:21:16,529][05631] Num frames 7600... +[2023-02-23 01:21:16,645][05631] Num frames 7700... +[2023-02-23 01:21:16,758][05631] Num frames 7800... +[2023-02-23 01:21:16,870][05631] Num frames 7900... +[2023-02-23 01:21:16,989][05631] Num frames 8000... +[2023-02-23 01:21:17,101][05631] Num frames 8100... +[2023-02-23 01:21:17,220][05631] Num frames 8200... +[2023-02-23 01:21:17,335][05631] Num frames 8300... +[2023-02-23 01:21:17,450][05631] Num frames 8400... +[2023-02-23 01:21:17,570][05631] Num frames 8500... +[2023-02-23 01:21:17,684][05631] Num frames 8600... +[2023-02-23 01:21:17,795][05631] Num frames 8700... +[2023-02-23 01:21:17,909][05631] Num frames 8800... +[2023-02-23 01:21:18,023][05631] Num frames 8900... +[2023-02-23 01:21:18,136][05631] Num frames 9000... +[2023-02-23 01:21:18,258][05631] Num frames 9100... +[2023-02-23 01:21:18,373][05631] Num frames 9200... +[2023-02-23 01:21:18,491][05631] Num frames 9300... +[2023-02-23 01:21:18,603][05631] Avg episode rewards: #0: 39.743, true rewards: #0: 15.577 +[2023-02-23 01:21:18,606][05631] Avg episode reward: 39.743, avg true_objective: 15.577 +[2023-02-23 01:21:18,675][05631] Num frames 9400... +[2023-02-23 01:21:18,795][05631] Num frames 9500... +[2023-02-23 01:21:18,919][05631] Num frames 9600... +[2023-02-23 01:21:19,053][05631] Num frames 9700... +[2023-02-23 01:21:19,172][05631] Num frames 9800... +[2023-02-23 01:21:19,295][05631] Num frames 9900... +[2023-02-23 01:21:19,410][05631] Num frames 10000... +[2023-02-23 01:21:19,523][05631] Num frames 10100... +[2023-02-23 01:21:19,645][05631] Num frames 10200... +[2023-02-23 01:21:19,759][05631] Num frames 10300... +[2023-02-23 01:21:19,869][05631] Num frames 10400... +[2023-02-23 01:21:19,982][05631] Num frames 10500... +[2023-02-23 01:21:20,110][05631] Num frames 10600... +[2023-02-23 01:21:20,226][05631] Num frames 10700... +[2023-02-23 01:21:20,345][05631] Num frames 10800... +[2023-02-23 01:21:20,463][05631] Num frames 10900... +[2023-02-23 01:21:20,585][05631] Num frames 11000... +[2023-02-23 01:21:20,696][05631] Num frames 11100... +[2023-02-23 01:21:20,809][05631] Num frames 11200... +[2023-02-23 01:21:20,923][05631] Num frames 11300... +[2023-02-23 01:21:21,038][05631] Num frames 11400... +[2023-02-23 01:21:21,149][05631] Avg episode rewards: #0: 42.065, true rewards: #0: 16.351 +[2023-02-23 01:21:21,151][05631] Avg episode reward: 42.065, avg true_objective: 16.351 +[2023-02-23 01:21:21,217][05631] Num frames 11500... +[2023-02-23 01:21:21,352][05631] Num frames 11600... +[2023-02-23 01:21:21,470][05631] Num frames 11700... +[2023-02-23 01:21:21,583][05631] Num frames 11800... +[2023-02-23 01:21:21,699][05631] Num frames 11900... +[2023-02-23 01:21:21,754][05631] Avg episode rewards: #0: 37.875, true rewards: #0: 14.875 +[2023-02-23 01:21:21,756][05631] Avg episode reward: 37.875, avg true_objective: 14.875 +[2023-02-23 01:21:21,927][05631] Num frames 12000... 
+[2023-02-23 01:21:22,092][05631] Num frames 12100... +[2023-02-23 01:21:22,256][05631] Num frames 12200... +[2023-02-23 01:21:22,429][05631] Num frames 12300... +[2023-02-23 01:21:22,595][05631] Num frames 12400... +[2023-02-23 01:21:22,753][05631] Num frames 12500... +[2023-02-23 01:21:22,911][05631] Num frames 12600... +[2023-02-23 01:21:23,071][05631] Num frames 12700... +[2023-02-23 01:21:23,251][05631] Num frames 12800... +[2023-02-23 01:21:23,419][05631] Num frames 12900... +[2023-02-23 01:21:23,587][05631] Num frames 13000... +[2023-02-23 01:21:23,810][05631] Avg episode rewards: #0: 36.765, true rewards: #0: 14.543 +[2023-02-23 01:21:23,813][05631] Avg episode reward: 36.765, avg true_objective: 14.543 +[2023-02-23 01:21:23,840][05631] Num frames 13100... +[2023-02-23 01:21:24,017][05631] Num frames 13200... +[2023-02-23 01:21:24,189][05631] Num frames 13300... +[2023-02-23 01:21:24,361][05631] Num frames 13400... +[2023-02-23 01:21:24,539][05631] Num frames 13500... +[2023-02-23 01:21:24,706][05631] Num frames 13600... +[2023-02-23 01:21:24,873][05631] Num frames 13700... +[2023-02-23 01:21:25,045][05631] Num frames 13800... +[2023-02-23 01:21:25,218][05631] Num frames 13900... +[2023-02-23 01:21:25,375][05631] Num frames 14000... +[2023-02-23 01:21:25,497][05631] Num frames 14100... +[2023-02-23 01:21:25,614][05631] Num frames 14200... +[2023-02-23 01:21:25,736][05631] Num frames 14300... +[2023-02-23 01:21:25,853][05631] Num frames 14400... +[2023-02-23 01:21:25,984][05631] Avg episode rewards: #0: 36.465, true rewards: #0: 14.465 +[2023-02-23 01:21:25,985][05631] Avg episode reward: 36.465, avg true_objective: 14.465 +[2023-02-23 01:23:00,177][05631] Replay video saved to /content/train_dir/default_experiment/replay.mp4!