diff --git "a/sf_log.txt" "b/sf_log.txt" --- "a/sf_log.txt" +++ "b/sf_log.txt" @@ -1416,3 +1416,1491 @@ main_loop: 1367.0984 [2023-02-24 10:34:19,965][01623] Avg episode rewards: #0: 21.426, true rewards: #0: 9.426 [2023-02-24 10:34:19,967][01623] Avg episode reward: 21.426, avg true_objective: 9.426 [2023-02-24 10:35:20,550][01623] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2023-02-24 10:35:37,448][01623] The model has been pushed to https://huggingface.co/dbaibak/rl_course_vizdoom_health_gathering_supreme +[2023-02-24 10:38:14,091][01623] Environment doom_basic already registered, overwriting... +[2023-02-24 10:38:14,094][01623] Environment doom_two_colors_easy already registered, overwriting... +[2023-02-24 10:38:14,096][01623] Environment doom_two_colors_hard already registered, overwriting... +[2023-02-24 10:38:14,097][01623] Environment doom_dm already registered, overwriting... +[2023-02-24 10:38:14,103][01623] Environment doom_dwango5 already registered, overwriting... +[2023-02-24 10:38:14,106][01623] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2023-02-24 10:38:14,107][01623] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2023-02-24 10:38:14,109][01623] Environment doom_my_way_home already registered, overwriting... +[2023-02-24 10:38:14,110][01623] Environment doom_deadly_corridor already registered, overwriting... +[2023-02-24 10:38:14,112][01623] Environment doom_defend_the_center already registered, overwriting... +[2023-02-24 10:38:14,114][01623] Environment doom_defend_the_line already registered, overwriting... +[2023-02-24 10:38:14,116][01623] Environment doom_health_gathering already registered, overwriting... +[2023-02-24 10:38:14,117][01623] Environment doom_health_gathering_supreme already registered, overwriting... +[2023-02-24 10:38:14,119][01623] Environment doom_battle already registered, overwriting... +[2023-02-24 10:38:14,120][01623] Environment doom_battle2 already registered, overwriting... +[2023-02-24 10:38:14,122][01623] Environment doom_duel_bots already registered, overwriting... +[2023-02-24 10:38:14,124][01623] Environment doom_deathmatch_bots already registered, overwriting... +[2023-02-24 10:38:14,127][01623] Environment doom_duel already registered, overwriting... +[2023-02-24 10:38:14,129][01623] Environment doom_deathmatch_full already registered, overwriting... +[2023-02-24 10:38:14,131][01623] Environment doom_benchmark already registered, overwriting... +[2023-02-24 10:38:14,133][01623] register_encoder_factory: +[2023-02-24 10:38:14,160][01623] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-24 10:38:14,162][01623] Overriding arg 'train_for_env_steps' with value 10000000 passed from command line +[2023-02-24 10:38:14,169][01623] Experiment dir /content/train_dir/default_experiment already exists! +[2023-02-24 10:38:14,170][01623] Resuming existing experiment from /content/train_dir/default_experiment... +[2023-02-24 10:38:14,172][01623] Weights and Biases integration disabled +[2023-02-24 10:38:14,176][01623] Environment var CUDA_VISIBLE_DEVICES is 0 + +[2023-02-24 10:38:15,667][01623] Starting experiment with the following configuration: +help=False +algo=APPO +env=doom_health_gathering_supreme +experiment=default_experiment +train_dir=/content/train_dir +restart_behavior=resume +device=gpu +seed=None +num_policies=1 +async_rl=True +serial_mode=False +batched_sampling=False +num_batches_to_accumulate=2 +worker_num_splits=2 +policy_workers_per_policy=1 +max_policy_lag=1000 +num_workers=8 +num_envs_per_worker=4 +batch_size=1024 +num_batches_per_epoch=1 +num_epochs=1 +rollout=32 +recurrence=32 +shuffle_minibatches=False +gamma=0.99 +reward_scale=1.0 +reward_clip=1000.0 +value_bootstrap=False +normalize_returns=True +exploration_loss_coeff=0.001 +value_loss_coeff=0.5 +kl_loss_coeff=0.0 +exploration_loss=symmetric_kl +gae_lambda=0.95 +ppo_clip_ratio=0.1 +ppo_clip_value=0.2 +with_vtrace=False +vtrace_rho=1.0 +vtrace_c=1.0 +optimizer=adam +adam_eps=1e-06 +adam_beta1=0.9 +adam_beta2=0.999 +max_grad_norm=4.0 +learning_rate=0.0001 +lr_schedule=constant +lr_schedule_kl_threshold=0.008 +lr_adaptive_min=1e-06 +lr_adaptive_max=0.01 +obs_subtract_mean=0.0 +obs_scale=255.0 +normalize_input=True +normalize_input_keys=None +decorrelate_experience_max_seconds=0 +decorrelate_envs_on_one_worker=True +actor_worker_gpus=[] +set_workers_cpu_affinity=True +force_envs_single_thread=False +default_niceness=0 +log_to_file=True +experiment_summaries_interval=10 +flush_summaries_interval=30 +stats_avg=100 +summaries_use_frameskip=True +heartbeat_interval=20 +heartbeat_reporting_interval=600 +train_for_env_steps=10000000 +train_for_seconds=10000000000 +save_every_sec=120 +keep_checkpoints=2 +load_checkpoint_kind=latest +save_milestones_sec=-1 +save_best_every_sec=5 +save_best_metric=reward +save_best_after=100000 +benchmark=False +encoder_mlp_layers=[512, 512] +encoder_conv_architecture=convnet_simple +encoder_conv_mlp_layers=[512] +use_rnn=True +rnn_size=512 +rnn_type=gru +rnn_num_layers=1 +decoder_mlp_layers=[] +nonlinearity=elu +policy_initialization=orthogonal +policy_init_gain=1.0 +actor_critic_share_weights=True +adaptive_stddev=True +continuous_tanh_scale=0.0 +initial_stddev=1.0 +use_env_info_cache=False +env_gpu_actions=False +env_gpu_observations=True +env_frameskip=4 +env_framestack=1 +pixel_format=CHW +use_record_episode_statistics=False +with_wandb=False +wandb_user=None +wandb_project=sample_factory +wandb_group=None +wandb_job_type=SF +wandb_tags=[] +with_pbt=False +pbt_mix_policies_in_one_env=True +pbt_period_env_steps=5000000 +pbt_start_mutation=20000000 +pbt_replace_fraction=0.3 +pbt_mutation_rate=0.15 +pbt_replace_reward_gap=0.1 +pbt_replace_reward_gap_absolute=1e-06 +pbt_optimize_gamma=False +pbt_target_objective=true_objective +pbt_perturb_min=1.1 +pbt_perturb_max=1.5 +num_agents=-1 +num_humans=0 +num_bots=-1 +start_bot_difficulty=None +timelimit=None +res_w=128 +res_h=72 +wide_aspect_ratio=False +eval_env_frameskip=1 +fps=35 +command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4500000 +cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4500000} +git_hash=unknown +git_repo_name=not a git repository +[2023-02-24 10:38:15,671][01623] Saving configuration to /content/train_dir/default_experiment/config.json... +[2023-02-24 10:38:15,677][01623] Rollout worker 0 uses device cpu +[2023-02-24 10:38:15,678][01623] Rollout worker 1 uses device cpu +[2023-02-24 10:38:15,682][01623] Rollout worker 2 uses device cpu +[2023-02-24 10:38:15,684][01623] Rollout worker 3 uses device cpu +[2023-02-24 10:38:15,689][01623] Rollout worker 4 uses device cpu +[2023-02-24 10:38:15,690][01623] Rollout worker 5 uses device cpu +[2023-02-24 10:38:15,692][01623] Rollout worker 6 uses device cpu +[2023-02-24 10:38:15,694][01623] Rollout worker 7 uses device cpu +[2023-02-24 10:38:15,809][01623] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-24 10:38:15,812][01623] InferenceWorker_p0-w0: min num requests: 2 +[2023-02-24 10:38:15,842][01623] Starting all processes... +[2023-02-24 10:38:15,843][01623] Starting process learner_proc0 +[2023-02-24 10:38:15,988][01623] Starting all processes... +[2023-02-24 10:38:15,996][01623] Starting process inference_proc0-0 +[2023-02-24 10:38:15,996][01623] Starting process rollout_proc0 +[2023-02-24 10:38:15,998][01623] Starting process rollout_proc1 +[2023-02-24 10:38:15,998][01623] Starting process rollout_proc2 +[2023-02-24 10:38:16,076][01623] Starting process rollout_proc3 +[2023-02-24 10:38:16,083][01623] Starting process rollout_proc4 +[2023-02-24 10:38:16,083][01623] Starting process rollout_proc5 +[2023-02-24 10:38:16,083][01623] Starting process rollout_proc6 +[2023-02-24 10:38:16,083][01623] Starting process rollout_proc7 +[2023-02-24 10:38:25,215][28910] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-24 10:38:25,219][28910] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2023-02-24 10:38:25,245][28910] Num visible devices: 1 +[2023-02-24 10:38:25,269][28910] Starting seed is not provided +[2023-02-24 10:38:25,270][28910] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-24 10:38:25,271][28910] Initializing actor-critic model on device cuda:0 +[2023-02-24 10:38:25,272][28910] RunningMeanStd input shape: (3, 72, 128) +[2023-02-24 10:38:25,273][28910] RunningMeanStd input shape: (1,) +[2023-02-24 10:38:25,329][28910] ConvEncoder: input_channels=3 +[2023-02-24 10:38:26,185][28910] Conv encoder output size: 512 +[2023-02-24 10:38:26,191][28910] Policy head output size: 512 +[2023-02-24 10:38:26,264][28910] Created Actor Critic model with architecture: +[2023-02-24 10:38:26,283][28910] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2023-02-24 10:38:27,193][28926] Worker 0 uses CPU cores [0] +[2023-02-24 10:38:27,416][28925] Worker 1 uses CPU cores [1] +[2023-02-24 10:38:27,475][28924] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-24 10:38:27,477][28924] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2023-02-24 10:38:27,538][28924] Num visible devices: 1 +[2023-02-24 10:38:27,857][28927] Worker 3 uses CPU cores [1] +[2023-02-24 10:38:28,100][28937] Worker 2 uses CPU cores [0] +[2023-02-24 10:38:28,226][28931] Worker 4 uses CPU cores [0] +[2023-02-24 10:38:28,248][28939] Worker 5 uses CPU cores [1] +[2023-02-24 10:38:28,369][28943] Worker 6 uses CPU cores [0] +[2023-02-24 10:38:28,460][28941] Worker 7 uses CPU cores [1] +[2023-02-24 10:38:30,388][28910] Using optimizer +[2023-02-24 10:38:30,389][28910] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001100_4505600.pth... +[2023-02-24 10:38:30,422][28910] Loading model from checkpoint +[2023-02-24 10:38:30,426][28910] Loaded experiment state at self.train_step=1100, self.env_steps=4505600 +[2023-02-24 10:38:30,427][28910] Initialized policy 0 weights for model version 1100 +[2023-02-24 10:38:30,430][28910] LearnerWorker_p0 finished initialization! +[2023-02-24 10:38:30,432][28910] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-24 10:38:30,573][28924] RunningMeanStd input shape: (3, 72, 128) +[2023-02-24 10:38:30,574][28924] RunningMeanStd input shape: (1,) +[2023-02-24 10:38:30,586][28924] ConvEncoder: input_channels=3 +[2023-02-24 10:38:30,684][28924] Conv encoder output size: 512 +[2023-02-24 10:38:30,684][28924] Policy head output size: 512 +[2023-02-24 10:38:32,989][01623] Inference worker 0-0 is ready! +[2023-02-24 10:38:32,992][01623] All inference workers are ready! Signal rollout workers to start! +[2023-02-24 10:38:33,115][28926] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 10:38:33,118][28931] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 10:38:33,122][28937] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 10:38:33,144][28925] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 10:38:33,144][28927] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 10:38:33,154][28941] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 10:38:33,160][28939] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 10:38:33,163][28943] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-24 10:38:33,657][28941] Decorrelating experience for 0 frames... +[2023-02-24 10:38:34,177][01623] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 4505600. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-24 10:38:34,387][28939] Decorrelating experience for 0 frames... +[2023-02-24 10:38:34,389][28941] Decorrelating experience for 32 frames... +[2023-02-24 10:38:34,655][28926] Decorrelating experience for 0 frames... +[2023-02-24 10:38:34,666][28931] Decorrelating experience for 0 frames... +[2023-02-24 10:38:34,682][28943] Decorrelating experience for 0 frames... +[2023-02-24 10:38:34,680][28937] Decorrelating experience for 0 frames... +[2023-02-24 10:38:35,313][28939] Decorrelating experience for 32 frames... +[2023-02-24 10:38:35,805][01623] Heartbeat connected on Batcher_0 +[2023-02-24 10:38:35,808][01623] Heartbeat connected on LearnerWorker_p0 +[2023-02-24 10:38:35,836][01623] Heartbeat connected on InferenceWorker_p0-w0 +[2023-02-24 10:38:35,946][28926] Decorrelating experience for 32 frames... +[2023-02-24 10:38:35,986][28937] Decorrelating experience for 32 frames... +[2023-02-24 10:38:36,160][28941] Decorrelating experience for 64 frames... +[2023-02-24 10:38:36,179][28927] Decorrelating experience for 0 frames... +[2023-02-24 10:38:36,951][28943] Decorrelating experience for 32 frames... +[2023-02-24 10:38:37,596][28939] Decorrelating experience for 64 frames... +[2023-02-24 10:38:37,836][28925] Decorrelating experience for 0 frames... +[2023-02-24 10:38:37,867][28927] Decorrelating experience for 32 frames... +[2023-02-24 10:38:37,995][28926] Decorrelating experience for 64 frames... +[2023-02-24 10:38:38,027][28937] Decorrelating experience for 64 frames... +[2023-02-24 10:38:38,803][28943] Decorrelating experience for 64 frames... +[2023-02-24 10:38:39,029][28939] Decorrelating experience for 96 frames... +[2023-02-24 10:38:39,176][01623] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4505600. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-24 10:38:39,197][28931] Decorrelating experience for 32 frames... +[2023-02-24 10:38:39,384][01623] Heartbeat connected on RolloutWorker_w5 +[2023-02-24 10:38:39,690][28925] Decorrelating experience for 32 frames... +[2023-02-24 10:38:39,993][28937] Decorrelating experience for 96 frames... +[2023-02-24 10:38:40,616][01623] Heartbeat connected on RolloutWorker_w2 +[2023-02-24 10:38:41,112][28927] Decorrelating experience for 64 frames... +[2023-02-24 10:38:41,762][28925] Decorrelating experience for 64 frames... +[2023-02-24 10:38:42,447][28943] Decorrelating experience for 96 frames... +[2023-02-24 10:38:42,521][28927] Decorrelating experience for 96 frames... +[2023-02-24 10:38:42,758][01623] Heartbeat connected on RolloutWorker_w3 +[2023-02-24 10:38:43,095][01623] Heartbeat connected on RolloutWorker_w6 +[2023-02-24 10:38:43,092][28926] Decorrelating experience for 96 frames... +[2023-02-24 10:38:43,979][01623] Heartbeat connected on RolloutWorker_w0 +[2023-02-24 10:38:44,177][01623] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4505600. Throughput: 0: 6.4. Samples: 64. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-24 10:38:44,185][01623] Avg episode reward: [(0, '3.288')] +[2023-02-24 10:38:45,174][28931] Decorrelating experience for 64 frames... +[2023-02-24 10:38:45,212][28941] Decorrelating experience for 96 frames... +[2023-02-24 10:38:45,609][01623] Heartbeat connected on RolloutWorker_w7 +[2023-02-24 10:38:46,374][28910] Signal inference workers to stop experience collection... +[2023-02-24 10:38:46,388][28924] InferenceWorker_p0-w0: stopping experience collection +[2023-02-24 10:38:47,033][28931] Decorrelating experience for 96 frames... +[2023-02-24 10:38:47,111][01623] Heartbeat connected on RolloutWorker_w4 +[2023-02-24 10:38:47,127][28925] Decorrelating experience for 96 frames... +[2023-02-24 10:38:47,206][01623] Heartbeat connected on RolloutWorker_w1 +[2023-02-24 10:38:49,180][01623] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4505600. Throughput: 0: 160.4. Samples: 2406. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-24 10:38:49,182][01623] Avg episode reward: [(0, '4.869')] +[2023-02-24 10:38:49,426][28910] Signal inference workers to resume experience collection... +[2023-02-24 10:38:49,427][28924] InferenceWorker_p0-w0: resuming experience collection +[2023-02-24 10:38:54,177][01623] Fps is (10 sec: 2048.0, 60 sec: 1024.0, 300 sec: 1024.0). Total num frames: 4526080. Throughput: 0: 322.3. Samples: 6446. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-24 10:38:54,182][01623] Avg episode reward: [(0, '9.415')] +[2023-02-24 10:38:59,177][01623] Fps is (10 sec: 3687.2, 60 sec: 1474.6, 300 sec: 1474.6). Total num frames: 4542464. Throughput: 0: 338.3. Samples: 8458. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:38:59,196][01623] Avg episode reward: [(0, '11.976')] +[2023-02-24 10:39:00,348][28924] Updated weights for policy 0, policy_version 1110 (0.0018) +[2023-02-24 10:39:04,177][01623] Fps is (10 sec: 2867.2, 60 sec: 1638.4, 300 sec: 1638.4). Total num frames: 4554752. Throughput: 0: 421.9. Samples: 12656. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:39:04,179][01623] Avg episode reward: [(0, '14.624')] +[2023-02-24 10:39:09,177][01623] Fps is (10 sec: 3276.8, 60 sec: 1989.5, 300 sec: 1989.5). Total num frames: 4575232. Throughput: 0: 520.2. Samples: 18208. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:39:09,184][01623] Avg episode reward: [(0, '16.763')] +[2023-02-24 10:39:11,415][28924] Updated weights for policy 0, policy_version 1120 (0.0012) +[2023-02-24 10:39:14,177][01623] Fps is (10 sec: 4096.0, 60 sec: 2252.8, 300 sec: 2252.8). Total num frames: 4595712. Throughput: 0: 540.0. Samples: 21602. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:39:14,182][01623] Avg episode reward: [(0, '19.612')] +[2023-02-24 10:39:19,177][01623] Fps is (10 sec: 4096.0, 60 sec: 2457.6, 300 sec: 2457.6). Total num frames: 4616192. Throughput: 0: 611.3. Samples: 27508. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:39:19,179][01623] Avg episode reward: [(0, '22.054')] +[2023-02-24 10:39:23,184][28924] Updated weights for policy 0, policy_version 1130 (0.0015) +[2023-02-24 10:39:24,177][01623] Fps is (10 sec: 3276.8, 60 sec: 2457.6, 300 sec: 2457.6). Total num frames: 4628480. Throughput: 0: 705.5. Samples: 31748. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:39:24,183][01623] Avg episode reward: [(0, '23.631')] +[2023-02-24 10:39:29,177][01623] Fps is (10 sec: 3276.8, 60 sec: 2606.5, 300 sec: 2606.5). Total num frames: 4648960. Throughput: 0: 753.2. Samples: 33958. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:39:29,180][01623] Avg episode reward: [(0, '26.722')] +[2023-02-24 10:39:33,745][28924] Updated weights for policy 0, policy_version 1140 (0.0017) +[2023-02-24 10:39:34,177][01623] Fps is (10 sec: 4096.0, 60 sec: 2730.7, 300 sec: 2730.7). Total num frames: 4669440. Throughput: 0: 851.8. Samples: 40736. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:39:34,179][01623] Avg episode reward: [(0, '28.759')] +[2023-02-24 10:39:39,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3072.0, 300 sec: 2835.7). Total num frames: 4689920. Throughput: 0: 889.9. Samples: 46490. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:39:39,179][01623] Avg episode reward: [(0, '29.656')] +[2023-02-24 10:39:44,177][01623] Fps is (10 sec: 3276.7, 60 sec: 3276.8, 300 sec: 2808.7). Total num frames: 4702208. Throughput: 0: 891.9. Samples: 48592. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:39:44,180][01623] Avg episode reward: [(0, '30.191')] +[2023-02-24 10:39:44,194][28910] Saving new best policy, reward=30.191! +[2023-02-24 10:39:46,576][28924] Updated weights for policy 0, policy_version 1150 (0.0028) +[2023-02-24 10:39:49,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3550.0, 300 sec: 2839.9). Total num frames: 4718592. Throughput: 0: 897.6. Samples: 53048. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:39:49,179][01623] Avg episode reward: [(0, '29.059')] +[2023-02-24 10:39:54,177][01623] Fps is (10 sec: 4096.1, 60 sec: 3618.1, 300 sec: 2969.6). Total num frames: 4743168. Throughput: 0: 926.8. Samples: 59916. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:39:54,180][01623] Avg episode reward: [(0, '27.072')] +[2023-02-24 10:39:55,782][28924] Updated weights for policy 0, policy_version 1160 (0.0022) +[2023-02-24 10:39:59,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 2987.7). Total num frames: 4759552. Throughput: 0: 927.5. Samples: 63340. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:39:59,184][01623] Avg episode reward: [(0, '26.260')] +[2023-02-24 10:40:04,179][01623] Fps is (10 sec: 2866.4, 60 sec: 3618.0, 300 sec: 2958.1). Total num frames: 4771840. Throughput: 0: 882.5. Samples: 67222. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:40:04,185][01623] Avg episode reward: [(0, '26.167')] +[2023-02-24 10:40:09,177][01623] Fps is (10 sec: 2457.4, 60 sec: 3481.6, 300 sec: 2931.9). Total num frames: 4784128. Throughput: 0: 862.3. Samples: 70550. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:40:09,182][01623] Avg episode reward: [(0, '25.514')] +[2023-02-24 10:40:11,694][28924] Updated weights for policy 0, policy_version 1170 (0.0031) +[2023-02-24 10:40:14,177][01623] Fps is (10 sec: 2458.3, 60 sec: 3345.1, 300 sec: 2908.2). Total num frames: 4796416. Throughput: 0: 852.3. Samples: 72310. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:40:14,185][01623] Avg episode reward: [(0, '26.188')] +[2023-02-24 10:40:14,193][28910] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001171_4796416.pth... +[2023-02-24 10:40:14,438][28910] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001063_4354048.pth +[2023-02-24 10:40:19,177][01623] Fps is (10 sec: 3277.0, 60 sec: 3345.1, 300 sec: 2964.7). Total num frames: 4816896. Throughput: 0: 826.8. Samples: 77940. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:40:19,179][01623] Avg episode reward: [(0, '24.339')] +[2023-02-24 10:40:22,058][28924] Updated weights for policy 0, policy_version 1180 (0.0033) +[2023-02-24 10:40:24,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3016.1). Total num frames: 4837376. Throughput: 0: 834.5. Samples: 84044. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:40:24,179][01623] Avg episode reward: [(0, '24.590')] +[2023-02-24 10:40:29,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3027.5). Total num frames: 4853760. Throughput: 0: 835.3. Samples: 86182. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:40:29,183][01623] Avg episode reward: [(0, '24.623')] +[2023-02-24 10:40:34,177][01623] Fps is (10 sec: 3276.9, 60 sec: 3345.1, 300 sec: 3037.9). Total num frames: 4870144. Throughput: 0: 832.8. Samples: 90522. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:40:34,179][01623] Avg episode reward: [(0, '24.255')] +[2023-02-24 10:40:35,253][28924] Updated weights for policy 0, policy_version 1190 (0.0017) +[2023-02-24 10:40:39,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3080.2). Total num frames: 4890624. Throughput: 0: 824.1. Samples: 97002. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:40:39,179][01623] Avg episode reward: [(0, '24.742')] +[2023-02-24 10:40:44,177][01623] Fps is (10 sec: 4095.9, 60 sec: 3481.6, 300 sec: 3119.3). Total num frames: 4911104. Throughput: 0: 823.2. Samples: 100384. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:40:44,183][01623] Avg episode reward: [(0, '23.775')] +[2023-02-24 10:40:45,026][28924] Updated weights for policy 0, policy_version 1200 (0.0012) +[2023-02-24 10:40:49,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3094.8). Total num frames: 4923392. Throughput: 0: 841.3. Samples: 105078. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:40:49,184][01623] Avg episode reward: [(0, '24.153')] +[2023-02-24 10:40:54,177][01623] Fps is (10 sec: 2867.3, 60 sec: 3276.8, 300 sec: 3101.3). Total num frames: 4939776. Throughput: 0: 863.6. Samples: 109412. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 10:40:54,179][01623] Avg episode reward: [(0, '24.563')] +[2023-02-24 10:40:57,263][28924] Updated weights for policy 0, policy_version 1210 (0.0017) +[2023-02-24 10:40:59,180][01623] Fps is (10 sec: 4094.5, 60 sec: 3413.1, 300 sec: 3163.7). Total num frames: 4964352. Throughput: 0: 900.9. Samples: 112854. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:40:59,184][01623] Avg episode reward: [(0, '25.321')] +[2023-02-24 10:41:04,177][01623] Fps is (10 sec: 4505.6, 60 sec: 3550.0, 300 sec: 3194.9). Total num frames: 4984832. Throughput: 0: 926.0. Samples: 119608. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:41:04,185][01623] Avg episode reward: [(0, '25.737')] +[2023-02-24 10:41:08,081][28924] Updated weights for policy 0, policy_version 1220 (0.0020) +[2023-02-24 10:41:09,177][01623] Fps is (10 sec: 3278.0, 60 sec: 3549.9, 300 sec: 3171.1). Total num frames: 4997120. Throughput: 0: 888.8. Samples: 124042. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:41:09,182][01623] Avg episode reward: [(0, '24.884')] +[2023-02-24 10:41:14,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3174.4). Total num frames: 5013504. Throughput: 0: 886.6. Samples: 126080. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:41:14,180][01623] Avg episode reward: [(0, '25.072')] +[2023-02-24 10:41:19,178][01623] Fps is (10 sec: 3686.0, 60 sec: 3618.1, 300 sec: 3202.3). Total num frames: 5033984. Throughput: 0: 915.9. Samples: 131740. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:41:19,184][01623] Avg episode reward: [(0, '25.720')] +[2023-02-24 10:41:19,744][28924] Updated weights for policy 0, policy_version 1230 (0.0014) +[2023-02-24 10:41:24,177][01623] Fps is (10 sec: 4096.1, 60 sec: 3618.1, 300 sec: 3228.6). Total num frames: 5054464. Throughput: 0: 923.4. Samples: 138554. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:41:24,179][01623] Avg episode reward: [(0, '25.557')] +[2023-02-24 10:41:29,177][01623] Fps is (10 sec: 3686.8, 60 sec: 3618.1, 300 sec: 3230.0). Total num frames: 5070848. Throughput: 0: 899.7. Samples: 140872. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:41:29,182][01623] Avg episode reward: [(0, '23.273')] +[2023-02-24 10:41:31,515][28924] Updated weights for policy 0, policy_version 1240 (0.0022) +[2023-02-24 10:41:34,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3549.9, 300 sec: 3208.5). Total num frames: 5083136. Throughput: 0: 889.3. Samples: 145096. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:41:34,183][01623] Avg episode reward: [(0, '23.467')] +[2023-02-24 10:41:39,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3232.5). Total num frames: 5103616. Throughput: 0: 919.0. Samples: 150766. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:41:39,183][01623] Avg episode reward: [(0, '24.702')] +[2023-02-24 10:41:42,080][28924] Updated weights for policy 0, policy_version 1250 (0.0018) +[2023-02-24 10:41:44,177][01623] Fps is (10 sec: 4505.5, 60 sec: 3618.1, 300 sec: 3276.8). Total num frames: 5128192. Throughput: 0: 915.6. Samples: 154054. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:41:44,179][01623] Avg episode reward: [(0, '24.497')] +[2023-02-24 10:41:49,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3276.8). Total num frames: 5144576. Throughput: 0: 889.6. Samples: 159642. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:41:49,180][01623] Avg episode reward: [(0, '23.015')] +[2023-02-24 10:41:54,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3256.3). Total num frames: 5156864. Throughput: 0: 885.1. Samples: 163870. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:41:54,184][01623] Avg episode reward: [(0, '23.580')] +[2023-02-24 10:41:54,765][28924] Updated weights for policy 0, policy_version 1260 (0.0017) +[2023-02-24 10:41:59,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3550.1, 300 sec: 3276.8). Total num frames: 5177344. Throughput: 0: 896.3. Samples: 166414. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:41:59,184][01623] Avg episode reward: [(0, '24.457')] +[2023-02-24 10:42:04,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3296.3). Total num frames: 5197824. Throughput: 0: 918.2. Samples: 173056. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:42:04,180][01623] Avg episode reward: [(0, '23.990')] +[2023-02-24 10:42:04,435][28924] Updated weights for policy 0, policy_version 1270 (0.0014) +[2023-02-24 10:42:09,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3295.9). Total num frames: 5214208. Throughput: 0: 888.8. Samples: 178550. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:42:09,180][01623] Avg episode reward: [(0, '24.410')] +[2023-02-24 10:42:14,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3295.4). Total num frames: 5230592. Throughput: 0: 884.8. Samples: 180688. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:42:14,186][01623] Avg episode reward: [(0, '23.390')] +[2023-02-24 10:42:14,198][28910] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001277_5230592.pth... +[2023-02-24 10:42:14,402][28910] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001100_4505600.pth +[2023-02-24 10:42:17,384][28924] Updated weights for policy 0, policy_version 1280 (0.0013) +[2023-02-24 10:42:19,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.2, 300 sec: 3313.2). Total num frames: 5251072. Throughput: 0: 898.5. Samples: 185530. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:42:19,178][01623] Avg episode reward: [(0, '26.329')] +[2023-02-24 10:42:24,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3330.2). Total num frames: 5271552. Throughput: 0: 922.8. Samples: 192294. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:42:24,180][01623] Avg episode reward: [(0, '25.696')] +[2023-02-24 10:42:26,508][28924] Updated weights for policy 0, policy_version 1290 (0.0014) +[2023-02-24 10:42:29,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3329.1). Total num frames: 5287936. Throughput: 0: 920.7. Samples: 195486. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:42:29,183][01623] Avg episode reward: [(0, '26.962')] +[2023-02-24 10:42:34,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3328.0). Total num frames: 5304320. Throughput: 0: 891.2. Samples: 199748. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:42:34,180][01623] Avg episode reward: [(0, '26.428')] +[2023-02-24 10:42:39,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3327.0). Total num frames: 5320704. Throughput: 0: 912.1. Samples: 204916. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:42:39,181][01623] Avg episode reward: [(0, '26.146')] +[2023-02-24 10:42:39,451][28924] Updated weights for policy 0, policy_version 1300 (0.0033) +[2023-02-24 10:42:44,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3358.7). Total num frames: 5345280. Throughput: 0: 929.8. Samples: 208256. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:42:44,178][01623] Avg episode reward: [(0, '26.688')] +[2023-02-24 10:42:49,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3357.1). Total num frames: 5361664. Throughput: 0: 920.4. Samples: 214476. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:42:49,183][01623] Avg episode reward: [(0, '24.312')] +[2023-02-24 10:42:49,510][28924] Updated weights for policy 0, policy_version 1310 (0.0014) +[2023-02-24 10:42:54,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3355.6). Total num frames: 5378048. Throughput: 0: 894.5. Samples: 218802. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:42:54,190][01623] Avg episode reward: [(0, '24.828')] +[2023-02-24 10:42:59,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3354.1). Total num frames: 5394432. Throughput: 0: 896.2. Samples: 221016. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:42:59,179][01623] Avg episode reward: [(0, '24.184')] +[2023-02-24 10:43:01,657][28924] Updated weights for policy 0, policy_version 1320 (0.0018) +[2023-02-24 10:43:04,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3367.8). Total num frames: 5414912. Throughput: 0: 931.4. Samples: 227442. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:43:04,179][01623] Avg episode reward: [(0, '23.402')] +[2023-02-24 10:43:09,178][01623] Fps is (10 sec: 4095.2, 60 sec: 3686.3, 300 sec: 3381.0). Total num frames: 5435392. Throughput: 0: 922.0. Samples: 233786. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:43:09,181][01623] Avg episode reward: [(0, '23.133')] +[2023-02-24 10:43:12,186][28924] Updated weights for policy 0, policy_version 1330 (0.0012) +[2023-02-24 10:43:14,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3379.2). Total num frames: 5451776. Throughput: 0: 898.3. Samples: 235908. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:43:14,182][01623] Avg episode reward: [(0, '24.456')] +[2023-02-24 10:43:19,176][01623] Fps is (10 sec: 3277.4, 60 sec: 3618.1, 300 sec: 3377.4). Total num frames: 5468160. Throughput: 0: 900.1. Samples: 240254. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:43:19,181][01623] Avg episode reward: [(0, '25.422')] +[2023-02-24 10:43:23,594][28924] Updated weights for policy 0, policy_version 1340 (0.0014) +[2023-02-24 10:43:24,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3389.8). Total num frames: 5488640. Throughput: 0: 930.1. Samples: 246770. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:43:24,185][01623] Avg episode reward: [(0, '25.943')] +[2023-02-24 10:43:29,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3401.8). Total num frames: 5509120. Throughput: 0: 931.7. Samples: 250182. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 10:43:29,184][01623] Avg episode reward: [(0, '24.858')] +[2023-02-24 10:43:34,177][01623] Fps is (10 sec: 3686.5, 60 sec: 3686.4, 300 sec: 3457.3). Total num frames: 5525504. Throughput: 0: 901.9. Samples: 255062. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:43:34,179][01623] Avg episode reward: [(0, '25.832')] +[2023-02-24 10:43:35,158][28924] Updated weights for policy 0, policy_version 1350 (0.0023) +[2023-02-24 10:43:39,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3499.0). Total num frames: 5537792. Throughput: 0: 894.3. Samples: 259044. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:43:39,180][01623] Avg episode reward: [(0, '25.780')] +[2023-02-24 10:43:44,177][01623] Fps is (10 sec: 2457.6, 60 sec: 3413.3, 300 sec: 3540.6). Total num frames: 5550080. Throughput: 0: 886.8. Samples: 260924. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:43:44,184][01623] Avg episode reward: [(0, '25.613')] +[2023-02-24 10:43:49,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3526.7). Total num frames: 5566464. Throughput: 0: 841.9. Samples: 265326. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:43:49,179][01623] Avg episode reward: [(0, '25.162')] +[2023-02-24 10:43:49,645][28924] Updated weights for policy 0, policy_version 1360 (0.0020) +[2023-02-24 10:43:54,180][01623] Fps is (10 sec: 3275.6, 60 sec: 3413.1, 300 sec: 3526.7). Total num frames: 5582848. Throughput: 0: 805.4. Samples: 270032. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:43:54,183][01623] Avg episode reward: [(0, '25.255')] +[2023-02-24 10:43:59,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3526.7). Total num frames: 5595136. Throughput: 0: 804.8. Samples: 272126. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:43:59,183][01623] Avg episode reward: [(0, '26.377')] +[2023-02-24 10:44:02,621][28924] Updated weights for policy 0, policy_version 1370 (0.0012) +[2023-02-24 10:44:04,177][01623] Fps is (10 sec: 3278.0, 60 sec: 3345.1, 300 sec: 3526.7). Total num frames: 5615616. Throughput: 0: 827.4. Samples: 277486. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:44:04,182][01623] Avg episode reward: [(0, '26.656')] +[2023-02-24 10:44:09,177][01623] Fps is (10 sec: 4505.6, 60 sec: 3413.4, 300 sec: 3540.6). Total num frames: 5640192. Throughput: 0: 831.4. Samples: 284182. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:44:09,184][01623] Avg episode reward: [(0, '26.001')] +[2023-02-24 10:44:12,728][28924] Updated weights for policy 0, policy_version 1380 (0.0014) +[2023-02-24 10:44:14,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3512.8). Total num frames: 5652480. Throughput: 0: 813.8. Samples: 286802. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:44:14,181][01623] Avg episode reward: [(0, '26.197')] +[2023-02-24 10:44:14,282][28910] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001381_5656576.pth... +[2023-02-24 10:44:14,608][28910] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001171_4796416.pth +[2023-02-24 10:44:19,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3526.7). Total num frames: 5668864. Throughput: 0: 797.2. Samples: 290936. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:44:19,179][01623] Avg episode reward: [(0, '25.554')] +[2023-02-24 10:44:24,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3526.7). Total num frames: 5689344. Throughput: 0: 831.6. Samples: 296468. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:44:24,182][01623] Avg episode reward: [(0, '26.177')] +[2023-02-24 10:44:25,077][28924] Updated weights for policy 0, policy_version 1390 (0.0021) +[2023-02-24 10:44:29,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3345.1, 300 sec: 3526.7). Total num frames: 5709824. Throughput: 0: 859.7. Samples: 299610. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:44:29,183][01623] Avg episode reward: [(0, '24.695')] +[2023-02-24 10:44:34,179][01623] Fps is (10 sec: 3685.5, 60 sec: 3344.9, 300 sec: 3512.8). Total num frames: 5726208. Throughput: 0: 883.9. Samples: 305106. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:44:34,186][01623] Avg episode reward: [(0, '27.673')] +[2023-02-24 10:44:36,633][28924] Updated weights for policy 0, policy_version 1400 (0.0013) +[2023-02-24 10:44:39,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3512.8). Total num frames: 5738496. Throughput: 0: 870.9. Samples: 309218. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-24 10:44:39,183][01623] Avg episode reward: [(0, '27.093')] +[2023-02-24 10:44:44,177][01623] Fps is (10 sec: 3277.6, 60 sec: 3481.6, 300 sec: 3526.7). Total num frames: 5758976. Throughput: 0: 874.5. Samples: 311480. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:44:44,179][01623] Avg episode reward: [(0, '27.463')] +[2023-02-24 10:44:47,976][28924] Updated weights for policy 0, policy_version 1410 (0.0022) +[2023-02-24 10:44:49,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3512.8). Total num frames: 5779456. Throughput: 0: 903.9. Samples: 318160. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:44:49,184][01623] Avg episode reward: [(0, '29.117')] +[2023-02-24 10:44:54,177][01623] Fps is (10 sec: 3686.5, 60 sec: 3550.1, 300 sec: 3512.8). Total num frames: 5795840. Throughput: 0: 883.1. Samples: 323922. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:44:54,179][01623] Avg episode reward: [(0, '29.151')] +[2023-02-24 10:44:59,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3526.8). Total num frames: 5812224. Throughput: 0: 873.1. Samples: 326090. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:44:59,179][01623] Avg episode reward: [(0, '29.213')] +[2023-02-24 10:45:00,342][28924] Updated weights for policy 0, policy_version 1420 (0.0020) +[2023-02-24 10:45:04,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 5828608. Throughput: 0: 884.0. Samples: 330714. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:45:04,184][01623] Avg episode reward: [(0, '29.695')] +[2023-02-24 10:45:09,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 5853184. Throughput: 0: 913.1. Samples: 337556. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:45:09,183][01623] Avg episode reward: [(0, '27.375')] +[2023-02-24 10:45:09,828][28924] Updated weights for policy 0, policy_version 1430 (0.0020) +[2023-02-24 10:45:14,177][01623] Fps is (10 sec: 4505.4, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 5873664. Throughput: 0: 917.7. Samples: 340908. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:45:14,181][01623] Avg episode reward: [(0, '27.900')] +[2023-02-24 10:45:19,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 5885952. Throughput: 0: 892.4. Samples: 345262. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:45:19,186][01623] Avg episode reward: [(0, '26.080')] +[2023-02-24 10:45:22,745][28924] Updated weights for policy 0, policy_version 1440 (0.0032) +[2023-02-24 10:45:24,177][01623] Fps is (10 sec: 2867.4, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 5902336. Throughput: 0: 910.2. Samples: 350176. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:45:24,179][01623] Avg episode reward: [(0, '26.142')] +[2023-02-24 10:45:29,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 5926912. Throughput: 0: 935.0. Samples: 353556. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 10:45:29,180][01623] Avg episode reward: [(0, '26.819')] +[2023-02-24 10:45:32,016][28924] Updated weights for policy 0, policy_version 1450 (0.0013) +[2023-02-24 10:45:34,178][01623] Fps is (10 sec: 4095.5, 60 sec: 3618.2, 300 sec: 3568.4). Total num frames: 5943296. Throughput: 0: 926.9. Samples: 359872. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:45:34,181][01623] Avg episode reward: [(0, '27.108')] +[2023-02-24 10:45:39,181][01623] Fps is (10 sec: 3275.5, 60 sec: 3686.2, 300 sec: 3554.4). Total num frames: 5959680. Throughput: 0: 890.9. Samples: 364014. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:45:39,189][01623] Avg episode reward: [(0, '26.686')] +[2023-02-24 10:45:44,177][01623] Fps is (10 sec: 2867.6, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 5971968. Throughput: 0: 890.0. Samples: 366140. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:45:44,185][01623] Avg episode reward: [(0, '27.805')] +[2023-02-24 10:45:45,288][28924] Updated weights for policy 0, policy_version 1460 (0.0020) +[2023-02-24 10:45:49,177][01623] Fps is (10 sec: 3687.9, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 5996544. Throughput: 0: 922.3. Samples: 372216. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:45:49,180][01623] Avg episode reward: [(0, '27.385')] +[2023-02-24 10:45:54,177][01623] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 6017024. Throughput: 0: 910.5. Samples: 378528. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:45:54,182][01623] Avg episode reward: [(0, '26.535')] +[2023-02-24 10:45:55,362][28924] Updated weights for policy 0, policy_version 1470 (0.0012) +[2023-02-24 10:45:59,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 6029312. Throughput: 0: 883.5. Samples: 380666. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:45:59,181][01623] Avg episode reward: [(0, '26.269')] +[2023-02-24 10:46:04,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 6045696. Throughput: 0: 882.4. Samples: 384972. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:46:04,185][01623] Avg episode reward: [(0, '25.310')] +[2023-02-24 10:46:07,264][28924] Updated weights for policy 0, policy_version 1480 (0.0034) +[2023-02-24 10:46:09,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 6070272. Throughput: 0: 921.5. Samples: 391642. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:46:09,183][01623] Avg episode reward: [(0, '27.435')] +[2023-02-24 10:46:14,177][01623] Fps is (10 sec: 4505.6, 60 sec: 3618.2, 300 sec: 3582.3). Total num frames: 6090752. Throughput: 0: 919.8. Samples: 394948. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:46:14,179][01623] Avg episode reward: [(0, '26.578')] +[2023-02-24 10:46:14,191][28910] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001487_6090752.pth... +[2023-02-24 10:46:14,364][28910] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001277_5230592.pth +[2023-02-24 10:46:18,678][28924] Updated weights for policy 0, policy_version 1490 (0.0012) +[2023-02-24 10:46:19,178][01623] Fps is (10 sec: 3276.4, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 6103040. Throughput: 0: 880.8. Samples: 399508. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:46:19,182][01623] Avg episode reward: [(0, '26.928')] +[2023-02-24 10:46:24,177][01623] Fps is (10 sec: 2457.6, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 6115328. Throughput: 0: 879.6. Samples: 403594. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:46:24,180][01623] Avg episode reward: [(0, '28.441')] +[2023-02-24 10:46:29,177][01623] Fps is (10 sec: 3686.8, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 6139904. Throughput: 0: 904.7. Samples: 406850. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:46:29,180][01623] Avg episode reward: [(0, '28.376')] +[2023-02-24 10:46:29,991][28924] Updated weights for policy 0, policy_version 1500 (0.0020) +[2023-02-24 10:46:34,177][01623] Fps is (10 sec: 4505.6, 60 sec: 3618.2, 300 sec: 3582.3). Total num frames: 6160384. Throughput: 0: 917.1. Samples: 413486. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:46:34,182][01623] Avg episode reward: [(0, '27.725')] +[2023-02-24 10:46:39,177][01623] Fps is (10 sec: 3276.6, 60 sec: 3550.1, 300 sec: 3540.6). Total num frames: 6172672. Throughput: 0: 879.8. Samples: 418118. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:46:39,181][01623] Avg episode reward: [(0, '27.315')] +[2023-02-24 10:46:42,306][28924] Updated weights for policy 0, policy_version 1510 (0.0014) +[2023-02-24 10:46:44,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 6189056. Throughput: 0: 877.7. Samples: 420162. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:46:44,180][01623] Avg episode reward: [(0, '26.192')] +[2023-02-24 10:46:49,177][01623] Fps is (10 sec: 3686.6, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 6209536. Throughput: 0: 903.6. Samples: 425632. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:46:49,185][01623] Avg episode reward: [(0, '25.696')] +[2023-02-24 10:46:52,462][28924] Updated weights for policy 0, policy_version 1520 (0.0022) +[2023-02-24 10:46:54,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 6230016. Throughput: 0: 905.1. Samples: 432370. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:46:54,185][01623] Avg episode reward: [(0, '24.908')] +[2023-02-24 10:46:59,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 6246400. Throughput: 0: 884.0. Samples: 434730. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:46:59,183][01623] Avg episode reward: [(0, '25.616')] +[2023-02-24 10:47:04,178][01623] Fps is (10 sec: 2866.7, 60 sec: 3549.8, 300 sec: 3540.6). Total num frames: 6258688. Throughput: 0: 876.4. Samples: 438948. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:47:04,181][01623] Avg episode reward: [(0, '25.023')] +[2023-02-24 10:47:05,561][28924] Updated weights for policy 0, policy_version 1530 (0.0013) +[2023-02-24 10:47:09,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3554.5). Total num frames: 6279168. Throughput: 0: 912.7. Samples: 444666. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:47:09,179][01623] Avg episode reward: [(0, '24.310')] +[2023-02-24 10:47:14,177][01623] Fps is (10 sec: 4506.4, 60 sec: 3549.9, 300 sec: 3568.4). Total num frames: 6303744. Throughput: 0: 911.5. Samples: 447868. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:47:14,183][01623] Avg episode reward: [(0, '24.429')] +[2023-02-24 10:47:15,228][28924] Updated weights for policy 0, policy_version 1540 (0.0023) +[2023-02-24 10:47:19,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 6316032. Throughput: 0: 871.5. Samples: 452704. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:47:19,179][01623] Avg episode reward: [(0, '25.264')] +[2023-02-24 10:47:24,177][01623] Fps is (10 sec: 2048.0, 60 sec: 3481.6, 300 sec: 3512.8). Total num frames: 6324224. Throughput: 0: 845.0. Samples: 456142. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:47:24,182][01623] Avg episode reward: [(0, '24.670')] +[2023-02-24 10:47:29,177][01623] Fps is (10 sec: 2048.0, 60 sec: 3276.8, 300 sec: 3499.0). Total num frames: 6336512. Throughput: 0: 835.4. Samples: 457756. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:47:29,183][01623] Avg episode reward: [(0, '24.629')] +[2023-02-24 10:47:32,089][28924] Updated weights for policy 0, policy_version 1550 (0.0028) +[2023-02-24 10:47:34,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 3512.8). Total num frames: 6356992. Throughput: 0: 815.8. Samples: 462344. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:47:34,179][01623] Avg episode reward: [(0, '24.879')] +[2023-02-24 10:47:39,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3413.4, 300 sec: 3499.0). Total num frames: 6377472. Throughput: 0: 812.5. Samples: 468934. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:47:39,179][01623] Avg episode reward: [(0, '25.972')] +[2023-02-24 10:47:41,659][28924] Updated weights for policy 0, policy_version 1560 (0.0029) +[2023-02-24 10:47:44,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3413.3, 300 sec: 3499.0). Total num frames: 6393856. Throughput: 0: 825.8. Samples: 471890. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:47:44,181][01623] Avg episode reward: [(0, '26.030')] +[2023-02-24 10:47:49,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3499.0). Total num frames: 6410240. Throughput: 0: 824.1. Samples: 476032. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:47:49,183][01623] Avg episode reward: [(0, '25.609')] +[2023-02-24 10:47:54,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 3499.0). Total num frames: 6426624. Throughput: 0: 809.9. Samples: 481110. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:47:54,179][01623] Avg episode reward: [(0, '25.638')] +[2023-02-24 10:47:54,543][28924] Updated weights for policy 0, policy_version 1570 (0.0018) +[2023-02-24 10:47:59,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3345.1, 300 sec: 3499.0). Total num frames: 6447104. Throughput: 0: 813.2. Samples: 484462. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:47:59,183][01623] Avg episode reward: [(0, '26.261')] +[2023-02-24 10:48:04,177][01623] Fps is (10 sec: 4095.8, 60 sec: 3481.7, 300 sec: 3499.0). Total num frames: 6467584. Throughput: 0: 843.5. Samples: 490664. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:48:04,185][01623] Avg episode reward: [(0, '25.736')] +[2023-02-24 10:48:05,151][28924] Updated weights for policy 0, policy_version 1580 (0.0018) +[2023-02-24 10:48:09,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3485.1). Total num frames: 6479872. Throughput: 0: 860.8. Samples: 494880. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:48:09,180][01623] Avg episode reward: [(0, '25.759')] +[2023-02-24 10:48:14,177][01623] Fps is (10 sec: 2867.3, 60 sec: 3208.5, 300 sec: 3485.1). Total num frames: 6496256. Throughput: 0: 869.1. Samples: 496866. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:48:14,185][01623] Avg episode reward: [(0, '24.768')] +[2023-02-24 10:48:14,199][28910] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001586_6496256.pth... +[2023-02-24 10:48:14,382][28910] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001381_5656576.pth +[2023-02-24 10:48:16,969][28924] Updated weights for policy 0, policy_version 1590 (0.0015) +[2023-02-24 10:48:19,179][01623] Fps is (10 sec: 4094.9, 60 sec: 3413.2, 300 sec: 3498.9). Total num frames: 6520832. Throughput: 0: 909.3. Samples: 503266. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:48:19,187][01623] Avg episode reward: [(0, '25.565')] +[2023-02-24 10:48:24,180][01623] Fps is (10 sec: 4094.9, 60 sec: 3549.7, 300 sec: 3485.0). Total num frames: 6537216. Throughput: 0: 896.1. Samples: 509260. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:48:24,183][01623] Avg episode reward: [(0, '26.238')] +[2023-02-24 10:48:28,920][28924] Updated weights for policy 0, policy_version 1600 (0.0026) +[2023-02-24 10:48:29,177][01623] Fps is (10 sec: 3277.7, 60 sec: 3618.1, 300 sec: 3485.1). Total num frames: 6553600. Throughput: 0: 875.4. Samples: 511282. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:48:29,183][01623] Avg episode reward: [(0, '25.663')] +[2023-02-24 10:48:34,177][01623] Fps is (10 sec: 3277.7, 60 sec: 3549.9, 300 sec: 3499.0). Total num frames: 6569984. Throughput: 0: 876.6. Samples: 515480. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:48:34,180][01623] Avg episode reward: [(0, '24.993')] +[2023-02-24 10:48:39,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3526.7). Total num frames: 6590464. Throughput: 0: 909.1. Samples: 522018. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:48:39,183][01623] Avg episode reward: [(0, '26.526')] +[2023-02-24 10:48:39,771][28924] Updated weights for policy 0, policy_version 1610 (0.0026) +[2023-02-24 10:48:44,177][01623] Fps is (10 sec: 4095.7, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 6610944. Throughput: 0: 908.1. Samples: 525328. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:48:44,181][01623] Avg episode reward: [(0, '26.041')] +[2023-02-24 10:48:49,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3526.8). Total num frames: 6623232. Throughput: 0: 872.1. Samples: 529906. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:48:49,183][01623] Avg episode reward: [(0, '26.145')] +[2023-02-24 10:48:52,578][28924] Updated weights for policy 0, policy_version 1620 (0.0012) +[2023-02-24 10:48:54,177][01623] Fps is (10 sec: 2867.4, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 6639616. Throughput: 0: 875.6. Samples: 534280. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:48:54,184][01623] Avg episode reward: [(0, '25.430')] +[2023-02-24 10:48:59,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 6660096. Throughput: 0: 905.8. Samples: 537626. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:48:59,179][01623] Avg episode reward: [(0, '25.385')] +[2023-02-24 10:49:01,961][28924] Updated weights for policy 0, policy_version 1630 (0.0021) +[2023-02-24 10:49:04,179][01623] Fps is (10 sec: 4504.6, 60 sec: 3618.0, 300 sec: 3540.6). Total num frames: 6684672. Throughput: 0: 914.5. Samples: 544416. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-24 10:49:04,185][01623] Avg episode reward: [(0, '24.762')] +[2023-02-24 10:49:09,183][01623] Fps is (10 sec: 3684.1, 60 sec: 3617.8, 300 sec: 3540.5). Total num frames: 6696960. Throughput: 0: 881.9. Samples: 548950. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:49:09,186][01623] Avg episode reward: [(0, '22.924')] +[2023-02-24 10:49:14,177][01623] Fps is (10 sec: 2867.9, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 6713344. Throughput: 0: 882.9. Samples: 551014. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:49:14,184][01623] Avg episode reward: [(0, '22.828')] +[2023-02-24 10:49:15,034][28924] Updated weights for policy 0, policy_version 1640 (0.0032) +[2023-02-24 10:49:19,177][01623] Fps is (10 sec: 3688.7, 60 sec: 3550.0, 300 sec: 3540.6). Total num frames: 6733824. Throughput: 0: 920.9. Samples: 556920. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:49:19,180][01623] Avg episode reward: [(0, '23.898')] +[2023-02-24 10:49:24,178][01623] Fps is (10 sec: 4095.2, 60 sec: 3618.2, 300 sec: 3540.6). Total num frames: 6754304. Throughput: 0: 923.4. Samples: 563574. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:49:24,187][01623] Avg episode reward: [(0, '24.169')] +[2023-02-24 10:49:24,451][28924] Updated weights for policy 0, policy_version 1650 (0.0012) +[2023-02-24 10:49:29,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 6770688. Throughput: 0: 896.5. Samples: 565672. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:49:29,185][01623] Avg episode reward: [(0, '23.879')] +[2023-02-24 10:49:34,177][01623] Fps is (10 sec: 2867.7, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 6782976. Throughput: 0: 887.0. Samples: 569820. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:49:34,180][01623] Avg episode reward: [(0, '24.856')] +[2023-02-24 10:49:37,371][28924] Updated weights for policy 0, policy_version 1660 (0.0026) +[2023-02-24 10:49:39,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 6807552. Throughput: 0: 925.0. Samples: 575906. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:49:39,183][01623] Avg episode reward: [(0, '24.689')] +[2023-02-24 10:49:44,177][01623] Fps is (10 sec: 4505.6, 60 sec: 3618.2, 300 sec: 3554.5). Total num frames: 6828032. Throughput: 0: 921.8. Samples: 579106. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0) +[2023-02-24 10:49:44,184][01623] Avg episode reward: [(0, '25.806')] +[2023-02-24 10:49:47,767][28924] Updated weights for policy 0, policy_version 1670 (0.0020) +[2023-02-24 10:49:49,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3554.5). Total num frames: 6844416. Throughput: 0: 887.1. Samples: 584334. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 10:49:49,181][01623] Avg episode reward: [(0, '26.098')] +[2023-02-24 10:49:54,178][01623] Fps is (10 sec: 2866.7, 60 sec: 3618.0, 300 sec: 3540.6). Total num frames: 6856704. Throughput: 0: 880.7. Samples: 588578. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:49:54,182][01623] Avg episode reward: [(0, '25.973')] +[2023-02-24 10:49:59,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 6877184. Throughput: 0: 897.0. Samples: 591380. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 10:49:59,183][01623] Avg episode reward: [(0, '25.533')] +[2023-02-24 10:49:59,592][28924] Updated weights for policy 0, policy_version 1680 (0.0012) +[2023-02-24 10:50:04,177][01623] Fps is (10 sec: 4096.7, 60 sec: 3550.0, 300 sec: 3540.6). Total num frames: 6897664. Throughput: 0: 916.3. Samples: 598154. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:50:04,185][01623] Avg episode reward: [(0, '27.167')] +[2023-02-24 10:50:09,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.5, 300 sec: 3526.7). Total num frames: 6914048. Throughput: 0: 886.1. Samples: 603446. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:50:09,186][01623] Avg episode reward: [(0, '27.809')] +[2023-02-24 10:50:11,013][28924] Updated weights for policy 0, policy_version 1690 (0.0015) +[2023-02-24 10:50:14,177][01623] Fps is (10 sec: 3276.7, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 6930432. Throughput: 0: 887.0. Samples: 605588. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:50:14,181][01623] Avg episode reward: [(0, '26.213')] +[2023-02-24 10:50:14,193][28910] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001692_6930432.pth... +[2023-02-24 10:50:14,458][28910] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001487_6090752.pth +[2023-02-24 10:50:19,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 6946816. Throughput: 0: 904.3. Samples: 610514. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:50:19,183][01623] Avg episode reward: [(0, '25.021')] +[2023-02-24 10:50:22,137][28924] Updated weights for policy 0, policy_version 1700 (0.0017) +[2023-02-24 10:50:24,177][01623] Fps is (10 sec: 4096.2, 60 sec: 3618.2, 300 sec: 3540.6). Total num frames: 6971392. Throughput: 0: 915.3. Samples: 617096. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:50:24,180][01623] Avg episode reward: [(0, '26.422')] +[2023-02-24 10:50:29,180][01623] Fps is (10 sec: 4094.7, 60 sec: 3617.9, 300 sec: 3540.6). Total num frames: 6987776. Throughput: 0: 912.3. Samples: 620164. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:50:29,183][01623] Avg episode reward: [(0, '25.641')] +[2023-02-24 10:50:34,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3526.8). Total num frames: 7000064. Throughput: 0: 888.2. Samples: 624302. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:50:34,183][01623] Avg episode reward: [(0, '24.012')] +[2023-02-24 10:50:34,297][28924] Updated weights for policy 0, policy_version 1710 (0.0011) +[2023-02-24 10:50:39,177][01623] Fps is (10 sec: 3277.9, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 7020544. Throughput: 0: 912.2. Samples: 629626. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:50:39,179][01623] Avg episode reward: [(0, '23.486')] +[2023-02-24 10:50:44,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 7041024. Throughput: 0: 925.2. Samples: 633014. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 10:50:44,184][01623] Avg episode reward: [(0, '24.315')] +[2023-02-24 10:50:44,201][28924] Updated weights for policy 0, policy_version 1720 (0.0016) +[2023-02-24 10:50:49,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 7061504. Throughput: 0: 907.6. Samples: 638996. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:50:49,179][01623] Avg episode reward: [(0, '24.132')] +[2023-02-24 10:50:54,178][01623] Fps is (10 sec: 3276.3, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 7073792. Throughput: 0: 866.4. Samples: 642436. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:50:54,181][01623] Avg episode reward: [(0, '23.521')] +[2023-02-24 10:50:59,177][01623] Fps is (10 sec: 2047.8, 60 sec: 3413.3, 300 sec: 3512.8). Total num frames: 7081984. Throughput: 0: 854.5. Samples: 644042. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:50:59,180][01623] Avg episode reward: [(0, '23.887')] +[2023-02-24 10:50:59,992][28924] Updated weights for policy 0, policy_version 1730 (0.0067) +[2023-02-24 10:51:04,182][01623] Fps is (10 sec: 2456.6, 60 sec: 3344.7, 300 sec: 3485.0). Total num frames: 7098368. Throughput: 0: 824.5. Samples: 647620. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:51:04,185][01623] Avg episode reward: [(0, '25.100')] +[2023-02-24 10:51:09,177][01623] Fps is (10 sec: 3686.7, 60 sec: 3413.3, 300 sec: 3485.1). Total num frames: 7118848. Throughput: 0: 821.2. Samples: 654050. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:51:09,184][01623] Avg episode reward: [(0, '26.697')] +[2023-02-24 10:51:10,793][28924] Updated weights for policy 0, policy_version 1740 (0.0016) +[2023-02-24 10:51:14,179][01623] Fps is (10 sec: 3687.7, 60 sec: 3413.2, 300 sec: 3498.9). Total num frames: 7135232. Throughput: 0: 824.0. Samples: 657244. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:51:14,182][01623] Avg episode reward: [(0, '26.712')] +[2023-02-24 10:51:19,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3512.8). Total num frames: 7151616. Throughput: 0: 826.1. Samples: 661478. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:51:19,179][01623] Avg episode reward: [(0, '26.547')] +[2023-02-24 10:51:23,838][28924] Updated weights for policy 0, policy_version 1750 (0.0016) +[2023-02-24 10:51:24,177][01623] Fps is (10 sec: 3277.5, 60 sec: 3276.8, 300 sec: 3485.1). Total num frames: 7168000. Throughput: 0: 818.4. Samples: 666456. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:51:24,180][01623] Avg episode reward: [(0, '25.635')] +[2023-02-24 10:51:29,179][01623] Fps is (10 sec: 3685.3, 60 sec: 3345.1, 300 sec: 3485.0). Total num frames: 7188480. Throughput: 0: 818.2. Samples: 669834. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:51:29,182][01623] Avg episode reward: [(0, '24.960')] +[2023-02-24 10:51:33,268][28924] Updated weights for policy 0, policy_version 1760 (0.0012) +[2023-02-24 10:51:34,177][01623] Fps is (10 sec: 4095.9, 60 sec: 3481.6, 300 sec: 3512.8). Total num frames: 7208960. Throughput: 0: 827.2. Samples: 676218. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:51:34,182][01623] Avg episode reward: [(0, '24.270')] +[2023-02-24 10:51:39,177][01623] Fps is (10 sec: 3687.5, 60 sec: 3413.3, 300 sec: 3512.8). Total num frames: 7225344. Throughput: 0: 844.9. Samples: 680454. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 10:51:39,180][01623] Avg episode reward: [(0, '24.442')] +[2023-02-24 10:51:44,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3499.0). Total num frames: 7241728. Throughput: 0: 857.0. Samples: 682606. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:51:44,179][01623] Avg episode reward: [(0, '24.606')] +[2023-02-24 10:51:45,843][28924] Updated weights for policy 0, policy_version 1770 (0.0024) +[2023-02-24 10:51:49,177][01623] Fps is (10 sec: 3686.3, 60 sec: 3345.0, 300 sec: 3499.0). Total num frames: 7262208. Throughput: 0: 922.2. Samples: 689114. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 10:51:49,186][01623] Avg episode reward: [(0, '24.156')] +[2023-02-24 10:51:54,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3481.7, 300 sec: 3512.8). Total num frames: 7282688. Throughput: 0: 917.6. Samples: 695342. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:51:54,184][01623] Avg episode reward: [(0, '24.310')] +[2023-02-24 10:51:56,134][28924] Updated weights for policy 0, policy_version 1780 (0.0012) +[2023-02-24 10:51:59,177][01623] Fps is (10 sec: 3276.9, 60 sec: 3549.9, 300 sec: 3512.9). Total num frames: 7294976. Throughput: 0: 893.4. Samples: 697444. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:51:59,184][01623] Avg episode reward: [(0, '24.189')] +[2023-02-24 10:52:04,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3550.2, 300 sec: 3499.0). Total num frames: 7311360. Throughput: 0: 895.6. Samples: 701782. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:52:04,180][01623] Avg episode reward: [(0, '25.950')] +[2023-02-24 10:52:07,819][28924] Updated weights for policy 0, policy_version 1790 (0.0029) +[2023-02-24 10:52:09,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3499.0). Total num frames: 7335936. Throughput: 0: 936.1. Samples: 708580. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:52:09,186][01623] Avg episode reward: [(0, '25.192')] +[2023-02-24 10:52:14,177][01623] Fps is (10 sec: 4505.6, 60 sec: 3686.5, 300 sec: 3526.7). Total num frames: 7356416. Throughput: 0: 935.5. Samples: 711930. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:52:14,181][01623] Avg episode reward: [(0, '25.358')] +[2023-02-24 10:52:14,195][28910] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001796_7356416.pth... +[2023-02-24 10:52:14,363][28910] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001586_6496256.pth +[2023-02-24 10:52:19,075][28924] Updated weights for policy 0, policy_version 1800 (0.0031) +[2023-02-24 10:52:19,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3554.5). Total num frames: 7372800. Throughput: 0: 895.8. Samples: 716530. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:52:19,184][01623] Avg episode reward: [(0, '25.843')] +[2023-02-24 10:52:24,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 7385088. Throughput: 0: 899.6. Samples: 720938. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:52:24,179][01623] Avg episode reward: [(0, '27.058')] +[2023-02-24 10:52:29,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3686.6, 300 sec: 3568.4). Total num frames: 7409664. Throughput: 0: 926.4. Samples: 724292. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:52:29,186][01623] Avg episode reward: [(0, '27.456')] +[2023-02-24 10:52:29,969][28924] Updated weights for policy 0, policy_version 1810 (0.0021) +[2023-02-24 10:52:34,179][01623] Fps is (10 sec: 4504.6, 60 sec: 3686.3, 300 sec: 3568.4). Total num frames: 7430144. Throughput: 0: 929.0. Samples: 730922. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:52:34,186][01623] Avg episode reward: [(0, '26.833')] +[2023-02-24 10:52:39,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 7442432. Throughput: 0: 891.8. Samples: 735472. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:52:39,191][01623] Avg episode reward: [(0, '27.059')] +[2023-02-24 10:52:42,379][28924] Updated weights for policy 0, policy_version 1820 (0.0014) +[2023-02-24 10:52:44,179][01623] Fps is (10 sec: 2867.2, 60 sec: 3618.0, 300 sec: 3554.5). Total num frames: 7458816. Throughput: 0: 892.4. Samples: 737604. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:52:44,186][01623] Avg episode reward: [(0, '27.815')] +[2023-02-24 10:52:49,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.2, 300 sec: 3568.4). Total num frames: 7479296. Throughput: 0: 926.5. Samples: 743476. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:52:49,185][01623] Avg episode reward: [(0, '27.613')] +[2023-02-24 10:52:52,456][28924] Updated weights for policy 0, policy_version 1830 (0.0015) +[2023-02-24 10:52:54,177][01623] Fps is (10 sec: 4096.9, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 7499776. Throughput: 0: 921.8. Samples: 750060. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-24 10:52:54,183][01623] Avg episode reward: [(0, '27.478')] +[2023-02-24 10:52:59,177][01623] Fps is (10 sec: 3686.1, 60 sec: 3686.4, 300 sec: 3554.5). Total num frames: 7516160. Throughput: 0: 896.0. Samples: 752250. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:52:59,181][01623] Avg episode reward: [(0, '26.935')] +[2023-02-24 10:53:04,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 7532544. Throughput: 0: 891.3. Samples: 756640. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:53:04,179][01623] Avg episode reward: [(0, '26.621')] +[2023-02-24 10:53:05,207][28924] Updated weights for policy 0, policy_version 1840 (0.0013) +[2023-02-24 10:53:09,181][01623] Fps is (10 sec: 3684.9, 60 sec: 3617.8, 300 sec: 3582.2). Total num frames: 7553024. Throughput: 0: 929.3. Samples: 762762. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:53:09,188][01623] Avg episode reward: [(0, '26.538')] +[2023-02-24 10:53:14,149][28924] Updated weights for policy 0, policy_version 1850 (0.0016) +[2023-02-24 10:53:14,177][01623] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 7577600. Throughput: 0: 931.3. Samples: 766200. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:53:14,179][01623] Avg episode reward: [(0, '26.391')] +[2023-02-24 10:53:19,177][01623] Fps is (10 sec: 3688.2, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 7589888. Throughput: 0: 905.9. Samples: 771684. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:53:19,179][01623] Avg episode reward: [(0, '26.516')] +[2023-02-24 10:53:24,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 7606272. Throughput: 0: 899.2. Samples: 775936. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:53:24,185][01623] Avg episode reward: [(0, '26.852')] +[2023-02-24 10:53:27,036][28924] Updated weights for policy 0, policy_version 1860 (0.0015) +[2023-02-24 10:53:29,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 7626752. Throughput: 0: 913.4. Samples: 778704. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 10:53:29,182][01623] Avg episode reward: [(0, '27.809')] +[2023-02-24 10:53:34,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3618.3, 300 sec: 3582.3). Total num frames: 7647232. Throughput: 0: 932.6. Samples: 785442. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:53:34,183][01623] Avg episode reward: [(0, '28.648')] +[2023-02-24 10:53:36,679][28924] Updated weights for policy 0, policy_version 1870 (0.0021) +[2023-02-24 10:53:39,178][01623] Fps is (10 sec: 3685.8, 60 sec: 3686.3, 300 sec: 3568.4). Total num frames: 7663616. Throughput: 0: 903.4. Samples: 790716. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:53:39,183][01623] Avg episode reward: [(0, '29.804')] +[2023-02-24 10:53:44,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3686.5, 300 sec: 3582.3). Total num frames: 7680000. Throughput: 0: 902.9. Samples: 792880. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:53:44,185][01623] Avg episode reward: [(0, '28.952')] +[2023-02-24 10:53:49,177][01623] Fps is (10 sec: 3277.3, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 7696384. Throughput: 0: 915.7. Samples: 797848. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:53:49,179][01623] Avg episode reward: [(0, '29.583')] +[2023-02-24 10:53:49,311][28924] Updated weights for policy 0, policy_version 1880 (0.0012) +[2023-02-24 10:53:54,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3596.1). Total num frames: 7720960. Throughput: 0: 927.3. Samples: 804488. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:53:54,184][01623] Avg episode reward: [(0, '29.967')] +[2023-02-24 10:53:59,180][01623] Fps is (10 sec: 4094.4, 60 sec: 3686.2, 300 sec: 3568.4). Total num frames: 7737344. Throughput: 0: 917.0. Samples: 807468. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:53:59,184][01623] Avg episode reward: [(0, '29.194')] +[2023-02-24 10:53:59,779][28924] Updated weights for policy 0, policy_version 1890 (0.0019) +[2023-02-24 10:54:04,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 7753728. Throughput: 0: 892.0. Samples: 811824. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:54:04,179][01623] Avg episode reward: [(0, '29.722')] +[2023-02-24 10:54:09,177][01623] Fps is (10 sec: 3278.0, 60 sec: 3618.4, 300 sec: 3582.3). Total num frames: 7770112. Throughput: 0: 917.2. Samples: 817208. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:54:09,185][01623] Avg episode reward: [(0, '28.400')] +[2023-02-24 10:54:11,372][28924] Updated weights for policy 0, policy_version 1900 (0.0017) +[2023-02-24 10:54:14,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3596.2). Total num frames: 7794688. Throughput: 0: 930.7. Samples: 820584. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:54:14,179][01623] Avg episode reward: [(0, '29.216')] +[2023-02-24 10:54:14,198][28910] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001903_7794688.pth... +[2023-02-24 10:54:14,378][28910] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001692_6930432.pth +[2023-02-24 10:54:19,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 7811072. Throughput: 0: 919.1. Samples: 826800. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:54:19,184][01623] Avg episode reward: [(0, '27.770')] +[2023-02-24 10:54:22,858][28924] Updated weights for policy 0, policy_version 1910 (0.0047) +[2023-02-24 10:54:24,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 7823360. Throughput: 0: 887.0. Samples: 830628. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:54:24,184][01623] Avg episode reward: [(0, '27.442')] +[2023-02-24 10:54:29,177][01623] Fps is (10 sec: 2457.6, 60 sec: 3481.6, 300 sec: 3568.4). Total num frames: 7835648. Throughput: 0: 876.3. Samples: 832314. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:54:29,183][01623] Avg episode reward: [(0, '26.922')] +[2023-02-24 10:54:34,177][01623] Fps is (10 sec: 2457.6, 60 sec: 3345.1, 300 sec: 3526.7). Total num frames: 7847936. Throughput: 0: 848.2. Samples: 836016. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:54:34,180][01623] Avg episode reward: [(0, '27.101')] +[2023-02-24 10:54:37,267][28924] Updated weights for policy 0, policy_version 1920 (0.0018) +[2023-02-24 10:54:39,177][01623] Fps is (10 sec: 3686.5, 60 sec: 3481.7, 300 sec: 3540.6). Total num frames: 7872512. Throughput: 0: 839.7. Samples: 842276. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:54:39,180][01623] Avg episode reward: [(0, '26.350')] +[2023-02-24 10:54:44,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 7888896. Throughput: 0: 837.3. Samples: 845144. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:54:44,182][01623] Avg episode reward: [(0, '26.592')] +[2023-02-24 10:54:49,177][01623] Fps is (10 sec: 2867.0, 60 sec: 3413.3, 300 sec: 3540.6). Total num frames: 7901184. Throughput: 0: 833.9. Samples: 849348. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:54:49,184][01623] Avg episode reward: [(0, '25.915')] +[2023-02-24 10:54:49,761][28924] Updated weights for policy 0, policy_version 1930 (0.0016) +[2023-02-24 10:54:54,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3540.6). Total num frames: 7921664. Throughput: 0: 835.8. Samples: 854818. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-24 10:54:54,183][01623] Avg episode reward: [(0, '26.963')] +[2023-02-24 10:54:59,177][01623] Fps is (10 sec: 4096.2, 60 sec: 3413.5, 300 sec: 3540.6). Total num frames: 7942144. Throughput: 0: 835.3. Samples: 858172. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:54:59,183][01623] Avg episode reward: [(0, '27.215')] +[2023-02-24 10:54:59,439][28924] Updated weights for policy 0, policy_version 1940 (0.0023) +[2023-02-24 10:55:04,177][01623] Fps is (10 sec: 4096.1, 60 sec: 3481.6, 300 sec: 3554.5). Total num frames: 7962624. Throughput: 0: 833.0. Samples: 864286. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:55:04,181][01623] Avg episode reward: [(0, '28.408')] +[2023-02-24 10:55:09,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3540.6). Total num frames: 7974912. Throughput: 0: 841.3. Samples: 868488. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:55:09,183][01623] Avg episode reward: [(0, '28.632')] +[2023-02-24 10:55:12,367][28924] Updated weights for policy 0, policy_version 1950 (0.0015) +[2023-02-24 10:55:14,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3554.5). Total num frames: 7995392. Throughput: 0: 852.5. Samples: 870678. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0) +[2023-02-24 10:55:14,179][01623] Avg episode reward: [(0, '28.717')] +[2023-02-24 10:55:19,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3413.3, 300 sec: 3540.6). Total num frames: 8015872. Throughput: 0: 921.1. Samples: 877466. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:55:19,179][01623] Avg episode reward: [(0, '27.587')] +[2023-02-24 10:55:21,529][28924] Updated weights for policy 0, policy_version 1960 (0.0014) +[2023-02-24 10:55:24,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 8036352. Throughput: 0: 913.2. Samples: 883370. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:55:24,183][01623] Avg episode reward: [(0, '26.901')] +[2023-02-24 10:55:29,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 8048640. Throughput: 0: 897.5. Samples: 885532. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:55:29,186][01623] Avg episode reward: [(0, '26.682')] +[2023-02-24 10:55:34,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3540.6). Total num frames: 8065024. Throughput: 0: 902.1. Samples: 889940. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:55:34,179][01623] Avg episode reward: [(0, '24.487')] +[2023-02-24 10:55:34,466][28924] Updated weights for policy 0, policy_version 1970 (0.0026) +[2023-02-24 10:55:39,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 8089600. Throughput: 0: 931.3. Samples: 896728. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:55:39,179][01623] Avg episode reward: [(0, '24.311')] +[2023-02-24 10:55:43,755][28924] Updated weights for policy 0, policy_version 1980 (0.0013) +[2023-02-24 10:55:44,184][01623] Fps is (10 sec: 4502.1, 60 sec: 3685.9, 300 sec: 3554.4). Total num frames: 8110080. Throughput: 0: 932.0. Samples: 900120. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:55:44,191][01623] Avg episode reward: [(0, '24.030')] +[2023-02-24 10:55:49,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3554.5). Total num frames: 8122368. Throughput: 0: 900.4. Samples: 904802. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:55:49,179][01623] Avg episode reward: [(0, '25.781')] +[2023-02-24 10:55:54,177][01623] Fps is (10 sec: 2869.4, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 8138752. Throughput: 0: 909.3. Samples: 909408. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:55:54,180][01623] Avg episode reward: [(0, '25.867')] +[2023-02-24 10:55:56,364][28924] Updated weights for policy 0, policy_version 1990 (0.0015) +[2023-02-24 10:55:59,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3610.1). Total num frames: 8163328. Throughput: 0: 933.8. Samples: 912698. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:55:59,185][01623] Avg episode reward: [(0, '26.098')] +[2023-02-24 10:56:04,178][01623] Fps is (10 sec: 4504.9, 60 sec: 3686.3, 300 sec: 3610.0). Total num frames: 8183808. Throughput: 0: 934.9. Samples: 919536. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:56:04,180][01623] Avg episode reward: [(0, '27.690')] +[2023-02-24 10:56:06,563][28924] Updated weights for policy 0, policy_version 2000 (0.0020) +[2023-02-24 10:56:09,178][01623] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3596.2). Total num frames: 8196096. Throughput: 0: 901.7. Samples: 923948. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:56:09,189][01623] Avg episode reward: [(0, '27.582')] +[2023-02-24 10:56:14,177][01623] Fps is (10 sec: 2867.6, 60 sec: 3618.1, 300 sec: 3596.1). Total num frames: 8212480. Throughput: 0: 900.8. Samples: 926070. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:56:14,180][01623] Avg episode reward: [(0, '26.813')] +[2023-02-24 10:56:14,188][28910] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002005_8212480.pth... +[2023-02-24 10:56:14,372][28910] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001796_7356416.pth +[2023-02-24 10:56:18,408][28924] Updated weights for policy 0, policy_version 2010 (0.0015) +[2023-02-24 10:56:19,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3610.0). Total num frames: 8232960. Throughput: 0: 931.5. Samples: 931856. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:56:19,179][01623] Avg episode reward: [(0, '25.944')] +[2023-02-24 10:56:24,177][01623] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3623.9). Total num frames: 8257536. Throughput: 0: 933.7. Samples: 938744. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:56:24,186][01623] Avg episode reward: [(0, '26.111')] +[2023-02-24 10:56:29,183][01623] Fps is (10 sec: 3683.9, 60 sec: 3686.0, 300 sec: 3596.1). Total num frames: 8269824. Throughput: 0: 905.5. Samples: 940866. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:56:29,186][01623] Avg episode reward: [(0, '26.173')] +[2023-02-24 10:56:29,658][28924] Updated weights for policy 0, policy_version 2020 (0.0015) +[2023-02-24 10:56:34,177][01623] Fps is (10 sec: 2457.7, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 8282112. Throughput: 0: 892.5. Samples: 944964. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:56:34,180][01623] Avg episode reward: [(0, '27.397')] +[2023-02-24 10:56:39,177][01623] Fps is (10 sec: 3688.9, 60 sec: 3618.1, 300 sec: 3610.0). Total num frames: 8306688. Throughput: 0: 922.9. Samples: 950940. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:56:39,179][01623] Avg episode reward: [(0, '26.306')] +[2023-02-24 10:56:40,741][28924] Updated weights for policy 0, policy_version 2030 (0.0018) +[2023-02-24 10:56:44,178][01623] Fps is (10 sec: 4504.8, 60 sec: 3618.5, 300 sec: 3610.0). Total num frames: 8327168. Throughput: 0: 921.0. Samples: 954146. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:56:44,181][01623] Avg episode reward: [(0, '28.025')] +[2023-02-24 10:56:49,178][01623] Fps is (10 sec: 3685.7, 60 sec: 3686.3, 300 sec: 3596.1). Total num frames: 8343552. Throughput: 0: 887.6. Samples: 959480. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 10:56:49,185][01623] Avg episode reward: [(0, '29.013')] +[2023-02-24 10:56:53,179][28924] Updated weights for policy 0, policy_version 2040 (0.0012) +[2023-02-24 10:56:54,177][01623] Fps is (10 sec: 2867.7, 60 sec: 3618.1, 300 sec: 3596.1). Total num frames: 8355840. Throughput: 0: 882.0. Samples: 963636. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:56:54,185][01623] Avg episode reward: [(0, '27.806')] +[2023-02-24 10:56:59,177][01623] Fps is (10 sec: 3277.4, 60 sec: 3549.9, 300 sec: 3610.0). Total num frames: 8376320. Throughput: 0: 895.6. Samples: 966370. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:56:59,179][01623] Avg episode reward: [(0, '27.261')] +[2023-02-24 10:57:03,093][28924] Updated weights for policy 0, policy_version 2050 (0.0015) +[2023-02-24 10:57:04,177][01623] Fps is (10 sec: 4505.6, 60 sec: 3618.2, 300 sec: 3610.0). Total num frames: 8400896. Throughput: 0: 920.0. Samples: 973254. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 10:57:04,182][01623] Avg episode reward: [(0, '26.492')] +[2023-02-24 10:57:09,183][01623] Fps is (10 sec: 4093.2, 60 sec: 3686.0, 300 sec: 3596.1). Total num frames: 8417280. Throughput: 0: 886.1. Samples: 978624. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:57:09,186][01623] Avg episode reward: [(0, '26.822')] +[2023-02-24 10:57:14,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3618.2, 300 sec: 3582.3). Total num frames: 8429568. Throughput: 0: 887.6. Samples: 980800. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:57:14,181][01623] Avg episode reward: [(0, '27.264')] +[2023-02-24 10:57:15,740][28924] Updated weights for policy 0, policy_version 2060 (0.0027) +[2023-02-24 10:57:19,177][01623] Fps is (10 sec: 3278.9, 60 sec: 3618.1, 300 sec: 3610.0). Total num frames: 8450048. Throughput: 0: 909.2. Samples: 985880. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 10:57:19,185][01623] Avg episode reward: [(0, '26.415')] +[2023-02-24 10:57:24,177][01623] Fps is (10 sec: 4095.7, 60 sec: 3549.8, 300 sec: 3596.1). Total num frames: 8470528. Throughput: 0: 929.0. Samples: 992744. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:57:24,180][01623] Avg episode reward: [(0, '27.332')] +[2023-02-24 10:57:24,992][28924] Updated weights for policy 0, policy_version 2070 (0.0016) +[2023-02-24 10:57:29,177][01623] Fps is (10 sec: 4096.1, 60 sec: 3686.8, 300 sec: 3596.2). Total num frames: 8491008. Throughput: 0: 925.1. Samples: 995774. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 10:57:29,181][01623] Avg episode reward: [(0, '27.371')] +[2023-02-24 10:57:34,177][01623] Fps is (10 sec: 3277.0, 60 sec: 3686.4, 300 sec: 3596.1). Total num frames: 8503296. Throughput: 0: 903.3. Samples: 1000126. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:57:34,179][01623] Avg episode reward: [(0, '26.507')] +[2023-02-24 10:57:38,022][28924] Updated weights for policy 0, policy_version 2080 (0.0015) +[2023-02-24 10:57:39,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3610.1). Total num frames: 8523776. Throughput: 0: 923.6. Samples: 1005196. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:57:39,185][01623] Avg episode reward: [(0, '26.413')] +[2023-02-24 10:57:44,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3618.2, 300 sec: 3610.0). Total num frames: 8544256. Throughput: 0: 936.9. Samples: 1008532. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:57:44,179][01623] Avg episode reward: [(0, '25.975')] +[2023-02-24 10:57:46,958][28924] Updated weights for policy 0, policy_version 2090 (0.0024) +[2023-02-24 10:57:49,183][01623] Fps is (10 sec: 4093.2, 60 sec: 3686.1, 300 sec: 3610.0). Total num frames: 8564736. Throughput: 0: 928.7. Samples: 1015052. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 10:57:49,186][01623] Avg episode reward: [(0, '26.465')] +[2023-02-24 10:57:54,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3610.0). Total num frames: 8581120. Throughput: 0: 903.2. Samples: 1019264. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:57:54,184][01623] Avg episode reward: [(0, '25.332')] +[2023-02-24 10:57:59,177][01623] Fps is (10 sec: 2459.3, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 8589312. Throughput: 0: 894.9. Samples: 1021072. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 10:57:59,181][01623] Avg episode reward: [(0, '24.737')] +[2023-02-24 10:58:02,347][28924] Updated weights for policy 0, policy_version 2100 (0.0023) +[2023-02-24 10:58:04,177][01623] Fps is (10 sec: 2457.6, 60 sec: 3413.3, 300 sec: 3568.4). Total num frames: 8605696. Throughput: 0: 868.5. Samples: 1024964. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 10:58:04,179][01623] Avg episode reward: [(0, '24.348')] +[2023-02-24 10:58:09,178][01623] Fps is (10 sec: 3276.2, 60 sec: 3413.6, 300 sec: 3540.6). Total num frames: 8622080. Throughput: 0: 834.2. Samples: 1030284. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:58:09,187][01623] Avg episode reward: [(0, '25.506')] +[2023-02-24 10:58:14,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 3554.5). Total num frames: 8638464. Throughput: 0: 812.8. Samples: 1032348. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:58:14,191][01623] Avg episode reward: [(0, '26.833')] +[2023-02-24 10:58:14,203][28910] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002109_8638464.pth... +[2023-02-24 10:58:14,511][28910] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001903_7794688.pth +[2023-02-24 10:58:15,744][28924] Updated weights for policy 0, policy_version 2110 (0.0040) +[2023-02-24 10:58:19,177][01623] Fps is (10 sec: 2867.7, 60 sec: 3345.1, 300 sec: 3540.6). Total num frames: 8650752. Throughput: 0: 806.4. Samples: 1036412. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:58:19,183][01623] Avg episode reward: [(0, '25.630')] +[2023-02-24 10:58:24,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3540.6). Total num frames: 8671232. Throughput: 0: 832.3. Samples: 1042648. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:58:24,183][01623] Avg episode reward: [(0, '25.383')] +[2023-02-24 10:58:26,034][28924] Updated weights for policy 0, policy_version 2120 (0.0017) +[2023-02-24 10:58:29,177][01623] Fps is (10 sec: 4505.6, 60 sec: 3413.3, 300 sec: 3554.5). Total num frames: 8695808. Throughput: 0: 831.6. Samples: 1045954. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:58:29,180][01623] Avg episode reward: [(0, '25.320')] +[2023-02-24 10:58:34,179][01623] Fps is (10 sec: 3685.4, 60 sec: 3413.2, 300 sec: 3540.6). Total num frames: 8708096. Throughput: 0: 805.4. Samples: 1051290. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:58:34,183][01623] Avg episode reward: [(0, '24.742')] +[2023-02-24 10:58:38,411][28924] Updated weights for policy 0, policy_version 2130 (0.0015) +[2023-02-24 10:58:39,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3345.1, 300 sec: 3540.6). Total num frames: 8724480. Throughput: 0: 808.5. Samples: 1055646. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:58:39,181][01623] Avg episode reward: [(0, '23.439')] +[2023-02-24 10:58:44,177][01623] Fps is (10 sec: 3687.4, 60 sec: 3345.1, 300 sec: 3554.5). Total num frames: 8744960. Throughput: 0: 831.2. Samples: 1058474. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:58:44,184][01623] Avg episode reward: [(0, '23.126')] +[2023-02-24 10:58:48,087][28924] Updated weights for policy 0, policy_version 2140 (0.0023) +[2023-02-24 10:58:49,176][01623] Fps is (10 sec: 4505.7, 60 sec: 3413.7, 300 sec: 3554.5). Total num frames: 8769536. Throughput: 0: 896.9. Samples: 1065326. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 10:58:49,182][01623] Avg episode reward: [(0, '23.553')] +[2023-02-24 10:58:54,180][01623] Fps is (10 sec: 3685.0, 60 sec: 3344.8, 300 sec: 3540.6). Total num frames: 8781824. Throughput: 0: 896.0. Samples: 1070606. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:58:54,183][01623] Avg episode reward: [(0, '25.100')] +[2023-02-24 10:58:59,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3481.6, 300 sec: 3540.6). Total num frames: 8798208. Throughput: 0: 898.0. Samples: 1072760. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:58:59,186][01623] Avg episode reward: [(0, '26.125')] +[2023-02-24 10:59:01,161][28924] Updated weights for policy 0, policy_version 2150 (0.0021) +[2023-02-24 10:59:04,177][01623] Fps is (10 sec: 3687.8, 60 sec: 3549.9, 300 sec: 3554.5). Total num frames: 8818688. Throughput: 0: 923.4. Samples: 1077964. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:59:04,185][01623] Avg episode reward: [(0, '27.145')] +[2023-02-24 10:59:09,177][01623] Fps is (10 sec: 4505.7, 60 sec: 3686.5, 300 sec: 3554.5). Total num frames: 8843264. Throughput: 0: 937.7. Samples: 1084844. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:59:09,185][01623] Avg episode reward: [(0, '28.273')] +[2023-02-24 10:59:10,075][28924] Updated weights for policy 0, policy_version 2160 (0.0014) +[2023-02-24 10:59:14,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3554.5). Total num frames: 8859648. Throughput: 0: 928.4. Samples: 1087734. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:59:14,179][01623] Avg episode reward: [(0, '30.143')] +[2023-02-24 10:59:19,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3554.5). Total num frames: 8871936. Throughput: 0: 904.1. Samples: 1091974. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:59:19,178][01623] Avg episode reward: [(0, '30.906')] +[2023-02-24 10:59:19,190][28910] Saving new best policy, reward=30.906! +[2023-02-24 10:59:23,224][28924] Updated weights for policy 0, policy_version 2170 (0.0021) +[2023-02-24 10:59:24,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 8892416. Throughput: 0: 922.6. Samples: 1097162. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:59:24,179][01623] Avg episode reward: [(0, '29.352')] +[2023-02-24 10:59:29,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3610.0). Total num frames: 8912896. Throughput: 0: 934.9. Samples: 1100544. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 10:59:29,180][01623] Avg episode reward: [(0, '28.611')] +[2023-02-24 10:59:32,927][28924] Updated weights for policy 0, policy_version 2180 (0.0012) +[2023-02-24 10:59:34,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3686.6, 300 sec: 3582.3). Total num frames: 8929280. Throughput: 0: 916.2. Samples: 1106554. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:59:34,180][01623] Avg episode reward: [(0, '29.746')] +[2023-02-24 10:59:39,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 8945664. Throughput: 0: 894.9. Samples: 1110874. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 10:59:39,180][01623] Avg episode reward: [(0, '29.647')] +[2023-02-24 10:59:44,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3596.2). Total num frames: 8962048. Throughput: 0: 894.8. Samples: 1113024. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:59:44,180][01623] Avg episode reward: [(0, '30.851')] +[2023-02-24 10:59:45,461][28924] Updated weights for policy 0, policy_version 2190 (0.0029) +[2023-02-24 10:59:49,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3610.0). Total num frames: 8986624. Throughput: 0: 926.8. Samples: 1119672. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 10:59:49,183][01623] Avg episode reward: [(0, '29.256')] +[2023-02-24 10:59:54,177][01623] Fps is (10 sec: 4505.6, 60 sec: 3754.9, 300 sec: 3610.0). Total num frames: 9007104. Throughput: 0: 909.2. Samples: 1125758. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:59:54,182][01623] Avg episode reward: [(0, '28.856')] +[2023-02-24 10:59:55,668][28924] Updated weights for policy 0, policy_version 2200 (0.0017) +[2023-02-24 10:59:59,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 9019392. Throughput: 0: 893.2. Samples: 1127926. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 10:59:59,182][01623] Avg episode reward: [(0, '30.200')] +[2023-02-24 11:00:04,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3596.1). Total num frames: 9035776. Throughput: 0: 896.6. Samples: 1132320. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:00:04,180][01623] Avg episode reward: [(0, '30.995')] +[2023-02-24 11:00:04,196][28910] Saving new best policy, reward=30.995! +[2023-02-24 11:00:07,407][28924] Updated weights for policy 0, policy_version 2210 (0.0030) +[2023-02-24 11:00:09,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3596.1). Total num frames: 9056256. Throughput: 0: 929.1. Samples: 1138972. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0) +[2023-02-24 11:00:09,181][01623] Avg episode reward: [(0, '30.846')] +[2023-02-24 11:00:14,179][01623] Fps is (10 sec: 4094.8, 60 sec: 3618.0, 300 sec: 3596.1). Total num frames: 9076736. Throughput: 0: 927.5. Samples: 1142286. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:00:14,188][01623] Avg episode reward: [(0, '29.888')] +[2023-02-24 11:00:14,210][28910] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002216_9076736.pth... +[2023-02-24 11:00:14,529][28910] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002005_8212480.pth +[2023-02-24 11:00:18,884][28924] Updated weights for policy 0, policy_version 2220 (0.0012) +[2023-02-24 11:00:19,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 9093120. Throughput: 0: 894.7. Samples: 1146814. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:00:19,185][01623] Avg episode reward: [(0, '30.221')] +[2023-02-24 11:00:24,177][01623] Fps is (10 sec: 2868.0, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 9105408. Throughput: 0: 898.6. Samples: 1151312. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:00:24,179][01623] Avg episode reward: [(0, '31.021')] +[2023-02-24 11:00:24,249][28910] Saving new best policy, reward=31.021! +[2023-02-24 11:00:29,177][01623] Fps is (10 sec: 3686.3, 60 sec: 3618.1, 300 sec: 3610.0). Total num frames: 9129984. Throughput: 0: 922.8. Samples: 1154550. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:00:29,181][01623] Avg episode reward: [(0, '31.943')] +[2023-02-24 11:00:29,186][28910] Saving new best policy, reward=31.943! +[2023-02-24 11:00:29,933][28924] Updated weights for policy 0, policy_version 2230 (0.0013) +[2023-02-24 11:00:34,177][01623] Fps is (10 sec: 4505.6, 60 sec: 3686.4, 300 sec: 3596.1). Total num frames: 9150464. Throughput: 0: 919.5. Samples: 1161048. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:00:34,184][01623] Avg episode reward: [(0, '30.633')] +[2023-02-24 11:00:39,179][01623] Fps is (10 sec: 3275.9, 60 sec: 3618.0, 300 sec: 3568.4). Total num frames: 9162752. Throughput: 0: 883.5. Samples: 1165516. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:00:39,185][01623] Avg episode reward: [(0, '31.235')] +[2023-02-24 11:00:42,448][28924] Updated weights for policy 0, policy_version 2240 (0.0030) +[2023-02-24 11:00:44,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 9179136. Throughput: 0: 882.0. Samples: 1167616. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:00:44,179][01623] Avg episode reward: [(0, '30.624')] +[2023-02-24 11:00:49,177][01623] Fps is (10 sec: 3687.4, 60 sec: 3549.9, 300 sec: 3596.2). Total num frames: 9199616. Throughput: 0: 913.2. Samples: 1173412. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:00:49,179][01623] Avg episode reward: [(0, '29.648')] +[2023-02-24 11:00:52,307][28924] Updated weights for policy 0, policy_version 2250 (0.0023) +[2023-02-24 11:00:54,177][01623] Fps is (10 sec: 4505.6, 60 sec: 3618.1, 300 sec: 3596.1). Total num frames: 9224192. Throughput: 0: 915.1. Samples: 1180150. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:00:54,179][01623] Avg episode reward: [(0, '27.824')] +[2023-02-24 11:00:59,178][01623] Fps is (10 sec: 3685.8, 60 sec: 3618.0, 300 sec: 3568.4). Total num frames: 9236480. Throughput: 0: 891.4. Samples: 1182398. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:00:59,182][01623] Avg episode reward: [(0, '27.319')] +[2023-02-24 11:01:04,177][01623] Fps is (10 sec: 2867.1, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 9252864. Throughput: 0: 882.5. Samples: 1186528. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:01:04,182][01623] Avg episode reward: [(0, '26.603')] +[2023-02-24 11:01:05,562][28924] Updated weights for policy 0, policy_version 2260 (0.0021) +[2023-02-24 11:01:09,176][01623] Fps is (10 sec: 3687.0, 60 sec: 3618.1, 300 sec: 3596.2). Total num frames: 9273344. Throughput: 0: 912.6. Samples: 1192378. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:01:09,179][01623] Avg episode reward: [(0, '26.534')] +[2023-02-24 11:01:14,177][01623] Fps is (10 sec: 4096.1, 60 sec: 3618.3, 300 sec: 3596.1). Total num frames: 9293824. Throughput: 0: 914.8. Samples: 1195716. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:01:14,179][01623] Avg episode reward: [(0, '25.960')] +[2023-02-24 11:01:14,537][28924] Updated weights for policy 0, policy_version 2270 (0.0014) +[2023-02-24 11:01:19,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 9310208. Throughput: 0: 893.9. Samples: 1201274. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:01:19,181][01623] Avg episode reward: [(0, '25.395')] +[2023-02-24 11:01:24,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3582.3). Total num frames: 9326592. Throughput: 0: 892.2. Samples: 1205664. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:01:24,179][01623] Avg episode reward: [(0, '26.514')] +[2023-02-24 11:01:28,177][28924] Updated weights for policy 0, policy_version 2280 (0.0019) +[2023-02-24 11:01:29,177][01623] Fps is (10 sec: 2867.1, 60 sec: 3481.6, 300 sec: 3582.3). Total num frames: 9338880. Throughput: 0: 900.5. Samples: 1208138. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 11:01:29,180][01623] Avg episode reward: [(0, '25.870')] +[2023-02-24 11:01:34,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3554.5). Total num frames: 9355264. Throughput: 0: 867.6. Samples: 1212452. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:01:34,180][01623] Avg episode reward: [(0, '25.679')] +[2023-02-24 11:01:39,177][01623] Fps is (10 sec: 2867.3, 60 sec: 3413.5, 300 sec: 3526.7). Total num frames: 9367552. Throughput: 0: 800.7. Samples: 1216180. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 11:01:39,185][01623] Avg episode reward: [(0, '25.468')] +[2023-02-24 11:01:43,423][28924] Updated weights for policy 0, policy_version 2290 (0.0017) +[2023-02-24 11:01:44,177][01623] Fps is (10 sec: 2457.4, 60 sec: 3345.0, 300 sec: 3512.9). Total num frames: 9379840. Throughput: 0: 795.3. Samples: 1218184. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:01:44,184][01623] Avg episode reward: [(0, '25.464')] +[2023-02-24 11:01:49,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3540.6). Total num frames: 9400320. Throughput: 0: 812.8. Samples: 1223106. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:01:49,183][01623] Avg episode reward: [(0, '24.950')] +[2023-02-24 11:01:53,528][28924] Updated weights for policy 0, policy_version 2300 (0.0012) +[2023-02-24 11:01:54,177][01623] Fps is (10 sec: 4096.3, 60 sec: 3276.8, 300 sec: 3540.6). Total num frames: 9420800. Throughput: 0: 833.0. Samples: 1229862. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:01:54,183][01623] Avg episode reward: [(0, '26.796')] +[2023-02-24 11:01:59,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3413.4, 300 sec: 3526.7). Total num frames: 9441280. Throughput: 0: 831.8. Samples: 1233146. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-24 11:01:59,182][01623] Avg episode reward: [(0, '25.902')] +[2023-02-24 11:02:04,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3512.9). Total num frames: 9453568. Throughput: 0: 801.9. Samples: 1237360. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0) +[2023-02-24 11:02:04,184][01623] Avg episode reward: [(0, '27.125')] +[2023-02-24 11:02:06,201][28924] Updated weights for policy 0, policy_version 2310 (0.0015) +[2023-02-24 11:02:09,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3276.8, 300 sec: 3526.7). Total num frames: 9469952. Throughput: 0: 818.8. Samples: 1242510. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:02:09,182][01623] Avg episode reward: [(0, '27.341')] +[2023-02-24 11:02:14,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3345.1, 300 sec: 3540.6). Total num frames: 9494528. Throughput: 0: 838.5. Samples: 1245868. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:02:14,182][01623] Avg episode reward: [(0, '27.595')] +[2023-02-24 11:02:14,198][28910] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002318_9494528.pth... +[2023-02-24 11:02:14,367][28910] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002109_8638464.pth +[2023-02-24 11:02:15,723][28924] Updated weights for policy 0, policy_version 2320 (0.0014) +[2023-02-24 11:02:19,177][01623] Fps is (10 sec: 4505.5, 60 sec: 3413.3, 300 sec: 3540.6). Total num frames: 9515008. Throughput: 0: 884.1. Samples: 1252238. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:02:19,179][01623] Avg episode reward: [(0, '28.559')] +[2023-02-24 11:02:24,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3345.1, 300 sec: 3512.8). Total num frames: 9527296. Throughput: 0: 896.9. Samples: 1256542. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 11:02:24,183][01623] Avg episode reward: [(0, '29.460')] +[2023-02-24 11:02:28,581][28924] Updated weights for policy 0, policy_version 2330 (0.0041) +[2023-02-24 11:02:29,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3413.3, 300 sec: 3526.7). Total num frames: 9543680. Throughput: 0: 899.5. Samples: 1258662. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:02:29,185][01623] Avg episode reward: [(0, '29.382')] +[2023-02-24 11:02:34,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3540.6). Total num frames: 9568256. Throughput: 0: 934.8. Samples: 1265170. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:02:34,180][01623] Avg episode reward: [(0, '29.301')] +[2023-02-24 11:02:37,610][28924] Updated weights for policy 0, policy_version 2340 (0.0013) +[2023-02-24 11:02:39,177][01623] Fps is (10 sec: 4505.8, 60 sec: 3686.4, 300 sec: 3540.6). Total num frames: 9588736. Throughput: 0: 920.5. Samples: 1271284. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:02:39,179][01623] Avg episode reward: [(0, '29.703')] +[2023-02-24 11:02:44,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3512.9). Total num frames: 9601024. Throughput: 0: 894.3. Samples: 1273388. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:02:44,183][01623] Avg episode reward: [(0, '30.472')] +[2023-02-24 11:02:49,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3512.8). Total num frames: 9617408. Throughput: 0: 893.7. Samples: 1277576. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:02:49,185][01623] Avg episode reward: [(0, '31.420')] +[2023-02-24 11:02:50,813][28924] Updated weights for policy 0, policy_version 2350 (0.0015) +[2023-02-24 11:02:54,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 9637888. Throughput: 0: 926.3. Samples: 1284194. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:02:54,180][01623] Avg episode reward: [(0, '30.242')] +[2023-02-24 11:02:59,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 9658368. Throughput: 0: 927.4. Samples: 1287602. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:02:59,185][01623] Avg episode reward: [(0, '29.147')] +[2023-02-24 11:03:00,881][28924] Updated weights for policy 0, policy_version 2360 (0.0013) +[2023-02-24 11:03:04,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 9674752. Throughput: 0: 888.5. Samples: 1292222. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:03:04,186][01623] Avg episode reward: [(0, '29.383')] +[2023-02-24 11:03:09,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 9687040. Throughput: 0: 890.0. Samples: 1296592. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:03:09,185][01623] Avg episode reward: [(0, '29.458')] +[2023-02-24 11:03:13,106][28924] Updated weights for policy 0, policy_version 2370 (0.0018) +[2023-02-24 11:03:14,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3596.1). Total num frames: 9711616. Throughput: 0: 917.3. Samples: 1299938. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:03:14,181][01623] Avg episode reward: [(0, '28.881')] +[2023-02-24 11:03:19,177][01623] Fps is (10 sec: 4505.5, 60 sec: 3618.1, 300 sec: 3596.1). Total num frames: 9732096. Throughput: 0: 920.6. Samples: 1306598. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:03:19,181][01623] Avg episode reward: [(0, '28.473')] +[2023-02-24 11:03:24,000][28924] Updated weights for policy 0, policy_version 2380 (0.0012) +[2023-02-24 11:03:24,179][01623] Fps is (10 sec: 3685.7, 60 sec: 3686.3, 300 sec: 3568.4). Total num frames: 9748480. Throughput: 0: 888.1. Samples: 1311250. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:03:24,188][01623] Avg episode reward: [(0, '29.448')] +[2023-02-24 11:03:29,177][01623] Fps is (10 sec: 2867.2, 60 sec: 3618.2, 300 sec: 3568.4). Total num frames: 9760768. Throughput: 0: 889.9. Samples: 1313434. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:03:29,182][01623] Avg episode reward: [(0, '30.294')] +[2023-02-24 11:03:34,177][01623] Fps is (10 sec: 3277.4, 60 sec: 3549.9, 300 sec: 3582.3). Total num frames: 9781248. Throughput: 0: 925.5. Samples: 1319224. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 11:03:34,179][01623] Avg episode reward: [(0, '29.362')] +[2023-02-24 11:03:35,192][28924] Updated weights for policy 0, policy_version 2390 (0.0018) +[2023-02-24 11:03:39,178][01623] Fps is (10 sec: 4504.8, 60 sec: 3618.0, 300 sec: 3596.1). Total num frames: 9805824. Throughput: 0: 925.2. Samples: 1325828. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:03:39,184][01623] Avg episode reward: [(0, '29.915')] +[2023-02-24 11:03:44,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3554.5). Total num frames: 9818112. Throughput: 0: 900.8. Samples: 1328136. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 11:03:44,183][01623] Avg episode reward: [(0, '30.350')] +[2023-02-24 11:03:47,084][28924] Updated weights for policy 0, policy_version 2400 (0.0020) +[2023-02-24 11:03:49,178][01623] Fps is (10 sec: 2867.3, 60 sec: 3618.0, 300 sec: 3568.4). Total num frames: 9834496. Throughput: 0: 891.7. Samples: 1332348. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:03:49,185][01623] Avg episode reward: [(0, '30.460')] +[2023-02-24 11:03:54,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 9854976. Throughput: 0: 926.0. Samples: 1338260. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:03:54,184][01623] Avg episode reward: [(0, '30.649')] +[2023-02-24 11:03:57,383][28924] Updated weights for policy 0, policy_version 2410 (0.0019) +[2023-02-24 11:03:59,177][01623] Fps is (10 sec: 4096.6, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 9875456. Throughput: 0: 927.5. Samples: 1341676. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:03:59,179][01623] Avg episode reward: [(0, '28.935')] +[2023-02-24 11:04:04,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 9895936. Throughput: 0: 903.8. Samples: 1347270. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0) +[2023-02-24 11:04:04,186][01623] Avg episode reward: [(0, '28.967')] +[2023-02-24 11:04:09,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3554.5). Total num frames: 9908224. Throughput: 0: 893.0. Samples: 1351432. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:04:09,184][01623] Avg episode reward: [(0, '30.409')] +[2023-02-24 11:04:10,077][28924] Updated weights for policy 0, policy_version 2420 (0.0024) +[2023-02-24 11:04:14,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 9928704. Throughput: 0: 903.7. Samples: 1354100. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:04:14,179][01623] Avg episode reward: [(0, '30.000')] +[2023-02-24 11:04:14,186][28910] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002424_9928704.pth... +[2023-02-24 11:04:14,355][28910] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002216_9076736.pth +[2023-02-24 11:04:19,177][01623] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3582.3). Total num frames: 9949184. Throughput: 0: 925.9. Samples: 1360888. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:04:19,179][01623] Avg episode reward: [(0, '30.205')] +[2023-02-24 11:04:19,447][28924] Updated weights for policy 0, policy_version 2430 (0.0014) +[2023-02-24 11:04:24,177][01623] Fps is (10 sec: 3686.4, 60 sec: 3618.2, 300 sec: 3568.4). Total num frames: 9965568. Throughput: 0: 895.8. Samples: 1366136. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-24 11:04:24,185][01623] Avg episode reward: [(0, '29.022')] +[2023-02-24 11:04:29,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3568.4). Total num frames: 9981952. Throughput: 0: 893.3. Samples: 1368334. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-24 11:04:29,181][01623] Avg episode reward: [(0, '29.417')] +[2023-02-24 11:04:32,717][28924] Updated weights for policy 0, policy_version 2440 (0.0018) +[2023-02-24 11:04:34,177][01623] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 9998336. Throughput: 0: 907.0. Samples: 1373160. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-24 11:04:34,180][01623] Avg episode reward: [(0, '30.233')] +[2023-02-24 11:04:35,540][01623] Component Batcher_0 stopped! +[2023-02-24 11:04:35,539][28910] Stopping Batcher_0... +[2023-02-24 11:04:35,544][28910] Loop batcher_evt_loop terminating... +[2023-02-24 11:04:35,540][28910] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth... +[2023-02-24 11:04:35,621][28924] Weights refcount: 2 0 +[2023-02-24 11:04:35,646][28924] Stopping InferenceWorker_p0-w0... +[2023-02-24 11:04:35,646][01623] Component InferenceWorker_p0-w0 stopped! +[2023-02-24 11:04:35,656][28924] Loop inference_proc0-0_evt_loop terminating... +[2023-02-24 11:04:35,693][28910] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002318_9494528.pth +[2023-02-24 11:04:35,705][28910] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth... +[2023-02-24 11:04:35,821][28910] Stopping LearnerWorker_p0... +[2023-02-24 11:04:35,821][28910] Loop learner_proc0_evt_loop terminating... +[2023-02-24 11:04:35,818][01623] Component LearnerWorker_p0 stopped! +[2023-02-24 11:04:35,923][28937] Stopping RolloutWorker_w2... +[2023-02-24 11:04:35,923][28937] Loop rollout_proc2_evt_loop terminating... +[2023-02-24 11:04:35,920][28926] Stopping RolloutWorker_w0... +[2023-02-24 11:04:35,920][01623] Component RolloutWorker_w0 stopped! +[2023-02-24 11:04:35,924][28926] Loop rollout_proc0_evt_loop terminating... +[2023-02-24 11:04:35,927][01623] Component RolloutWorker_w2 stopped! +[2023-02-24 11:04:35,934][28931] Stopping RolloutWorker_w4... +[2023-02-24 11:04:35,936][28931] Loop rollout_proc4_evt_loop terminating... +[2023-02-24 11:04:35,934][01623] Component RolloutWorker_w4 stopped! +[2023-02-24 11:04:35,941][28943] Stopping RolloutWorker_w6... +[2023-02-24 11:04:35,942][28943] Loop rollout_proc6_evt_loop terminating... +[2023-02-24 11:04:35,941][01623] Component RolloutWorker_w6 stopped! +[2023-02-24 11:04:35,964][01623] Component RolloutWorker_w1 stopped! +[2023-02-24 11:04:35,968][28925] Stopping RolloutWorker_w1... +[2023-02-24 11:04:35,969][28925] Loop rollout_proc1_evt_loop terminating... +[2023-02-24 11:04:35,996][28939] Stopping RolloutWorker_w5... +[2023-02-24 11:04:35,996][01623] Component RolloutWorker_w7 stopped! +[2023-02-24 11:04:36,005][01623] Component RolloutWorker_w5 stopped! +[2023-02-24 11:04:36,016][01623] Component RolloutWorker_w3 stopped! +[2023-02-24 11:04:36,017][01623] Waiting for process learner_proc0 to stop... +[2023-02-24 11:04:36,027][28927] Stopping RolloutWorker_w3... +[2023-02-24 11:04:36,027][28927] Loop rollout_proc3_evt_loop terminating... +[2023-02-24 11:04:35,998][28941] Stopping RolloutWorker_w7... +[2023-02-24 11:04:35,997][28939] Loop rollout_proc5_evt_loop terminating... +[2023-02-24 11:04:36,039][28941] Loop rollout_proc7_evt_loop terminating... +[2023-02-24 11:04:39,181][01623] Waiting for process inference_proc0-0 to join... +[2023-02-24 11:04:39,183][01623] Waiting for process rollout_proc0 to join... +[2023-02-24 11:04:39,187][01623] Waiting for process rollout_proc1 to join... +[2023-02-24 11:04:39,193][01623] Waiting for process rollout_proc2 to join... +[2023-02-24 11:04:39,193][01623] Waiting for process rollout_proc3 to join... +[2023-02-24 11:04:39,195][01623] Waiting for process rollout_proc4 to join... +[2023-02-24 11:04:39,196][01623] Waiting for process rollout_proc5 to join... +[2023-02-24 11:04:39,198][01623] Waiting for process rollout_proc6 to join... +[2023-02-24 11:04:39,206][01623] Waiting for process rollout_proc7 to join... +[2023-02-24 11:04:39,209][01623] Batcher 0 profile tree view: +batching: 35.6157, releasing_batches: 0.0329 +[2023-02-24 11:04:39,211][01623] InferenceWorker_p0-w0 profile tree view: +wait_policy: 0.0065 + wait_policy_total: 747.8741 +update_model: 10.2728 + weight_update: 0.0014 +one_step: 0.0027 + handle_policy_step: 744.4427 + deserialize: 20.7052, stack: 4.1453, obs_to_device_normalize: 160.9053, forward: 363.6956, send_messages: 36.5638 + prepare_outputs: 121.5696 + to_cpu: 76.1946 +[2023-02-24 11:04:39,214][01623] Learner 0 profile tree view: +misc: 0.0085, prepare_batch: 21.2186 +train: 110.5493 + epoch_init: 0.0081, minibatch_init: 0.0083, losses_postprocess: 0.7529, kl_divergence: 0.7241, after_optimizer: 4.4222 + calculate_losses: 36.8311 + losses_init: 0.0144, forward_head: 2.4315, bptt_initial: 24.0398, tail: 1.5803, advantages_returns: 0.4345, losses: 4.6497 + bptt: 3.1972 + bptt_forward_core: 3.0780 + update: 66.7834 + clip: 1.9878 +[2023-02-24 11:04:39,216][01623] RolloutWorker_w0 profile tree view: +wait_for_trajectories: 0.4310, enqueue_policy_requests: 208.4554, env_step: 1174.5719, overhead: 31.4377, complete_rollouts: 10.4650 +save_policy_outputs: 29.8957 + split_output_tensors: 14.5189 +[2023-02-24 11:04:39,221][01623] RolloutWorker_w7 profile tree view: +wait_for_trajectories: 0.4278, enqueue_policy_requests: 203.1694, env_step: 1178.1392, overhead: 29.8791, complete_rollouts: 10.1686 +save_policy_outputs: 28.9438 + split_output_tensors: 14.4667 +[2023-02-24 11:04:39,223][01623] Loop Runner_EvtLoop terminating... +[2023-02-24 11:04:39,225][01623] Runner profile tree view: +main_loop: 1583.3832 +[2023-02-24 11:04:39,227][01623] Collected {0: 10006528}, FPS: 3474.2 +[2023-02-24 11:06:52,177][01623] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-24 11:06:52,180][01623] Overriding arg 'num_workers' with value 1 passed from command line +[2023-02-24 11:06:52,182][01623] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-02-24 11:06:52,186][01623] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-02-24 11:06:52,189][01623] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-02-24 11:06:52,191][01623] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-02-24 11:06:52,194][01623] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file! +[2023-02-24 11:06:52,196][01623] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-02-24 11:06:52,198][01623] Adding new argument 'push_to_hub'=False that is not in the saved config file! +[2023-02-24 11:06:52,199][01623] Adding new argument 'hf_repository'=None that is not in the saved config file! +[2023-02-24 11:06:52,200][01623] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-02-24 11:06:52,201][01623] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-02-24 11:06:52,202][01623] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-02-24 11:06:52,204][01623] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-02-24 11:06:52,205][01623] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-02-24 11:06:52,244][01623] RunningMeanStd input shape: (3, 72, 128) +[2023-02-24 11:06:52,251][01623] RunningMeanStd input shape: (1,) +[2023-02-24 11:06:52,274][01623] ConvEncoder: input_channels=3 +[2023-02-24 11:06:52,418][01623] Conv encoder output size: 512 +[2023-02-24 11:06:52,419][01623] Policy head output size: 512 +[2023-02-24 11:06:52,520][01623] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth... +[2023-02-24 11:06:53,433][01623] Num frames 100... +[2023-02-24 11:06:53,550][01623] Num frames 200... +[2023-02-24 11:06:53,658][01623] Num frames 300... +[2023-02-24 11:06:53,776][01623] Num frames 400... +[2023-02-24 11:06:53,897][01623] Num frames 500... +[2023-02-24 11:06:54,011][01623] Num frames 600... +[2023-02-24 11:06:54,121][01623] Num frames 700... +[2023-02-24 11:06:54,233][01623] Num frames 800... +[2023-02-24 11:06:54,356][01623] Num frames 900... +[2023-02-24 11:06:54,478][01623] Num frames 1000... +[2023-02-24 11:06:54,601][01623] Num frames 1100... +[2023-02-24 11:06:54,715][01623] Num frames 1200... +[2023-02-24 11:06:54,820][01623] Avg episode rewards: #0: 39.390, true rewards: #0: 12.390 +[2023-02-24 11:06:54,821][01623] Avg episode reward: 39.390, avg true_objective: 12.390 +[2023-02-24 11:06:54,895][01623] Num frames 1300... +[2023-02-24 11:06:55,018][01623] Num frames 1400... +[2023-02-24 11:06:55,137][01623] Num frames 1500... +[2023-02-24 11:06:55,245][01623] Num frames 1600... +[2023-02-24 11:06:55,360][01623] Num frames 1700... +[2023-02-24 11:06:55,484][01623] Num frames 1800... +[2023-02-24 11:06:55,605][01623] Num frames 1900... +[2023-02-24 11:06:55,718][01623] Num frames 2000... +[2023-02-24 11:06:55,834][01623] Num frames 2100... +[2023-02-24 11:06:55,951][01623] Num frames 2200... +[2023-02-24 11:06:56,067][01623] Num frames 2300... +[2023-02-24 11:06:56,179][01623] Num frames 2400... +[2023-02-24 11:06:56,291][01623] Num frames 2500... +[2023-02-24 11:06:56,407][01623] Num frames 2600... +[2023-02-24 11:06:56,524][01623] Num frames 2700... +[2023-02-24 11:06:56,633][01623] Num frames 2800... +[2023-02-24 11:06:56,746][01623] Num frames 2900... +[2023-02-24 11:06:56,848][01623] Avg episode rewards: #0: 40.175, true rewards: #0: 14.675 +[2023-02-24 11:06:56,850][01623] Avg episode reward: 40.175, avg true_objective: 14.675 +[2023-02-24 11:06:56,932][01623] Num frames 3000... +[2023-02-24 11:06:57,056][01623] Num frames 3100... +[2023-02-24 11:06:57,176][01623] Num frames 3200... +[2023-02-24 11:06:57,294][01623] Num frames 3300... +[2023-02-24 11:06:57,463][01623] Num frames 3400... +[2023-02-24 11:06:57,545][01623] Avg episode rewards: #0: 30.383, true rewards: #0: 11.383 +[2023-02-24 11:06:57,547][01623] Avg episode reward: 30.383, avg true_objective: 11.383 +[2023-02-24 11:06:57,699][01623] Num frames 3500... +[2023-02-24 11:06:57,863][01623] Num frames 3600... +[2023-02-24 11:06:58,031][01623] Num frames 3700... +[2023-02-24 11:06:58,185][01623] Num frames 3800... +[2023-02-24 11:06:58,345][01623] Num frames 3900... +[2023-02-24 11:06:58,499][01623] Num frames 4000... +[2023-02-24 11:06:58,658][01623] Num frames 4100... +[2023-02-24 11:06:58,815][01623] Num frames 4200... +[2023-02-24 11:06:58,973][01623] Num frames 4300... +[2023-02-24 11:06:59,127][01623] Num frames 4400... +[2023-02-24 11:06:59,290][01623] Num frames 4500... +[2023-02-24 11:06:59,450][01623] Num frames 4600... +[2023-02-24 11:06:59,612][01623] Num frames 4700... +[2023-02-24 11:06:59,775][01623] Num frames 4800... +[2023-02-24 11:06:59,938][01623] Num frames 4900... +[2023-02-24 11:07:00,095][01623] Num frames 5000... +[2023-02-24 11:07:00,255][01623] Num frames 5100... +[2023-02-24 11:07:00,424][01623] Num frames 5200... +[2023-02-24 11:07:00,589][01623] Num frames 5300... +[2023-02-24 11:07:00,804][01623] Avg episode rewards: #0: 34.497, true rewards: #0: 13.497 +[2023-02-24 11:07:00,807][01623] Avg episode reward: 34.497, avg true_objective: 13.497 +[2023-02-24 11:07:00,811][01623] Num frames 5400... +[2023-02-24 11:07:00,935][01623] Num frames 5500... +[2023-02-24 11:07:01,046][01623] Num frames 5600... +[2023-02-24 11:07:01,155][01623] Num frames 5700... +[2023-02-24 11:07:01,272][01623] Num frames 5800... +[2023-02-24 11:07:01,385][01623] Num frames 5900... +[2023-02-24 11:07:01,495][01623] Num frames 6000... +[2023-02-24 11:07:01,603][01623] Num frames 6100... +[2023-02-24 11:07:01,714][01623] Num frames 6200... +[2023-02-24 11:07:01,830][01623] Num frames 6300... +[2023-02-24 11:07:01,966][01623] Num frames 6400... +[2023-02-24 11:07:02,090][01623] Num frames 6500... +[2023-02-24 11:07:02,219][01623] Num frames 6600... +[2023-02-24 11:07:02,350][01623] Num frames 6700... +[2023-02-24 11:07:02,472][01623] Num frames 6800... +[2023-02-24 11:07:02,593][01623] Num frames 6900... +[2023-02-24 11:07:02,715][01623] Num frames 7000... +[2023-02-24 11:07:02,822][01623] Avg episode rewards: #0: 35.288, true rewards: #0: 14.088 +[2023-02-24 11:07:02,823][01623] Avg episode reward: 35.288, avg true_objective: 14.088 +[2023-02-24 11:07:02,899][01623] Num frames 7100... +[2023-02-24 11:07:03,024][01623] Num frames 7200... +[2023-02-24 11:07:03,138][01623] Num frames 7300... +[2023-02-24 11:07:03,246][01623] Num frames 7400... +[2023-02-24 11:07:03,360][01623] Num frames 7500... +[2023-02-24 11:07:03,471][01623] Num frames 7600... +[2023-02-24 11:07:03,549][01623] Avg episode rewards: #0: 31.533, true rewards: #0: 12.700 +[2023-02-24 11:07:03,550][01623] Avg episode reward: 31.533, avg true_objective: 12.700 +[2023-02-24 11:07:03,649][01623] Num frames 7700... +[2023-02-24 11:07:03,769][01623] Num frames 7800... +[2023-02-24 11:07:03,888][01623] Num frames 7900... +[2023-02-24 11:07:04,017][01623] Num frames 8000... +[2023-02-24 11:07:04,134][01623] Num frames 8100... +[2023-02-24 11:07:04,253][01623] Num frames 8200... +[2023-02-24 11:07:04,371][01623] Num frames 8300... +[2023-02-24 11:07:04,485][01623] Num frames 8400... +[2023-02-24 11:07:04,598][01623] Num frames 8500... +[2023-02-24 11:07:04,715][01623] Num frames 8600... +[2023-02-24 11:07:04,824][01623] Num frames 8700... +[2023-02-24 11:07:04,941][01623] Num frames 8800... +[2023-02-24 11:07:05,060][01623] Num frames 8900... +[2023-02-24 11:07:05,176][01623] Num frames 9000... +[2023-02-24 11:07:05,288][01623] Num frames 9100... +[2023-02-24 11:07:05,436][01623] Avg episode rewards: #0: 32.265, true rewards: #0: 13.123 +[2023-02-24 11:07:05,437][01623] Avg episode reward: 32.265, avg true_objective: 13.123 +[2023-02-24 11:07:05,459][01623] Num frames 9200... +[2023-02-24 11:07:05,576][01623] Num frames 9300... +[2023-02-24 11:07:05,693][01623] Num frames 9400... +[2023-02-24 11:07:05,807][01623] Num frames 9500... +[2023-02-24 11:07:05,924][01623] Num frames 9600... +[2023-02-24 11:07:06,051][01623] Num frames 9700... +[2023-02-24 11:07:06,178][01623] Num frames 9800... +[2023-02-24 11:07:06,285][01623] Avg episode rewards: #0: 30.054, true rewards: #0: 12.304 +[2023-02-24 11:07:06,288][01623] Avg episode reward: 30.054, avg true_objective: 12.304 +[2023-02-24 11:07:06,361][01623] Num frames 9900... +[2023-02-24 11:07:06,483][01623] Num frames 10000... +[2023-02-24 11:07:06,603][01623] Num frames 10100... +[2023-02-24 11:07:06,728][01623] Num frames 10200... +[2023-02-24 11:07:06,885][01623] Avg episode rewards: #0: 27.323, true rewards: #0: 11.434 +[2023-02-24 11:07:06,887][01623] Avg episode reward: 27.323, avg true_objective: 11.434 +[2023-02-24 11:07:06,904][01623] Num frames 10300... +[2023-02-24 11:07:07,034][01623] Num frames 10400... +[2023-02-24 11:07:07,174][01623] Num frames 10500... +[2023-02-24 11:07:07,309][01623] Num frames 10600... +[2023-02-24 11:07:07,426][01623] Num frames 10700... +[2023-02-24 11:07:07,543][01623] Num frames 10800... +[2023-02-24 11:07:07,659][01623] Num frames 10900... +[2023-02-24 11:07:07,772][01623] Num frames 11000... +[2023-02-24 11:07:07,890][01623] Num frames 11100... +[2023-02-24 11:07:08,005][01623] Num frames 11200... +[2023-02-24 11:07:08,125][01623] Num frames 11300... +[2023-02-24 11:07:08,290][01623] Avg episode rewards: #0: 27.294, true rewards: #0: 11.394 +[2023-02-24 11:07:08,292][01623] Avg episode reward: 27.294, avg true_objective: 11.394 +[2023-02-24 11:08:20,362][01623] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2023-02-24 11:13:41,500][01623] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-24 11:13:41,504][01623] Overriding arg 'num_workers' with value 1 passed from command line +[2023-02-24 11:13:41,506][01623] Adding new argument 'no_render'=True that is not in the saved config file! +[2023-02-24 11:13:41,511][01623] Adding new argument 'save_video'=True that is not in the saved config file! +[2023-02-24 11:13:41,515][01623] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file! +[2023-02-24 11:13:41,516][01623] Adding new argument 'video_name'=None that is not in the saved config file! +[2023-02-24 11:13:41,518][01623] Adding new argument 'max_num_frames'=100000 that is not in the saved config file! +[2023-02-24 11:13:41,519][01623] Adding new argument 'max_num_episodes'=10 that is not in the saved config file! +[2023-02-24 11:13:41,523][01623] Adding new argument 'push_to_hub'=True that is not in the saved config file! +[2023-02-24 11:13:41,524][01623] Adding new argument 'hf_repository'='dbaibak/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file! +[2023-02-24 11:13:41,525][01623] Adding new argument 'policy_index'=0 that is not in the saved config file! +[2023-02-24 11:13:41,526][01623] Adding new argument 'eval_deterministic'=False that is not in the saved config file! +[2023-02-24 11:13:41,528][01623] Adding new argument 'train_script'=None that is not in the saved config file! +[2023-02-24 11:13:41,531][01623] Adding new argument 'enjoy_script'=None that is not in the saved config file! +[2023-02-24 11:13:41,532][01623] Using frameskip 1 and render_action_repeat=4 for evaluation +[2023-02-24 11:13:41,570][01623] RunningMeanStd input shape: (3, 72, 128) +[2023-02-24 11:13:41,572][01623] RunningMeanStd input shape: (1,) +[2023-02-24 11:13:41,593][01623] ConvEncoder: input_channels=3 +[2023-02-24 11:13:41,655][01623] Conv encoder output size: 512 +[2023-02-24 11:13:41,657][01623] Policy head output size: 512 +[2023-02-24 11:13:41,685][01623] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002443_10006528.pth... +[2023-02-24 11:13:42,350][01623] Num frames 100... +[2023-02-24 11:13:42,514][01623] Num frames 200... +[2023-02-24 11:13:42,675][01623] Num frames 300... +[2023-02-24 11:13:42,839][01623] Num frames 400... +[2023-02-24 11:13:42,962][01623] Avg episode rewards: #0: 9.410, true rewards: #0: 4.410 +[2023-02-24 11:13:42,965][01623] Avg episode reward: 9.410, avg true_objective: 4.410 +[2023-02-24 11:13:43,060][01623] Num frames 500... +[2023-02-24 11:13:43,193][01623] Num frames 600... +[2023-02-24 11:13:43,303][01623] Num frames 700... +[2023-02-24 11:13:43,419][01623] Num frames 800... +[2023-02-24 11:13:43,538][01623] Num frames 900... +[2023-02-24 11:13:43,661][01623] Num frames 1000... +[2023-02-24 11:13:43,775][01623] Num frames 1100... +[2023-02-24 11:13:43,895][01623] Num frames 1200... +[2023-02-24 11:13:43,961][01623] Avg episode rewards: #0: 13.045, true rewards: #0: 6.045 +[2023-02-24 11:13:43,963][01623] Avg episode reward: 13.045, avg true_objective: 6.045 +[2023-02-24 11:13:44,063][01623] Num frames 1300... +[2023-02-24 11:13:44,172][01623] Num frames 1400... +[2023-02-24 11:13:44,293][01623] Num frames 1500... +[2023-02-24 11:13:44,413][01623] Num frames 1600... +[2023-02-24 11:13:44,530][01623] Num frames 1700... +[2023-02-24 11:13:44,639][01623] Num frames 1800... +[2023-02-24 11:13:44,783][01623] Avg episode rewards: #0: 12.937, true rewards: #0: 6.270 +[2023-02-24 11:13:44,785][01623] Avg episode reward: 12.937, avg true_objective: 6.270 +[2023-02-24 11:13:44,812][01623] Num frames 1900... +[2023-02-24 11:13:44,925][01623] Num frames 2000... +[2023-02-24 11:13:45,038][01623] Num frames 2100... +[2023-02-24 11:13:45,154][01623] Num frames 2200... +[2023-02-24 11:13:45,271][01623] Num frames 2300... +[2023-02-24 11:13:45,387][01623] Num frames 2400... +[2023-02-24 11:13:45,498][01623] Num frames 2500... +[2023-02-24 11:13:45,614][01623] Num frames 2600... +[2023-02-24 11:13:45,737][01623] Num frames 2700... +[2023-02-24 11:13:45,850][01623] Num frames 2800... +[2023-02-24 11:13:45,980][01623] Num frames 2900... +[2023-02-24 11:13:46,106][01623] Num frames 3000... +[2023-02-24 11:13:46,222][01623] Num frames 3100... +[2023-02-24 11:13:46,351][01623] Num frames 3200... +[2023-02-24 11:13:46,480][01623] Num frames 3300... +[2023-02-24 11:13:46,570][01623] Avg episode rewards: #0: 20.577, true rewards: #0: 8.327 +[2023-02-24 11:13:46,571][01623] Avg episode reward: 20.577, avg true_objective: 8.327 +[2023-02-24 11:13:46,656][01623] Num frames 3400... +[2023-02-24 11:13:46,787][01623] Num frames 3500... +[2023-02-24 11:13:46,906][01623] Num frames 3600... +[2023-02-24 11:13:47,024][01623] Num frames 3700... +[2023-02-24 11:13:47,145][01623] Num frames 3800... +[2023-02-24 11:13:47,267][01623] Num frames 3900... +[2023-02-24 11:13:47,391][01623] Num frames 4000... +[2023-02-24 11:13:47,517][01623] Num frames 4100... +[2023-02-24 11:13:47,631][01623] Num frames 4200... +[2023-02-24 11:13:47,754][01623] Num frames 4300... +[2023-02-24 11:13:47,882][01623] Num frames 4400... +[2023-02-24 11:13:48,010][01623] Num frames 4500... +[2023-02-24 11:13:48,135][01623] Num frames 4600... +[2023-02-24 11:13:48,248][01623] Num frames 4700... +[2023-02-24 11:13:48,367][01623] Num frames 4800... +[2023-02-24 11:13:48,489][01623] Num frames 4900... +[2023-02-24 11:13:48,601][01623] Num frames 5000... +[2023-02-24 11:13:48,720][01623] Num frames 5100... +[2023-02-24 11:13:48,842][01623] Num frames 5200... +[2023-02-24 11:13:48,966][01623] Num frames 5300... +[2023-02-24 11:13:49,077][01623] Num frames 5400... +[2023-02-24 11:13:49,168][01623] Avg episode rewards: #0: 28.262, true rewards: #0: 10.862 +[2023-02-24 11:13:49,170][01623] Avg episode reward: 28.262, avg true_objective: 10.862 +[2023-02-24 11:13:49,270][01623] Num frames 5500... +[2023-02-24 11:13:49,399][01623] Num frames 5600... +[2023-02-24 11:13:49,519][01623] Num frames 5700... +[2023-02-24 11:13:49,630][01623] Num frames 5800... +[2023-02-24 11:13:49,748][01623] Num frames 5900... +[2023-02-24 11:13:49,866][01623] Num frames 6000... +[2023-02-24 11:13:49,982][01623] Num frames 6100... +[2023-02-24 11:13:50,110][01623] Num frames 6200... +[2023-02-24 11:13:50,225][01623] Num frames 6300... +[2023-02-24 11:13:50,424][01623] Num frames 6400... +[2023-02-24 11:13:50,588][01623] Num frames 6500... +[2023-02-24 11:13:50,667][01623] Avg episode rewards: #0: 27.198, true rewards: #0: 10.865 +[2023-02-24 11:13:50,669][01623] Avg episode reward: 27.198, avg true_objective: 10.865 +[2023-02-24 11:13:50,772][01623] Num frames 6600... +[2023-02-24 11:13:50,903][01623] Num frames 6700... +[2023-02-24 11:13:51,025][01623] Num frames 6800... +[2023-02-24 11:13:51,150][01623] Num frames 6900... +[2023-02-24 11:13:51,263][01623] Num frames 7000... +[2023-02-24 11:13:51,385][01623] Num frames 7100... +[2023-02-24 11:13:51,513][01623] Num frames 7200... +[2023-02-24 11:13:51,630][01623] Num frames 7300... +[2023-02-24 11:13:51,747][01623] Num frames 7400... +[2023-02-24 11:13:51,867][01623] Num frames 7500... +[2023-02-24 11:13:51,985][01623] Num frames 7600... +[2023-02-24 11:13:52,105][01623] Num frames 7700... +[2023-02-24 11:13:52,234][01623] Num frames 7800... +[2023-02-24 11:13:52,370][01623] Num frames 7900... +[2023-02-24 11:13:52,508][01623] Num frames 8000... +[2023-02-24 11:13:52,628][01623] Num frames 8100... +[2023-02-24 11:13:52,749][01623] Num frames 8200... +[2023-02-24 11:13:52,880][01623] Num frames 8300... +[2023-02-24 11:13:52,999][01623] Num frames 8400... +[2023-02-24 11:13:53,086][01623] Avg episode rewards: #0: 29.894, true rewards: #0: 12.037 +[2023-02-24 11:13:53,088][01623] Avg episode reward: 29.894, avg true_objective: 12.037 +[2023-02-24 11:13:53,214][01623] Num frames 8500... +[2023-02-24 11:13:53,405][01623] Num frames 8600... +[2023-02-24 11:13:53,576][01623] Num frames 8700... +[2023-02-24 11:13:53,740][01623] Num frames 8800... +[2023-02-24 11:13:53,916][01623] Avg episode rewards: #0: 26.842, true rewards: #0: 11.092 +[2023-02-24 11:13:53,918][01623] Avg episode reward: 26.842, avg true_objective: 11.092 +[2023-02-24 11:13:53,962][01623] Num frames 8900... +[2023-02-24 11:13:54,113][01623] Num frames 9000... +[2023-02-24 11:13:54,272][01623] Num frames 9100... +[2023-02-24 11:13:54,432][01623] Num frames 9200... +[2023-02-24 11:13:54,592][01623] Num frames 9300... +[2023-02-24 11:13:54,692][01623] Avg episode rewards: #0: 24.802, true rewards: #0: 10.358 +[2023-02-24 11:13:54,693][01623] Avg episode reward: 24.802, avg true_objective: 10.358 +[2023-02-24 11:13:54,820][01623] Num frames 9400... +[2023-02-24 11:13:54,991][01623] Num frames 9500... +[2023-02-24 11:13:55,153][01623] Num frames 9600... +[2023-02-24 11:13:55,316][01623] Num frames 9700... +[2023-02-24 11:13:55,490][01623] Num frames 9800... +[2023-02-24 11:13:55,659][01623] Num frames 9900... +[2023-02-24 11:13:55,827][01623] Num frames 10000... +[2023-02-24 11:13:55,991][01623] Num frames 10100... +[2023-02-24 11:13:56,174][01623] Num frames 10200... +[2023-02-24 11:13:56,359][01623] Num frames 10300... +[2023-02-24 11:13:56,542][01623] Num frames 10400... +[2023-02-24 11:13:56,714][01623] Num frames 10500... +[2023-02-24 11:13:56,810][01623] Avg episode rewards: #0: 25.324, true rewards: #0: 10.524 +[2023-02-24 11:13:56,813][01623] Avg episode reward: 25.324, avg true_objective: 10.524 +[2023-02-24 11:15:04,289][01623] Replay video saved to /content/train_dir/default_experiment/replay.mp4!