diff --git "a/sf_log.txt" "b/sf_log.txt" --- "a/sf_log.txt" +++ "b/sf_log.txt" @@ -971,3 +971,2933 @@ main_loop: 1135.5441 [2023-02-25 03:33:54,810][00684] Avg episode rewards: #0: 4.332, true rewards: #0: 4.032 [2023-02-25 03:33:54,812][00684] Avg episode reward: 4.332, avg true_objective: 4.032 [2023-02-25 03:34:14,342][00684] Replay video saved to /content/train_dir/default_experiment/replay.mp4! +[2023-02-25 03:34:18,158][00684] The model has been pushed to https://huggingface.co/menoua/rl_course_vizdoom_health_gathering_supreme +[2023-02-25 03:35:47,993][00684] Environment doom_basic already registered, overwriting... +[2023-02-25 03:35:47,996][00684] Environment doom_two_colors_easy already registered, overwriting... +[2023-02-25 03:35:47,998][00684] Environment doom_two_colors_hard already registered, overwriting... +[2023-02-25 03:35:47,999][00684] Environment doom_dm already registered, overwriting... +[2023-02-25 03:35:48,001][00684] Environment doom_dwango5 already registered, overwriting... +[2023-02-25 03:35:48,003][00684] Environment doom_my_way_home_flat_actions already registered, overwriting... +[2023-02-25 03:35:48,005][00684] Environment doom_defend_the_center_flat_actions already registered, overwriting... +[2023-02-25 03:35:48,009][00684] Environment doom_my_way_home already registered, overwriting... +[2023-02-25 03:35:48,010][00684] Environment doom_deadly_corridor already registered, overwriting... +[2023-02-25 03:35:48,011][00684] Environment doom_defend_the_center already registered, overwriting... +[2023-02-25 03:35:48,013][00684] Environment doom_defend_the_line already registered, overwriting... +[2023-02-25 03:35:48,017][00684] Environment doom_health_gathering already registered, overwriting... +[2023-02-25 03:35:48,019][00684] Environment doom_health_gathering_supreme already registered, overwriting... +[2023-02-25 03:35:48,020][00684] Environment doom_battle already registered, overwriting... +[2023-02-25 03:35:48,021][00684] Environment doom_battle2 already registered, overwriting... +[2023-02-25 03:35:48,024][00684] Environment doom_duel_bots already registered, overwriting... +[2023-02-25 03:35:48,025][00684] Environment doom_deathmatch_bots already registered, overwriting... +[2023-02-25 03:35:48,027][00684] Environment doom_duel already registered, overwriting... +[2023-02-25 03:35:48,029][00684] Environment doom_deathmatch_full already registered, overwriting... +[2023-02-25 03:35:48,030][00684] Environment doom_benchmark already registered, overwriting... +[2023-02-25 03:35:48,035][00684] register_encoder_factory: +[2023-02-25 03:35:48,067][00684] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json +[2023-02-25 03:35:48,075][00684] Overriding arg 'num_envs_per_worker' with value 8 passed from command line +[2023-02-25 03:35:48,076][00684] Overriding arg 'train_for_env_steps' with value 20000000 passed from command line +[2023-02-25 03:35:48,083][00684] Experiment dir /content/train_dir/default_experiment already exists! +[2023-02-25 03:35:48,085][00684] Resuming existing experiment from /content/train_dir/default_experiment... 
+[2023-02-25 03:35:48,087][00684] Weights and Biases integration disabled +[2023-02-25 03:35:48,093][00684] Environment var CUDA_VISIBLE_DEVICES is 0 + +[2023-02-25 03:35:49,619][00684] Starting experiment with the following configuration: +help=False +algo=APPO +env=doom_health_gathering_supreme +experiment=default_experiment +train_dir=/content/train_dir +restart_behavior=resume +device=gpu +seed=None +num_policies=1 +async_rl=True +serial_mode=False +batched_sampling=False +num_batches_to_accumulate=2 +worker_num_splits=2 +policy_workers_per_policy=1 +max_policy_lag=1000 +num_workers=8 +num_envs_per_worker=8 +batch_size=1024 +num_batches_per_epoch=1 +num_epochs=1 +rollout=32 +recurrence=32 +shuffle_minibatches=False +gamma=0.99 +reward_scale=1.0 +reward_clip=1000.0 +value_bootstrap=False +normalize_returns=True +exploration_loss_coeff=0.001 +value_loss_coeff=0.5 +kl_loss_coeff=0.0 +exploration_loss=symmetric_kl +gae_lambda=0.95 +ppo_clip_ratio=0.1 +ppo_clip_value=0.2 +with_vtrace=False +vtrace_rho=1.0 +vtrace_c=1.0 +optimizer=adam +adam_eps=1e-06 +adam_beta1=0.9 +adam_beta2=0.999 +max_grad_norm=4.0 +learning_rate=0.0001 +lr_schedule=constant +lr_schedule_kl_threshold=0.008 +lr_adaptive_min=1e-06 +lr_adaptive_max=0.01 +obs_subtract_mean=0.0 +obs_scale=255.0 +normalize_input=True +normalize_input_keys=None +decorrelate_experience_max_seconds=0 +decorrelate_envs_on_one_worker=True +actor_worker_gpus=[] +set_workers_cpu_affinity=True +force_envs_single_thread=False +default_niceness=0 +log_to_file=True +experiment_summaries_interval=10 +flush_summaries_interval=30 +stats_avg=100 +summaries_use_frameskip=True +heartbeat_interval=20 +heartbeat_reporting_interval=600 +train_for_env_steps=20000000 +train_for_seconds=10000000000 +save_every_sec=120 +keep_checkpoints=2 +load_checkpoint_kind=latest +save_milestones_sec=-1 +save_best_every_sec=5 +save_best_metric=reward +save_best_after=100000 +benchmark=False +encoder_mlp_layers=[512, 512] +encoder_conv_architecture=convnet_simple +encoder_conv_mlp_layers=[512] +use_rnn=True +rnn_size=512 +rnn_type=gru +rnn_num_layers=1 +decoder_mlp_layers=[] +nonlinearity=elu +policy_initialization=orthogonal +policy_init_gain=1.0 +actor_critic_share_weights=True +adaptive_stddev=True +continuous_tanh_scale=0.0 +initial_stddev=1.0 +use_env_info_cache=False +env_gpu_actions=False +env_gpu_observations=True +env_frameskip=4 +env_framestack=1 +pixel_format=CHW +use_record_episode_statistics=False +with_wandb=False +wandb_user=None +wandb_project=sample_factory +wandb_group=None +wandb_job_type=SF +wandb_tags=[] +with_pbt=False +pbt_mix_policies_in_one_env=True +pbt_period_env_steps=5000000 +pbt_start_mutation=20000000 +pbt_replace_fraction=0.3 +pbt_mutation_rate=0.15 +pbt_replace_reward_gap=0.1 +pbt_replace_reward_gap_absolute=1e-06 +pbt_optimize_gamma=False +pbt_target_objective=true_objective +pbt_perturb_min=1.1 +pbt_perturb_max=1.5 +num_agents=-1 +num_humans=0 +num_bots=-1 +start_bot_difficulty=None +timelimit=None +res_w=128 +res_h=72 +wide_aspect_ratio=False +eval_env_frameskip=1 +fps=35 +command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000 +cli_args={'env': 'doom_health_gathering_supreme', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000} +git_hash=unknown +git_repo_name=not a git repository +[2023-02-25 03:35:49,622][00684] Saving configuration to /content/train_dir/default_experiment/config.json... 
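Everything in the configuration dump above is also persisted to config.json in the experiment directory (the last entry). A minimal, self-contained way to inspect that file; the path and key names are taken directly from the log lines above.

```python
# Read back the configuration sample-factory just saved and print the
# hyperparameters most relevant to this APPO run (keys as in the dump above).
import json

with open("/content/train_dir/default_experiment/config.json") as f:
    cfg = json.load(f)

for key in ("algo", "env", "num_workers", "num_envs_per_worker", "rollout",
            "batch_size", "learning_rate", "gamma", "train_for_env_steps",
            "restart_behavior"):
    print(f"{key} = {cfg[key]}")
```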
+[2023-02-25 03:35:49,626][00684] Rollout worker 0 uses device cpu +[2023-02-25 03:35:49,628][00684] Rollout worker 1 uses device cpu +[2023-02-25 03:35:49,631][00684] Rollout worker 2 uses device cpu +[2023-02-25 03:35:49,637][00684] Rollout worker 3 uses device cpu +[2023-02-25 03:35:49,640][00684] Rollout worker 4 uses device cpu +[2023-02-25 03:35:49,641][00684] Rollout worker 5 uses device cpu +[2023-02-25 03:35:49,642][00684] Rollout worker 6 uses device cpu +[2023-02-25 03:35:49,644][00684] Rollout worker 7 uses device cpu +[2023-02-25 03:35:49,853][00684] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-25 03:35:49,856][00684] InferenceWorker_p0-w0: min num requests: 2 +[2023-02-25 03:35:49,891][00684] Starting all processes... +[2023-02-25 03:35:49,893][00684] Starting process learner_proc0 +[2023-02-25 03:35:50,019][00684] Starting all processes... +[2023-02-25 03:35:50,026][00684] Starting process inference_proc0-0 +[2023-02-25 03:35:50,029][00684] Starting process rollout_proc0 +[2023-02-25 03:35:50,029][00684] Starting process rollout_proc1 +[2023-02-25 03:35:50,029][00684] Starting process rollout_proc2 +[2023-02-25 03:35:50,029][00684] Starting process rollout_proc3 +[2023-02-25 03:35:50,029][00684] Starting process rollout_proc4 +[2023-02-25 03:35:50,029][00684] Starting process rollout_proc5 +[2023-02-25 03:35:50,029][00684] Starting process rollout_proc6 +[2023-02-25 03:35:50,029][00684] Starting process rollout_proc7 +[2023-02-25 03:35:58,906][19675] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-25 03:35:58,909][19675] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0 +[2023-02-25 03:35:58,983][19675] Num visible devices: 1 +[2023-02-25 03:35:59,025][19675] Starting seed is not provided +[2023-02-25 03:35:59,026][19675] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-25 03:35:59,027][19675] Initializing actor-critic model on device cuda:0 +[2023-02-25 03:35:59,028][19675] RunningMeanStd input shape: (3, 72, 128) +[2023-02-25 03:35:59,029][19675] RunningMeanStd input shape: (1,) +[2023-02-25 03:35:59,191][19675] ConvEncoder: input_channels=3 +[2023-02-25 03:36:00,444][19675] Conv encoder output size: 512 +[2023-02-25 03:36:00,446][19675] Policy head output size: 512 +[2023-02-25 03:36:00,662][19675] Created Actor Critic model with architecture: +[2023-02-25 03:36:00,663][19675] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): Linear(in_features=512, out_features=1, 
bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2023-02-25 03:36:01,228][19689] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-25 03:36:01,230][19689] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0 +[2023-02-25 03:36:01,305][19689] Num visible devices: 1 +[2023-02-25 03:36:01,437][19690] Worker 1 uses CPU cores [1] +[2023-02-25 03:36:01,920][19691] Worker 0 uses CPU cores [0] +[2023-02-25 03:36:01,943][19693] Worker 3 uses CPU cores [1] +[2023-02-25 03:36:01,967][19696] Worker 2 uses CPU cores [0] +[2023-02-25 03:36:02,282][19704] Worker 5 uses CPU cores [1] +[2023-02-25 03:36:02,307][19698] Worker 4 uses CPU cores [0] +[2023-02-25 03:36:02,363][19706] Worker 7 uses CPU cores [1] +[2023-02-25 03:36:02,447][19708] Worker 6 uses CPU cores [0] +[2023-02-25 03:36:04,435][19675] Using optimizer +[2023-02-25 03:36:04,436][19675] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth... +[2023-02-25 03:36:04,470][19675] Loading model from checkpoint +[2023-02-25 03:36:04,475][19675] Loaded experiment state at self.train_step=978, self.env_steps=4005888 +[2023-02-25 03:36:04,476][19675] Initialized policy 0 weights for model version 978 +[2023-02-25 03:36:04,479][19675] LearnerWorker_p0 finished initialization! +[2023-02-25 03:36:04,481][19675] Using GPUs [0] for process 0 (actually maps to GPUs [0]) +[2023-02-25 03:36:04,685][19689] RunningMeanStd input shape: (3, 72, 128) +[2023-02-25 03:36:04,686][19689] RunningMeanStd input shape: (1,) +[2023-02-25 03:36:04,699][19689] ConvEncoder: input_channels=3 +[2023-02-25 03:36:04,796][19689] Conv encoder output size: 512 +[2023-02-25 03:36:04,797][19689] Policy head output size: 512 +[2023-02-25 03:36:07,194][00684] Inference worker 0-0 is ready! +[2023-02-25 03:36:07,196][00684] All inference workers are ready! Signal rollout workers to start! +[2023-02-25 03:36:07,337][19704] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 03:36:07,336][19706] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 03:36:07,350][19693] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 03:36:07,352][19690] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 03:36:07,363][19708] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 03:36:07,360][19698] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 03:36:07,365][19691] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 03:36:07,361][19696] Doom resolution: 160x120, resize resolution: (128, 72) +[2023-02-25 03:36:08,083][19696] Decorrelating experience for 0 frames... +[2023-02-25 03:36:08,093][00684] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 4005888. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-25 03:36:08,784][19696] Decorrelating experience for 32 frames... +[2023-02-25 03:36:08,883][19691] Decorrelating experience for 0 frames... +[2023-02-25 03:36:09,071][19690] Decorrelating experience for 0 frames... +[2023-02-25 03:36:09,069][19704] Decorrelating experience for 0 frames... +[2023-02-25 03:36:09,074][19706] Decorrelating experience for 0 frames... +[2023-02-25 03:36:09,098][19693] Decorrelating experience for 0 frames... +[2023-02-25 03:36:09,753][19696] Decorrelating experience for 64 frames... 
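The module tree printed above (ActorCriticSharedWeights: a conv head with ELU activations, a 512-unit MLP, a GRU(512, 512) core, an identity decoder, and 1-unit value / 5-unit action heads) can be written out as a rough stand-alone PyTorch module. The layer widths are read directly from the log; the convolution kernel sizes and strides are not printed, so convnet_simple-style defaults are assumed here, and observation/returns normalization is omitted, making this sketch illustrative rather than exact.

```python
# Rough PyTorch equivalent of the actor-critic printed in the log above
# (shared weights: conv encoder -> MLP -> GRU core -> value/action heads).
# Conv kernel sizes/strides are assumptions; the widths match the log.
import torch
import torch.nn as nn

class DoomActorCritic(nn.Module):
    def __init__(self, num_actions: int = 5, rnn_size: int = 512):
        super().__init__()
        self.conv_head = nn.Sequential(           # input: (3, 72, 128) observations
            nn.Conv2d(3, 32, kernel_size=8, stride=4), nn.ELU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ELU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=2), nn.ELU(),
        )
        with torch.no_grad():                     # infer flattened conv output size
            n_flat = self.conv_head(torch.zeros(1, 3, 72, 128)).numel()
        self.mlp = nn.Sequential(nn.Linear(n_flat, 512), nn.ELU())  # "Conv encoder output size: 512"
        self.core = nn.GRU(512, rnn_size)         # ModelCoreRNN core
        self.critic_linear = nn.Linear(rnn_size, 1)
        self.action_logits = nn.Linear(rnn_size, num_actions)  # ActionParameterizationDefault

    def forward(self, obs, hidden=None):
        x = self.mlp(self.conv_head(obs).flatten(1))
        x, hidden = self.core(x.unsqueeze(0), hidden)   # sequence length 1
        x = x.squeeze(0)
        return self.action_logits(x), self.critic_linear(x), hidden
```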
+[2023-02-25 03:36:09,791][19704] Decorrelating experience for 32 frames... +[2023-02-25 03:36:09,793][19690] Decorrelating experience for 32 frames... +[2023-02-25 03:36:09,845][00684] Heartbeat connected on Batcher_0 +[2023-02-25 03:36:09,848][00684] Heartbeat connected on LearnerWorker_p0 +[2023-02-25 03:36:09,904][00684] Heartbeat connected on InferenceWorker_p0-w0 +[2023-02-25 03:36:09,916][19691] Decorrelating experience for 32 frames... +[2023-02-25 03:36:10,227][19698] Decorrelating experience for 0 frames... +[2023-02-25 03:36:10,759][19706] Decorrelating experience for 32 frames... +[2023-02-25 03:36:10,834][19704] Decorrelating experience for 64 frames... +[2023-02-25 03:36:10,926][19708] Decorrelating experience for 0 frames... +[2023-02-25 03:36:11,084][19691] Decorrelating experience for 64 frames... +[2023-02-25 03:36:11,383][19698] Decorrelating experience for 32 frames... +[2023-02-25 03:36:11,524][19693] Decorrelating experience for 32 frames... +[2023-02-25 03:36:11,925][19708] Decorrelating experience for 32 frames... +[2023-02-25 03:36:12,009][19696] Decorrelating experience for 96 frames... +[2023-02-25 03:36:12,368][19706] Decorrelating experience for 64 frames... +[2023-02-25 03:36:12,381][19690] Decorrelating experience for 64 frames... +[2023-02-25 03:36:12,606][19704] Decorrelating experience for 96 frames... +[2023-02-25 03:36:13,093][00684] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4005888. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-25 03:36:13,469][19691] Decorrelating experience for 96 frames... +[2023-02-25 03:36:13,767][19698] Decorrelating experience for 64 frames... +[2023-02-25 03:36:13,923][19696] Decorrelating experience for 128 frames... +[2023-02-25 03:36:14,668][19706] Decorrelating experience for 96 frames... +[2023-02-25 03:36:14,670][19690] Decorrelating experience for 96 frames... +[2023-02-25 03:36:15,203][19698] Decorrelating experience for 96 frames... +[2023-02-25 03:36:15,350][19704] Decorrelating experience for 128 frames... +[2023-02-25 03:36:15,554][19696] Decorrelating experience for 160 frames... +[2023-02-25 03:36:15,735][19693] Decorrelating experience for 64 frames... +[2023-02-25 03:36:16,717][19690] Decorrelating experience for 128 frames... +[2023-02-25 03:36:16,725][19706] Decorrelating experience for 128 frames... +[2023-02-25 03:36:17,396][19693] Decorrelating experience for 96 frames... +[2023-02-25 03:36:17,408][19708] Decorrelating experience for 64 frames... +[2023-02-25 03:36:17,582][19696] Decorrelating experience for 192 frames... +[2023-02-25 03:36:18,093][00684] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4005888. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-25 03:36:18,584][19704] Decorrelating experience for 160 frames... +[2023-02-25 03:36:18,723][19691] Decorrelating experience for 128 frames... +[2023-02-25 03:36:18,910][19706] Decorrelating experience for 160 frames... +[2023-02-25 03:36:19,267][19696] Decorrelating experience for 224 frames... +[2023-02-25 03:36:19,637][19693] Decorrelating experience for 128 frames... +[2023-02-25 03:36:19,918][00684] Heartbeat connected on RolloutWorker_w2 +[2023-02-25 03:36:20,620][19698] Decorrelating experience for 128 frames... +[2023-02-25 03:36:21,006][19691] Decorrelating experience for 160 frames... +[2023-02-25 03:36:21,963][19706] Decorrelating experience for 192 frames... 
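From this point on the log alternates decorrelation, weight updates, checkpointing, and periodic "Fps is (10 sec: ..., 60 sec: ..., 300 sec: ...). Total num frames: ..." reports. A small hypothetical helper (not part of sample-factory) for pulling those throughput numbers out of a log like this one; the default file path is an assumption.

```python
# Hypothetical helper: extract the periodic throughput reports from sf_log.txt.
# The regex mirrors the "Fps is (...). Total num frames: N" format seen in this log.
import re

FPS_RE = re.compile(
    r"Fps is \(10 sec: ([\d.]+|nan), 60 sec: ([\d.]+|nan), 300 sec: ([\d.]+|nan)\)\."
    r" Total num frames: (\d+)"
)

def read_fps(path="/content/train_dir/default_experiment/sf_log.txt"):
    """Yield (fps_10s, fps_60s, fps_300s, total_frames) tuples from the log."""
    with open(path) as f:
        for line in f:
            m = FPS_RE.search(line)
            if m:
                yield (float(m.group(1)), float(m.group(2)),
                       float(m.group(3)), int(m.group(4)))

if __name__ == "__main__":
    reports = list(read_fps())
    if reports:
        print(f"{len(reports)} throughput reports; last: {reports[-1]}")
```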
+[2023-02-25 03:36:23,020][19704] Decorrelating experience for 192 frames... +[2023-02-25 03:36:23,094][00684] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4005888. Throughput: 0: 0.5. Samples: 8. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-25 03:36:23,260][19693] Decorrelating experience for 160 frames... +[2023-02-25 03:36:23,791][19690] Decorrelating experience for 160 frames... +[2023-02-25 03:36:24,393][19698] Decorrelating experience for 160 frames... +[2023-02-25 03:36:24,904][19691] Decorrelating experience for 192 frames... +[2023-02-25 03:36:25,147][19706] Decorrelating experience for 224 frames... +[2023-02-25 03:36:25,996][00684] Heartbeat connected on RolloutWorker_w7 +[2023-02-25 03:36:26,010][19708] Decorrelating experience for 96 frames... +[2023-02-25 03:36:26,031][19704] Decorrelating experience for 224 frames... +[2023-02-25 03:36:26,702][00684] Heartbeat connected on RolloutWorker_w5 +[2023-02-25 03:36:26,782][19690] Decorrelating experience for 192 frames... +[2023-02-25 03:36:27,407][19698] Decorrelating experience for 192 frames... +[2023-02-25 03:36:28,093][00684] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4005888. Throughput: 0: 21.6. Samples: 432. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-25 03:36:28,100][00684] Avg episode reward: [(0, '3.554')] +[2023-02-25 03:36:31,471][19675] Signal inference workers to stop experience collection... +[2023-02-25 03:36:31,496][19689] InferenceWorker_p0-w0: stopping experience collection +[2023-02-25 03:36:31,608][19693] Decorrelating experience for 192 frames... +[2023-02-25 03:36:31,609][19708] Decorrelating experience for 128 frames... +[2023-02-25 03:36:31,694][19690] Decorrelating experience for 224 frames... +[2023-02-25 03:36:32,427][00684] Heartbeat connected on RolloutWorker_w1 +[2023-02-25 03:36:32,512][19698] Decorrelating experience for 224 frames... +[2023-02-25 03:36:32,906][00684] Heartbeat connected on RolloutWorker_w4 +[2023-02-25 03:36:33,093][00684] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 4005888. Throughput: 0: 90.6. Samples: 2264. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2023-02-25 03:36:33,098][00684] Avg episode reward: [(0, '4.313')] +[2023-02-25 03:36:33,320][19691] Decorrelating experience for 224 frames... +[2023-02-25 03:36:33,447][19708] Decorrelating experience for 160 frames... +[2023-02-25 03:36:34,013][19693] Decorrelating experience for 224 frames... +[2023-02-25 03:36:34,138][00684] Heartbeat connected on RolloutWorker_w0 +[2023-02-25 03:36:34,317][00684] Heartbeat connected on RolloutWorker_w3 +[2023-02-25 03:36:34,476][19675] Signal inference workers to resume experience collection... +[2023-02-25 03:36:34,479][19689] InferenceWorker_p0-w0: resuming experience collection +[2023-02-25 03:36:36,785][19708] Decorrelating experience for 192 frames... +[2023-02-25 03:36:38,093][00684] Fps is (10 sec: 409.6, 60 sec: 136.5, 300 sec: 136.5). Total num frames: 4009984. Throughput: 0: 119.7. Samples: 3592. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2023-02-25 03:36:38,102][00684] Avg episode reward: [(0, '3.685')] +[2023-02-25 03:36:41,337][19708] Decorrelating experience for 224 frames... +[2023-02-25 03:36:42,916][00684] Heartbeat connected on RolloutWorker_w6 +[2023-02-25 03:36:43,093][00684] Fps is (10 sec: 2457.6, 60 sec: 702.2, 300 sec: 702.2). Total num frames: 4030464. Throughput: 0: 158.3. Samples: 5540. 
Policy #0 lag: (min: 0.0, avg: 0.7, max: 3.0) +[2023-02-25 03:36:43,097][00684] Avg episode reward: [(0, '3.998')] +[2023-02-25 03:36:48,001][19689] Updated weights for policy 0, policy_version 988 (0.0025) +[2023-02-25 03:36:48,093][00684] Fps is (10 sec: 3686.3, 60 sec: 1024.0, 300 sec: 1024.0). Total num frames: 4046848. Throughput: 0: 257.2. Samples: 10288. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:36:48,096][00684] Avg episode reward: [(0, '4.216')] +[2023-02-25 03:36:53,094][00684] Fps is (10 sec: 4095.7, 60 sec: 1456.3, 300 sec: 1456.3). Total num frames: 4071424. Throughput: 0: 379.8. Samples: 17092. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2023-02-25 03:36:53,099][00684] Avg episode reward: [(0, '4.289')] +[2023-02-25 03:36:56,400][19689] Updated weights for policy 0, policy_version 998 (0.0012) +[2023-02-25 03:36:58,093][00684] Fps is (10 sec: 4915.3, 60 sec: 1802.2, 300 sec: 1802.2). Total num frames: 4096000. Throughput: 0: 459.6. Samples: 20680. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 03:36:58,098][00684] Avg episode reward: [(0, '4.399')] +[2023-02-25 03:37:03,093][00684] Fps is (10 sec: 4096.3, 60 sec: 1936.3, 300 sec: 1936.3). Total num frames: 4112384. Throughput: 0: 590.9. Samples: 26592. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:37:03,100][00684] Avg episode reward: [(0, '4.432')] +[2023-02-25 03:37:07,966][19689] Updated weights for policy 0, policy_version 1008 (0.0012) +[2023-02-25 03:37:08,093][00684] Fps is (10 sec: 3276.8, 60 sec: 2048.0, 300 sec: 2048.0). Total num frames: 4128768. Throughput: 0: 700.5. Samples: 31532. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 03:37:08,098][00684] Avg episode reward: [(0, '4.532')] +[2023-02-25 03:37:13,093][00684] Fps is (10 sec: 3276.8, 60 sec: 2321.1, 300 sec: 2142.5). Total num frames: 4145152. Throughput: 0: 743.5. Samples: 33888. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:37:13,105][00684] Avg episode reward: [(0, '4.857')] +[2023-02-25 03:37:17,586][19689] Updated weights for policy 0, policy_version 1018 (0.0020) +[2023-02-25 03:37:18,093][00684] Fps is (10 sec: 4096.0, 60 sec: 2730.7, 300 sec: 2340.6). Total num frames: 4169728. Throughput: 0: 850.9. Samples: 40556. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:37:18,100][00684] Avg episode reward: [(0, '4.715')] +[2023-02-25 03:37:23,093][00684] Fps is (10 sec: 4915.2, 60 sec: 3140.3, 300 sec: 2512.2). Total num frames: 4194304. Throughput: 0: 984.9. Samples: 47912. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:37:23,097][00684] Avg episode reward: [(0, '4.624')] +[2023-02-25 03:37:27,856][19689] Updated weights for policy 0, policy_version 1028 (0.0012) +[2023-02-25 03:37:28,095][00684] Fps is (10 sec: 4095.4, 60 sec: 3413.2, 300 sec: 2560.0). Total num frames: 4210688. Throughput: 0: 997.0. Samples: 50408. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:37:28,098][00684] Avg episode reward: [(0, '4.670')] +[2023-02-25 03:37:33,100][00684] Fps is (10 sec: 3274.5, 60 sec: 3686.0, 300 sec: 2601.9). Total num frames: 4227072. Throughput: 0: 998.4. Samples: 55224. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 03:37:33,108][00684] Avg episode reward: [(0, '4.544')] +[2023-02-25 03:37:38,095][00684] Fps is (10 sec: 3686.4, 60 sec: 3959.4, 300 sec: 2685.1). Total num frames: 4247552. Throughput: 0: 968.6. Samples: 60680. 
Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:37:38,098][00684] Avg episode reward: [(0, '4.563')] +[2023-02-25 03:37:38,507][19689] Updated weights for policy 0, policy_version 1038 (0.0012) +[2023-02-25 03:37:43,098][00684] Fps is (10 sec: 4506.6, 60 sec: 4027.4, 300 sec: 2802.4). Total num frames: 4272128. Throughput: 0: 969.8. Samples: 64324. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:37:43,100][00684] Avg episode reward: [(0, '4.547')] +[2023-02-25 03:37:47,109][19689] Updated weights for policy 0, policy_version 1048 (0.0013) +[2023-02-25 03:37:48,098][00684] Fps is (10 sec: 4504.1, 60 sec: 4095.7, 300 sec: 2867.1). Total num frames: 4292608. Throughput: 0: 1002.4. Samples: 71704. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:37:48,102][00684] Avg episode reward: [(0, '4.731')] +[2023-02-25 03:37:48,116][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001048_4292608.pth... +[2023-02-25 03:37:48,328][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000921_3772416.pth +[2023-02-25 03:37:53,098][00684] Fps is (10 sec: 3686.3, 60 sec: 3959.2, 300 sec: 2886.6). Total num frames: 4308992. Throughput: 0: 1003.0. Samples: 76672. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 03:37:53,102][00684] Avg episode reward: [(0, '4.800')] +[2023-02-25 03:37:58,095][00684] Fps is (10 sec: 3277.9, 60 sec: 3822.8, 300 sec: 2904.4). Total num frames: 4325376. Throughput: 0: 1006.5. Samples: 79180. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:37:58,104][00684] Avg episode reward: [(0, '4.663')] +[2023-02-25 03:37:59,293][19689] Updated weights for policy 0, policy_version 1058 (0.0018) +[2023-02-25 03:38:03,093][00684] Fps is (10 sec: 4098.1, 60 sec: 3959.5, 300 sec: 2991.9). Total num frames: 4349952. Throughput: 0: 977.8. Samples: 84556. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:38:03,096][00684] Avg episode reward: [(0, '4.604')] +[2023-02-25 03:38:08,093][00684] Fps is (10 sec: 4506.4, 60 sec: 4027.7, 300 sec: 3037.9). Total num frames: 4370432. Throughput: 0: 966.5. Samples: 91404. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:38:08,099][00684] Avg episode reward: [(0, '4.655')] +[2023-02-25 03:38:08,690][19689] Updated weights for policy 0, policy_version 1068 (0.0012) +[2023-02-25 03:38:13,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4096.0, 300 sec: 3080.2). Total num frames: 4390912. Throughput: 0: 992.7. Samples: 95080. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:38:13,096][00684] Avg episode reward: [(0, '4.560')] +[2023-02-25 03:38:18,093][00684] Fps is (10 sec: 3686.3, 60 sec: 3959.5, 300 sec: 3087.7). Total num frames: 4407296. Throughput: 0: 997.0. Samples: 100084. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:38:18,099][00684] Avg episode reward: [(0, '4.599')] +[2023-02-25 03:38:20,406][19689] Updated weights for policy 0, policy_version 1078 (0.0012) +[2023-02-25 03:38:23,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3094.8). Total num frames: 4423680. Throughput: 0: 986.2. Samples: 105056. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:38:23,096][00684] Avg episode reward: [(0, '4.916')] +[2023-02-25 03:38:28,098][00684] Fps is (10 sec: 3684.5, 60 sec: 3891.0, 300 sec: 3130.4). Total num frames: 4444160. Throughput: 0: 964.0. Samples: 107704. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 03:38:28,104][00684] Avg episode reward: [(0, '4.957')] +[2023-02-25 03:38:30,177][19689] Updated weights for policy 0, policy_version 1088 (0.0012) +[2023-02-25 03:38:33,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4028.2, 300 sec: 3192.1). Total num frames: 4468736. Throughput: 0: 964.6. Samples: 115104. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:38:33,096][00684] Avg episode reward: [(0, '4.628')] +[2023-02-25 03:38:38,095][00684] Fps is (10 sec: 4507.2, 60 sec: 4027.7, 300 sec: 3222.1). Total num frames: 4489216. Throughput: 0: 998.1. Samples: 121584. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 03:38:38,098][00684] Avg episode reward: [(0, '4.703')] +[2023-02-25 03:38:40,977][19689] Updated weights for policy 0, policy_version 1098 (0.0012) +[2023-02-25 03:38:43,097][00684] Fps is (10 sec: 3275.6, 60 sec: 3823.0, 300 sec: 3197.4). Total num frames: 4501504. Throughput: 0: 985.3. Samples: 123520. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 03:38:43,102][00684] Avg episode reward: [(0, '4.748')] +[2023-02-25 03:38:48,095][00684] Fps is (10 sec: 2867.2, 60 sec: 3754.9, 300 sec: 3200.0). Total num frames: 4517888. Throughput: 0: 954.3. Samples: 127500. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 03:38:48,099][00684] Avg episode reward: [(0, '4.656')] +[2023-02-25 03:38:53,095][00684] Fps is (10 sec: 2458.0, 60 sec: 3618.3, 300 sec: 3152.6). Total num frames: 4526080. Throughput: 0: 886.4. Samples: 131296. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 03:38:53,098][00684] Avg episode reward: [(0, '4.620')] +[2023-02-25 03:38:56,326][19689] Updated weights for policy 0, policy_version 1108 (0.0013) +[2023-02-25 03:38:58,093][00684] Fps is (10 sec: 2458.0, 60 sec: 3618.2, 300 sec: 3156.3). Total num frames: 4542464. Throughput: 0: 848.5. Samples: 133264. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:38:58,096][00684] Avg episode reward: [(0, '4.648')] +[2023-02-25 03:39:03,093][00684] Fps is (10 sec: 4096.8, 60 sec: 3618.1, 300 sec: 3206.6). Total num frames: 4567040. Throughput: 0: 886.3. Samples: 139968. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:39:03,096][00684] Avg episode reward: [(0, '4.861')] +[2023-02-25 03:39:04,913][19689] Updated weights for policy 0, policy_version 1118 (0.0012) +[2023-02-25 03:39:08,095][00684] Fps is (10 sec: 4504.8, 60 sec: 3618.0, 300 sec: 3231.3). Total num frames: 4587520. Throughput: 0: 919.9. Samples: 146452. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:39:08,104][00684] Avg episode reward: [(0, '4.646')] +[2023-02-25 03:39:13,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3232.5). Total num frames: 4603904. Throughput: 0: 918.0. Samples: 149008. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:39:13,100][00684] Avg episode reward: [(0, '4.549')] +[2023-02-25 03:39:16,621][19689] Updated weights for policy 0, policy_version 1128 (0.0012) +[2023-02-25 03:39:18,104][00684] Fps is (10 sec: 3273.9, 60 sec: 3549.2, 300 sec: 3233.5). Total num frames: 4620288. Throughput: 0: 863.1. Samples: 153952. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:39:18,107][00684] Avg episode reward: [(0, '4.713')] +[2023-02-25 03:39:23,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3276.8). Total num frames: 4644864. Throughput: 0: 851.3. Samples: 159892. 
Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 03:39:23,098][00684] Avg episode reward: [(0, '4.616')] +[2023-02-25 03:39:26,267][19689] Updated weights for policy 0, policy_version 1138 (0.0016) +[2023-02-25 03:39:28,093][00684] Fps is (10 sec: 4920.5, 60 sec: 3755.0, 300 sec: 3317.8). Total num frames: 4669440. Throughput: 0: 890.9. Samples: 163608. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 03:39:28,099][00684] Avg episode reward: [(0, '4.470')] +[2023-02-25 03:39:33,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3316.8). Total num frames: 4685824. Throughput: 0: 952.8. Samples: 170376. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 03:39:33,099][00684] Avg episode reward: [(0, '4.522')] +[2023-02-25 03:39:36,732][19689] Updated weights for policy 0, policy_version 1148 (0.0024) +[2023-02-25 03:39:38,093][00684] Fps is (10 sec: 3686.3, 60 sec: 3618.2, 300 sec: 3335.3). Total num frames: 4706304. Throughput: 0: 981.4. Samples: 175456. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:39:38,102][00684] Avg episode reward: [(0, '4.645')] +[2023-02-25 03:39:43,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3686.6, 300 sec: 3334.0). Total num frames: 4722688. Throughput: 0: 992.9. Samples: 177944. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-25 03:39:43,095][00684] Avg episode reward: [(0, '4.589')] +[2023-02-25 03:39:47,469][19689] Updated weights for policy 0, policy_version 1158 (0.0012) +[2023-02-25 03:39:48,094][00684] Fps is (10 sec: 3686.3, 60 sec: 3754.7, 300 sec: 3351.3). Total num frames: 4743168. Throughput: 0: 975.7. Samples: 183876. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:39:48,096][00684] Avg episode reward: [(0, '4.494')] +[2023-02-25 03:39:48,116][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001159_4747264.pth... +[2023-02-25 03:39:48,311][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth +[2023-02-25 03:39:53,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4027.9, 300 sec: 3386.0). Total num frames: 4767744. Throughput: 0: 996.1. Samples: 191276. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:39:53,101][00684] Avg episode reward: [(0, '4.352')] +[2023-02-25 03:39:56,542][19689] Updated weights for policy 0, policy_version 1168 (0.0012) +[2023-02-25 03:39:58,095][00684] Fps is (10 sec: 4505.0, 60 sec: 4095.9, 300 sec: 3401.4). Total num frames: 4788224. Throughput: 0: 1012.4. Samples: 194568. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:39:58,103][00684] Avg episode reward: [(0, '4.561')] +[2023-02-25 03:40:03,094][00684] Fps is (10 sec: 3686.2, 60 sec: 3959.4, 300 sec: 3398.8). Total num frames: 4804608. Throughput: 0: 1010.4. Samples: 199408. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:40:03,098][00684] Avg episode reward: [(0, '4.620')] +[2023-02-25 03:40:08,093][00684] Fps is (10 sec: 3277.4, 60 sec: 3891.3, 300 sec: 3396.3). Total num frames: 4820992. Throughput: 0: 987.7. Samples: 204340. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 03:40:08,097][00684] Avg episode reward: [(0, '4.607')] +[2023-02-25 03:40:08,891][19689] Updated weights for policy 0, policy_version 1178 (0.0014) +[2023-02-25 03:40:13,093][00684] Fps is (10 sec: 3686.6, 60 sec: 3959.5, 300 sec: 3410.5). Total num frames: 4841472. Throughput: 0: 975.8. Samples: 207520. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 03:40:13,096][00684] Avg episode reward: [(0, '4.666')] +[2023-02-25 03:40:17,188][19689] Updated weights for policy 0, policy_version 1188 (0.0021) +[2023-02-25 03:40:18,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4165.0, 300 sec: 3457.0). Total num frames: 4870144. Throughput: 0: 987.6. Samples: 214816. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:40:18,096][00684] Avg episode reward: [(0, '4.615')] +[2023-02-25 03:40:23,095][00684] Fps is (10 sec: 4504.8, 60 sec: 4027.6, 300 sec: 3453.5). Total num frames: 4886528. Throughput: 0: 1004.5. Samples: 220660. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:40:23,098][00684] Avg episode reward: [(0, '4.579')] +[2023-02-25 03:40:28,098][00684] Fps is (10 sec: 3275.2, 60 sec: 3890.9, 300 sec: 3450.0). Total num frames: 4902912. Throughput: 0: 1005.0. Samples: 223172. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:40:28,100][00684] Avg episode reward: [(0, '4.620')] +[2023-02-25 03:40:29,331][19689] Updated weights for policy 0, policy_version 1198 (0.0012) +[2023-02-25 03:40:33,093][00684] Fps is (10 sec: 3277.3, 60 sec: 3891.2, 300 sec: 3446.8). Total num frames: 4919296. Throughput: 0: 982.1. Samples: 228072. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:40:33,099][00684] Avg episode reward: [(0, '4.528')] +[2023-02-25 03:40:38,093][00684] Fps is (10 sec: 4097.9, 60 sec: 3959.5, 300 sec: 3474.0). Total num frames: 4943872. Throughput: 0: 967.5. Samples: 234812. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:40:38,099][00684] Avg episode reward: [(0, '4.544')] +[2023-02-25 03:40:38,646][19689] Updated weights for policy 0, policy_version 1208 (0.0013) +[2023-02-25 03:40:43,096][00684] Fps is (10 sec: 4914.0, 60 sec: 4095.8, 300 sec: 3500.2). Total num frames: 4968448. Throughput: 0: 975.1. Samples: 238448. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:40:43,100][00684] Avg episode reward: [(0, '4.620')] +[2023-02-25 03:40:48,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4027.8, 300 sec: 3496.2). Total num frames: 4984832. Throughput: 0: 996.4. Samples: 244244. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:40:48,095][00684] Avg episode reward: [(0, '4.755')] +[2023-02-25 03:40:49,103][19689] Updated weights for policy 0, policy_version 1218 (0.0011) +[2023-02-25 03:40:53,095][00684] Fps is (10 sec: 2867.5, 60 sec: 3822.8, 300 sec: 3478.0). Total num frames: 4997120. Throughput: 0: 984.3. Samples: 248636. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:40:53,097][00684] Avg episode reward: [(0, '4.765')] +[2023-02-25 03:40:58,093][00684] Fps is (10 sec: 2457.6, 60 sec: 3686.5, 300 sec: 3460.4). Total num frames: 5009408. Throughput: 0: 956.1. Samples: 250544. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:40:58,095][00684] Avg episode reward: [(0, '4.811')] +[2023-02-25 03:41:03,093][00684] Fps is (10 sec: 2867.7, 60 sec: 3686.4, 300 sec: 3457.3). Total num frames: 5025792. Throughput: 0: 886.4. Samples: 254704. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:41:03,098][00684] Avg episode reward: [(0, '4.732')] +[2023-02-25 03:41:04,441][19689] Updated weights for policy 0, policy_version 1228 (0.0012) +[2023-02-25 03:41:08,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3512.8). Total num frames: 5042176. Throughput: 0: 863.4. Samples: 259512. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:41:08,095][00684] Avg episode reward: [(0, '4.597')] +[2023-02-25 03:41:13,094][00684] Fps is (10 sec: 3276.4, 60 sec: 3618.1, 300 sec: 3568.4). Total num frames: 5058560. Throughput: 0: 880.6. Samples: 262796. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:41:13,100][00684] Avg episode reward: [(0, '4.705')] +[2023-02-25 03:41:15,541][19689] Updated weights for policy 0, policy_version 1238 (0.0012) +[2023-02-25 03:41:18,093][00684] Fps is (10 sec: 3686.5, 60 sec: 3481.6, 300 sec: 3637.8). Total num frames: 5079040. Throughput: 0: 877.5. Samples: 267560. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 03:41:18,100][00684] Avg episode reward: [(0, '4.698')] +[2023-02-25 03:41:23,094][00684] Fps is (10 sec: 3686.6, 60 sec: 3481.7, 300 sec: 3693.3). Total num frames: 5095424. Throughput: 0: 836.8. Samples: 272468. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:41:23,100][00684] Avg episode reward: [(0, '4.738')] +[2023-02-25 03:41:26,865][19689] Updated weights for policy 0, policy_version 1248 (0.0014) +[2023-02-25 03:41:28,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3550.1, 300 sec: 3762.8). Total num frames: 5115904. Throughput: 0: 819.6. Samples: 275328. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:41:28,095][00684] Avg episode reward: [(0, '4.833')] +[2023-02-25 03:41:33,093][00684] Fps is (10 sec: 4505.9, 60 sec: 3686.4, 300 sec: 3832.2). Total num frames: 5140480. Throughput: 0: 850.8. Samples: 282528. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:41:33,098][00684] Avg episode reward: [(0, '4.522')] +[2023-02-25 03:41:36,575][19689] Updated weights for policy 0, policy_version 1258 (0.0013) +[2023-02-25 03:41:38,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3549.9, 300 sec: 3818.3). Total num frames: 5156864. Throughput: 0: 884.2. Samples: 288424. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:41:38,098][00684] Avg episode reward: [(0, '4.542')] +[2023-02-25 03:41:43,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3413.5, 300 sec: 3818.3). Total num frames: 5173248. Throughput: 0: 895.8. Samples: 290856. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:41:43,103][00684] Avg episode reward: [(0, '4.547')] +[2023-02-25 03:41:48,097][00684] Fps is (10 sec: 3276.8, 60 sec: 3413.3, 300 sec: 3790.5). Total num frames: 5189632. Throughput: 0: 910.7. Samples: 295684. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:41:48,101][00684] Avg episode reward: [(0, '4.576')] +[2023-02-25 03:41:48,115][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001267_5189632.pth... +[2023-02-25 03:41:48,397][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001048_4292608.pth +[2023-02-25 03:41:48,970][19689] Updated weights for policy 0, policy_version 1268 (0.0033) +[2023-02-25 03:41:53,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3550.0, 300 sec: 3776.7). Total num frames: 5210112. Throughput: 0: 942.0. Samples: 301904. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:41:53,097][00684] Avg episode reward: [(0, '4.511')] +[2023-02-25 03:41:57,345][19689] Updated weights for policy 0, policy_version 1278 (0.0026) +[2023-02-25 03:41:58,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3804.4). Total num frames: 5234688. Throughput: 0: 948.3. Samples: 305468. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:41:58,106][00684] Avg episode reward: [(0, '4.460')] +[2023-02-25 03:42:03,097][00684] Fps is (10 sec: 4503.8, 60 sec: 3822.7, 300 sec: 3818.3). Total num frames: 5255168. Throughput: 0: 976.4. Samples: 311500. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:42:03,100][00684] Avg episode reward: [(0, '4.641')] +[2023-02-25 03:42:08,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3804.4). Total num frames: 5267456. Throughput: 0: 975.3. Samples: 316356. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:42:08,099][00684] Avg episode reward: [(0, '4.758')] +[2023-02-25 03:42:09,938][19689] Updated weights for policy 0, policy_version 1288 (0.0020) +[2023-02-25 03:42:13,093][00684] Fps is (10 sec: 2868.4, 60 sec: 3754.7, 300 sec: 3776.7). Total num frames: 5283840. Throughput: 0: 963.5. Samples: 318684. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:42:13,096][00684] Avg episode reward: [(0, '4.865')] +[2023-02-25 03:42:18,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 5308416. Throughput: 0: 937.6. Samples: 324720. Policy #0 lag: (min: 0.0, avg: 0.6, max: 3.0) +[2023-02-25 03:42:18,096][00684] Avg episode reward: [(0, '4.671')] +[2023-02-25 03:42:19,418][19689] Updated weights for policy 0, policy_version 1298 (0.0014) +[2023-02-25 03:42:23,098][00684] Fps is (10 sec: 4912.9, 60 sec: 3959.2, 300 sec: 3804.4). Total num frames: 5332992. Throughput: 0: 965.4. Samples: 331872. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:42:23,105][00684] Avg episode reward: [(0, '4.558')] +[2023-02-25 03:42:28,095][00684] Fps is (10 sec: 3685.8, 60 sec: 3822.8, 300 sec: 3790.6). Total num frames: 5345280. Throughput: 0: 971.8. Samples: 334588. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:42:28,097][00684] Avg episode reward: [(0, '4.726')] +[2023-02-25 03:42:30,699][19689] Updated weights for policy 0, policy_version 1308 (0.0014) +[2023-02-25 03:42:33,093][00684] Fps is (10 sec: 2868.6, 60 sec: 3686.4, 300 sec: 3776.7). Total num frames: 5361664. Throughput: 0: 971.7. Samples: 339412. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 03:42:33,100][00684] Avg episode reward: [(0, '4.802')] +[2023-02-25 03:42:38,093][00684] Fps is (10 sec: 3687.0, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 5382144. Throughput: 0: 941.8. Samples: 344284. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 03:42:38,097][00684] Avg episode reward: [(0, '4.686')] +[2023-02-25 03:42:41,415][19689] Updated weights for policy 0, policy_version 1318 (0.0023) +[2023-02-25 03:42:43,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3776.7). Total num frames: 5406720. Throughput: 0: 941.7. Samples: 347844. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-25 03:42:43,096][00684] Avg episode reward: [(0, '4.536')] +[2023-02-25 03:42:48,093][00684] Fps is (10 sec: 4505.5, 60 sec: 3959.5, 300 sec: 3790.6). Total num frames: 5427200. Throughput: 0: 968.2. Samples: 355064. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:42:48,097][00684] Avg episode reward: [(0, '4.756')] +[2023-02-25 03:42:50,495][19689] Updated weights for policy 0, policy_version 1328 (0.0012) +[2023-02-25 03:42:53,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3959.5, 300 sec: 3804.4). Total num frames: 5447680. Throughput: 0: 978.4. Samples: 360384. 
Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 03:42:53,098][00684] Avg episode reward: [(0, '4.771')] +[2023-02-25 03:42:58,093][00684] Fps is (10 sec: 3276.9, 60 sec: 3754.7, 300 sec: 3762.8). Total num frames: 5459968. Throughput: 0: 979.4. Samples: 362756. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:42:58,101][00684] Avg episode reward: [(0, '4.644')] +[2023-02-25 03:43:03,038][19689] Updated weights for policy 0, policy_version 1338 (0.0014) +[2023-02-25 03:43:03,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3754.9, 300 sec: 3762.8). Total num frames: 5480448. Throughput: 0: 952.5. Samples: 367584. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:43:03,096][00684] Avg episode reward: [(0, '4.570')] +[2023-02-25 03:43:08,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 5500928. Throughput: 0: 931.8. Samples: 373800. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 03:43:08,097][00684] Avg episode reward: [(0, '4.565')] +[2023-02-25 03:43:13,095][00684] Fps is (10 sec: 3276.2, 60 sec: 3822.8, 300 sec: 3748.9). Total num frames: 5513216. Throughput: 0: 925.1. Samples: 376216. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 03:43:13,106][00684] Avg episode reward: [(0, '4.563')] +[2023-02-25 03:43:15,781][19689] Updated weights for policy 0, policy_version 1348 (0.0012) +[2023-02-25 03:43:18,093][00684] Fps is (10 sec: 2457.6, 60 sec: 3618.1, 300 sec: 3735.0). Total num frames: 5525504. Throughput: 0: 907.7. Samples: 380260. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:43:18,101][00684] Avg episode reward: [(0, '4.570')] +[2023-02-25 03:43:23,093][00684] Fps is (10 sec: 2867.7, 60 sec: 3481.9, 300 sec: 3721.2). Total num frames: 5541888. Throughput: 0: 884.2. Samples: 384072. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:43:23,101][00684] Avg episode reward: [(0, '4.568')] +[2023-02-25 03:43:28,093][00684] Fps is (10 sec: 3276.7, 60 sec: 3550.0, 300 sec: 3693.3). Total num frames: 5558272. Throughput: 0: 856.4. Samples: 386384. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-25 03:43:28,099][00684] Avg episode reward: [(0, '4.781')] +[2023-02-25 03:43:29,821][19689] Updated weights for policy 0, policy_version 1358 (0.0013) +[2023-02-25 03:43:33,096][00684] Fps is (10 sec: 3275.9, 60 sec: 3549.7, 300 sec: 3679.4). Total num frames: 5574656. Throughput: 0: 818.4. Samples: 391892. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:43:33,101][00684] Avg episode reward: [(0, '4.809')] +[2023-02-25 03:43:38,093][00684] Fps is (10 sec: 4096.1, 60 sec: 3618.1, 300 sec: 3721.2). Total num frames: 5599232. Throughput: 0: 860.7. Samples: 399116. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:43:38,097][00684] Avg episode reward: [(0, '4.708')] +[2023-02-25 03:43:38,306][19689] Updated weights for policy 0, policy_version 1368 (0.0014) +[2023-02-25 03:43:43,094][00684] Fps is (10 sec: 4506.7, 60 sec: 3549.8, 300 sec: 3735.0). Total num frames: 5619712. Throughput: 0: 884.0. Samples: 402536. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 03:43:43,099][00684] Avg episode reward: [(0, '4.840')] +[2023-02-25 03:43:48,093][00684] Fps is (10 sec: 3686.3, 60 sec: 3481.6, 300 sec: 3762.8). Total num frames: 5636096. Throughput: 0: 884.4. Samples: 407380. 
Policy #0 lag: (min: 0.0, avg: 0.7, max: 3.0) +[2023-02-25 03:43:48,103][00684] Avg episode reward: [(0, '4.849')] +[2023-02-25 03:43:48,120][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001376_5636096.pth... +[2023-02-25 03:43:48,348][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001159_4747264.pth +[2023-02-25 03:43:50,027][19689] Updated weights for policy 0, policy_version 1378 (0.0022) +[2023-02-25 03:43:53,094][00684] Fps is (10 sec: 3276.7, 60 sec: 3413.3, 300 sec: 3762.8). Total num frames: 5652480. Throughput: 0: 851.5. Samples: 412116. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 03:43:53,102][00684] Avg episode reward: [(0, '4.760')] +[2023-02-25 03:43:58,095][00684] Fps is (10 sec: 3685.7, 60 sec: 3549.7, 300 sec: 3748.9). Total num frames: 5672960. Throughput: 0: 864.3. Samples: 415108. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 03:43:58,098][00684] Avg episode reward: [(0, '4.526')] +[2023-02-25 03:44:00,011][19689] Updated weights for policy 0, policy_version 1388 (0.0035) +[2023-02-25 03:44:03,093][00684] Fps is (10 sec: 4505.9, 60 sec: 3618.1, 300 sec: 3762.8). Total num frames: 5697536. Throughput: 0: 935.6. Samples: 422364. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 03:44:03,096][00684] Avg episode reward: [(0, '4.963')] +[2023-02-25 03:44:08,098][00684] Fps is (10 sec: 4504.3, 60 sec: 3617.8, 300 sec: 3776.6). Total num frames: 5718016. Throughput: 0: 987.6. Samples: 428520. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-25 03:44:08,103][00684] Avg episode reward: [(0, '4.914')] +[2023-02-25 03:44:10,497][19689] Updated weights for policy 0, policy_version 1398 (0.0023) +[2023-02-25 03:44:13,094][00684] Fps is (10 sec: 3686.2, 60 sec: 3686.5, 300 sec: 3776.8). Total num frames: 5734400. Throughput: 0: 991.5. Samples: 431000. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:44:13,098][00684] Avg episode reward: [(0, '4.607')] +[2023-02-25 03:44:18,094][00684] Fps is (10 sec: 3278.3, 60 sec: 3754.6, 300 sec: 3748.9). Total num frames: 5750784. Throughput: 0: 978.3. Samples: 435912. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 03:44:18,097][00684] Avg episode reward: [(0, '4.433')] +[2023-02-25 03:44:21,354][19689] Updated weights for policy 0, policy_version 1408 (0.0027) +[2023-02-25 03:44:23,093][00684] Fps is (10 sec: 3686.6, 60 sec: 3822.9, 300 sec: 3735.0). Total num frames: 5771264. Throughput: 0: 956.4. Samples: 442156. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:44:23,099][00684] Avg episode reward: [(0, '4.622')] +[2023-02-25 03:44:28,093][00684] Fps is (10 sec: 4915.4, 60 sec: 4027.7, 300 sec: 3776.7). Total num frames: 5799936. Throughput: 0: 964.7. Samples: 445948. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:44:28,096][00684] Avg episode reward: [(0, '4.469')] +[2023-02-25 03:44:30,547][19689] Updated weights for policy 0, policy_version 1418 (0.0022) +[2023-02-25 03:44:33,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4027.9, 300 sec: 3762.8). Total num frames: 5816320. Throughput: 0: 996.4. Samples: 452220. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 03:44:33,096][00684] Avg episode reward: [(0, '4.676')] +[2023-02-25 03:44:38,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 5832704. Throughput: 0: 1003.7. Samples: 457280. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:44:38,100][00684] Avg episode reward: [(0, '4.758')] +[2023-02-25 03:44:42,743][19689] Updated weights for policy 0, policy_version 1428 (0.0018) +[2023-02-25 03:44:43,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 5853184. Throughput: 0: 992.9. Samples: 459788. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 03:44:43,097][00684] Avg episode reward: [(0, '4.784')] +[2023-02-25 03:44:48,093][00684] Fps is (10 sec: 4505.5, 60 sec: 4027.7, 300 sec: 3762.8). Total num frames: 5877760. Throughput: 0: 978.4. Samples: 466392. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 03:44:48,101][00684] Avg episode reward: [(0, '4.741')] +[2023-02-25 03:44:50,504][19689] Updated weights for policy 0, policy_version 1438 (0.0012) +[2023-02-25 03:44:53,094][00684] Fps is (10 sec: 4505.2, 60 sec: 4096.0, 300 sec: 3762.8). Total num frames: 5898240. Throughput: 0: 1006.8. Samples: 473820. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:44:53,101][00684] Avg episode reward: [(0, '4.815')] +[2023-02-25 03:44:58,095][00684] Fps is (10 sec: 3685.9, 60 sec: 4027.8, 300 sec: 3762.8). Total num frames: 5914624. Throughput: 0: 1011.4. Samples: 476516. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:44:58,099][00684] Avg episode reward: [(0, '4.623')] +[2023-02-25 03:45:01,803][19689] Updated weights for policy 0, policy_version 1448 (0.0020) +[2023-02-25 03:45:03,093][00684] Fps is (10 sec: 3277.1, 60 sec: 3891.2, 300 sec: 3762.8). Total num frames: 5931008. Throughput: 0: 1013.7. Samples: 481528. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:45:03,098][00684] Avg episode reward: [(0, '4.368')] +[2023-02-25 03:45:08,093][00684] Fps is (10 sec: 3687.0, 60 sec: 3891.5, 300 sec: 3762.8). Total num frames: 5951488. Throughput: 0: 992.4. Samples: 486812. Policy #0 lag: (min: 0.0, avg: 0.8, max: 3.0) +[2023-02-25 03:45:08,096][00684] Avg episode reward: [(0, '4.382')] +[2023-02-25 03:45:11,699][19689] Updated weights for policy 0, policy_version 1458 (0.0013) +[2023-02-25 03:45:13,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4027.8, 300 sec: 3748.9). Total num frames: 5976064. Throughput: 0: 988.4. Samples: 490428. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:45:13,101][00684] Avg episode reward: [(0, '4.681')] +[2023-02-25 03:45:18,095][00684] Fps is (10 sec: 4914.3, 60 sec: 4164.2, 300 sec: 3776.6). Total num frames: 6000640. Throughput: 0: 1016.3. Samples: 497956. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 03:45:18,098][00684] Avg episode reward: [(0, '4.761')] +[2023-02-25 03:45:22,272][19689] Updated weights for policy 0, policy_version 1468 (0.0017) +[2023-02-25 03:45:23,096][00684] Fps is (10 sec: 3685.4, 60 sec: 4027.5, 300 sec: 3762.8). Total num frames: 6012928. Throughput: 0: 1008.8. Samples: 502680. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:45:23,104][00684] Avg episode reward: [(0, '4.622')] +[2023-02-25 03:45:28,097][00684] Fps is (10 sec: 2457.1, 60 sec: 3754.4, 300 sec: 3748.8). Total num frames: 6025216. Throughput: 0: 995.7. Samples: 504600. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:45:28,100][00684] Avg episode reward: [(0, '4.587')] +[2023-02-25 03:45:33,094][00684] Fps is (10 sec: 2458.1, 60 sec: 3686.4, 300 sec: 3707.2). Total num frames: 6037504. Throughput: 0: 935.5. Samples: 508488. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:45:33,096][00684] Avg episode reward: [(0, '4.434')] +[2023-02-25 03:45:37,977][19689] Updated weights for policy 0, policy_version 1478 (0.0017) +[2023-02-25 03:45:38,095][00684] Fps is (10 sec: 2867.9, 60 sec: 3686.3, 300 sec: 3679.5). Total num frames: 6053888. Throughput: 0: 857.3. Samples: 512400. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:45:38,101][00684] Avg episode reward: [(0, '4.483')] +[2023-02-25 03:45:43,093][00684] Fps is (10 sec: 3686.7, 60 sec: 3686.4, 300 sec: 3693.3). Total num frames: 6074368. Throughput: 0: 865.1. Samples: 515444. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:45:43,099][00684] Avg episode reward: [(0, '4.488')] +[2023-02-25 03:45:46,211][19689] Updated weights for policy 0, policy_version 1488 (0.0013) +[2023-02-25 03:45:48,093][00684] Fps is (10 sec: 4506.3, 60 sec: 3686.4, 300 sec: 3735.0). Total num frames: 6098944. Throughput: 0: 916.3. Samples: 522760. Policy #0 lag: (min: 0.0, avg: 0.8, max: 3.0) +[2023-02-25 03:45:48,103][00684] Avg episode reward: [(0, '4.366')] +[2023-02-25 03:45:48,115][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001489_6098944.pth... +[2023-02-25 03:45:48,349][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001267_5189632.pth +[2023-02-25 03:45:53,099][00684] Fps is (10 sec: 4093.7, 60 sec: 3617.8, 300 sec: 3748.8). Total num frames: 6115328. Throughput: 0: 915.7. Samples: 528024. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:45:53,104][00684] Avg episode reward: [(0, '4.397')] +[2023-02-25 03:45:58,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3618.2, 300 sec: 3748.9). Total num frames: 6131712. Throughput: 0: 892.4. Samples: 530584. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 03:45:58,096][00684] Avg episode reward: [(0, '4.696')] +[2023-02-25 03:45:58,185][19689] Updated weights for policy 0, policy_version 1498 (0.0013) +[2023-02-25 03:46:03,093][00684] Fps is (10 sec: 3688.5, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 6152192. Throughput: 0: 838.6. Samples: 535692. Policy #0 lag: (min: 0.0, avg: 0.7, max: 3.0) +[2023-02-25 03:46:03,096][00684] Avg episode reward: [(0, '4.916')] +[2023-02-25 03:46:07,459][19689] Updated weights for policy 0, policy_version 1508 (0.0016) +[2023-02-25 03:46:08,093][00684] Fps is (10 sec: 4505.5, 60 sec: 3754.6, 300 sec: 3790.5). Total num frames: 6176768. Throughput: 0: 898.1. Samples: 543092. Policy #0 lag: (min: 0.0, avg: 0.7, max: 3.0) +[2023-02-25 03:46:08,100][00684] Avg episode reward: [(0, '4.649')] +[2023-02-25 03:46:13,095][00684] Fps is (10 sec: 4914.5, 60 sec: 3754.6, 300 sec: 3804.4). Total num frames: 6201344. Throughput: 0: 939.2. Samples: 546864. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:46:13,097][00684] Avg episode reward: [(0, '4.572')] +[2023-02-25 03:46:17,621][19689] Updated weights for policy 0, policy_version 1518 (0.0016) +[2023-02-25 03:46:18,093][00684] Fps is (10 sec: 4096.1, 60 sec: 3618.2, 300 sec: 3804.4). Total num frames: 6217728. Throughput: 0: 977.0. Samples: 552452. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:46:18,101][00684] Avg episode reward: [(0, '4.846')] +[2023-02-25 03:46:23,094][00684] Fps is (10 sec: 3277.1, 60 sec: 3686.5, 300 sec: 3790.5). Total num frames: 6234112. Throughput: 0: 1002.2. Samples: 557500. 
Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:46:23,103][00684] Avg episode reward: [(0, '4.894')] +[2023-02-25 03:46:28,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3823.2, 300 sec: 3776.6). Total num frames: 6254592. Throughput: 0: 992.2. Samples: 560092. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:46:28,101][00684] Avg episode reward: [(0, '5.012')] +[2023-02-25 03:46:28,445][19689] Updated weights for policy 0, policy_version 1528 (0.0014) +[2023-02-25 03:46:33,093][00684] Fps is (10 sec: 4505.8, 60 sec: 4027.8, 300 sec: 3804.4). Total num frames: 6279168. Throughput: 0: 992.6. Samples: 567428. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:46:33,099][00684] Avg episode reward: [(0, '4.910')] +[2023-02-25 03:46:37,136][19689] Updated weights for policy 0, policy_version 1538 (0.0012) +[2023-02-25 03:46:38,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4164.4, 300 sec: 3832.2). Total num frames: 6303744. Throughput: 0: 1032.6. Samples: 574484. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 03:46:38,098][00684] Avg episode reward: [(0, '4.799')] +[2023-02-25 03:46:43,094][00684] Fps is (10 sec: 4095.5, 60 sec: 4095.9, 300 sec: 3832.2). Total num frames: 6320128. Throughput: 0: 1032.5. Samples: 577048. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-25 03:46:43,104][00684] Avg episode reward: [(0, '4.679')] +[2023-02-25 03:46:48,095][00684] Fps is (10 sec: 3276.1, 60 sec: 3959.3, 300 sec: 3818.3). Total num frames: 6336512. Throughput: 0: 1033.7. Samples: 582212. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 03:46:48,098][00684] Avg episode reward: [(0, '4.677')] +[2023-02-25 03:46:49,349][19689] Updated weights for policy 0, policy_version 1548 (0.0030) +[2023-02-25 03:46:53,095][00684] Fps is (10 sec: 3686.2, 60 sec: 4028.0, 300 sec: 3804.4). Total num frames: 6356992. Throughput: 0: 1003.8. Samples: 588264. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 03:46:53,098][00684] Avg episode reward: [(0, '4.662')] +[2023-02-25 03:46:57,770][19689] Updated weights for policy 0, policy_version 1558 (0.0012) +[2023-02-25 03:46:58,093][00684] Fps is (10 sec: 4916.3, 60 sec: 4232.5, 300 sec: 3832.2). Total num frames: 6385664. Throughput: 0: 1002.3. Samples: 591964. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 03:46:58,096][00684] Avg episode reward: [(0, '4.527')] +[2023-02-25 03:47:03,099][00684] Fps is (10 sec: 4503.7, 60 sec: 4163.9, 300 sec: 3846.0). Total num frames: 6402048. Throughput: 0: 1033.3. Samples: 598956. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 03:47:03,107][00684] Avg episode reward: [(0, '4.666')] +[2023-02-25 03:47:08,008][19689] Updated weights for policy 0, policy_version 1568 (0.0014) +[2023-02-25 03:47:08,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4096.0, 300 sec: 3860.0). Total num frames: 6422528. Throughput: 0: 1036.5. Samples: 604144. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:47:08,096][00684] Avg episode reward: [(0, '4.906')] +[2023-02-25 03:47:13,093][00684] Fps is (10 sec: 3688.4, 60 sec: 3959.5, 300 sec: 3832.2). Total num frames: 6438912. Throughput: 0: 1033.4. Samples: 606596. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 03:47:13,098][00684] Avg episode reward: [(0, '4.887')] +[2023-02-25 03:47:18,094][00684] Fps is (10 sec: 3686.1, 60 sec: 4027.7, 300 sec: 3818.4). Total num frames: 6459392. Throughput: 0: 997.5. Samples: 612316. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:47:18,096][00684] Avg episode reward: [(0, '4.587')] +[2023-02-25 03:47:18,423][19689] Updated weights for policy 0, policy_version 1578 (0.0013) +[2023-02-25 03:47:23,093][00684] Fps is (10 sec: 4505.7, 60 sec: 4164.3, 300 sec: 3860.0). Total num frames: 6483968. Throughput: 0: 1005.8. Samples: 619744. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:47:23,096][00684] Avg episode reward: [(0, '4.636')] +[2023-02-25 03:47:28,015][19689] Updated weights for policy 0, policy_version 1588 (0.0012) +[2023-02-25 03:47:28,101][00684] Fps is (10 sec: 4502.4, 60 sec: 4163.7, 300 sec: 3873.7). Total num frames: 6504448. Throughput: 0: 1022.6. Samples: 623072. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:47:28,109][00684] Avg episode reward: [(0, '4.780')] +[2023-02-25 03:47:33,097][00684] Fps is (10 sec: 3275.6, 60 sec: 3959.2, 300 sec: 3846.0). Total num frames: 6516736. Throughput: 0: 1017.0. Samples: 627980. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:47:33,100][00684] Avg episode reward: [(0, '4.963')] +[2023-02-25 03:47:38,095][00684] Fps is (10 sec: 2868.9, 60 sec: 3822.8, 300 sec: 3818.3). Total num frames: 6533120. Throughput: 0: 975.4. Samples: 632156. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 03:47:38,105][00684] Avg episode reward: [(0, '4.768')] +[2023-02-25 03:47:42,694][19689] Updated weights for policy 0, policy_version 1598 (0.0020) +[2023-02-25 03:47:43,098][00684] Fps is (10 sec: 2866.9, 60 sec: 3754.4, 300 sec: 3790.5). Total num frames: 6545408. Throughput: 0: 936.3. Samples: 634104. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:47:43,101][00684] Avg episode reward: [(0, '4.672')] +[2023-02-25 03:47:48,093][00684] Fps is (10 sec: 2867.7, 60 sec: 3754.8, 300 sec: 3776.7). Total num frames: 6561792. Throughput: 0: 885.4. Samples: 638796. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:47:48,096][00684] Avg episode reward: [(0, '4.463')] +[2023-02-25 03:47:48,107][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001602_6561792.pth... +[2023-02-25 03:47:48,338][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001376_5636096.pth +[2023-02-25 03:47:53,093][00684] Fps is (10 sec: 3278.3, 60 sec: 3686.5, 300 sec: 3790.5). Total num frames: 6578176. Throughput: 0: 881.8. Samples: 643824. Policy #0 lag: (min: 0.0, avg: 0.8, max: 3.0) +[2023-02-25 03:47:53,097][00684] Avg episode reward: [(0, '4.567')] +[2023-02-25 03:47:54,723][19689] Updated weights for policy 0, policy_version 1608 (0.0018) +[2023-02-25 03:47:58,094][00684] Fps is (10 sec: 3276.6, 60 sec: 3481.6, 300 sec: 3776.6). Total num frames: 6594560. Throughput: 0: 880.9. Samples: 646236. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:47:58,102][00684] Avg episode reward: [(0, '4.632')] +[2023-02-25 03:48:03,093][00684] Fps is (10 sec: 3276.7, 60 sec: 3481.9, 300 sec: 3762.8). Total num frames: 6610944. Throughput: 0: 861.0. Samples: 651060. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:48:03,095][00684] Avg episode reward: [(0, '4.580')] +[2023-02-25 03:48:06,881][19689] Updated weights for policy 0, policy_version 1618 (0.0014) +[2023-02-25 03:48:08,096][00684] Fps is (10 sec: 3276.1, 60 sec: 3413.2, 300 sec: 3776.6). Total num frames: 6627328. Throughput: 0: 816.3. Samples: 656480. 
Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:48:08,104][00684] Avg episode reward: [(0, '4.657')] +[2023-02-25 03:48:13,093][00684] Fps is (10 sec: 4505.7, 60 sec: 3618.2, 300 sec: 3832.2). Total num frames: 6656000. Throughput: 0: 822.6. Samples: 660084. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:48:13,095][00684] Avg episode reward: [(0, '4.583')] +[2023-02-25 03:48:15,558][19689] Updated weights for policy 0, policy_version 1628 (0.0017) +[2023-02-25 03:48:18,097][00684] Fps is (10 sec: 4914.7, 60 sec: 3617.9, 300 sec: 3846.0). Total num frames: 6676480. Throughput: 0: 872.5. Samples: 667244. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:48:18,100][00684] Avg episode reward: [(0, '4.478')] +[2023-02-25 03:48:23,101][00684] Fps is (10 sec: 3274.2, 60 sec: 3412.9, 300 sec: 3832.1). Total num frames: 6688768. Throughput: 0: 890.3. Samples: 672224. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:48:23,104][00684] Avg episode reward: [(0, '4.452')] +[2023-02-25 03:48:28,002][19689] Updated weights for policy 0, policy_version 1638 (0.0016) +[2023-02-25 03:48:28,093][00684] Fps is (10 sec: 3278.0, 60 sec: 3413.8, 300 sec: 3846.1). Total num frames: 6709248. Throughput: 0: 902.4. Samples: 674708. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:48:28,102][00684] Avg episode reward: [(0, '4.676')] +[2023-02-25 03:48:33,093][00684] Fps is (10 sec: 4099.2, 60 sec: 3550.1, 300 sec: 3832.2). Total num frames: 6729728. Throughput: 0: 917.8. Samples: 680096. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:48:33,096][00684] Avg episode reward: [(0, '4.864')] +[2023-02-25 03:48:36,621][19689] Updated weights for policy 0, policy_version 1648 (0.0012) +[2023-02-25 03:48:38,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3686.5, 300 sec: 3846.1). Total num frames: 6754304. Throughput: 0: 973.2. Samples: 687616. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:48:38,096][00684] Avg episode reward: [(0, '4.563')] +[2023-02-25 03:48:43,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3823.2, 300 sec: 3860.0). Total num frames: 6774784. Throughput: 0: 1000.4. Samples: 691252. Policy #0 lag: (min: 0.0, avg: 0.7, max: 3.0) +[2023-02-25 03:48:43,098][00684] Avg episode reward: [(0, '4.579')] +[2023-02-25 03:48:46,980][19689] Updated weights for policy 0, policy_version 1658 (0.0016) +[2023-02-25 03:48:48,094][00684] Fps is (10 sec: 3686.2, 60 sec: 3822.9, 300 sec: 3860.0). Total num frames: 6791168. Throughput: 0: 1006.5. Samples: 696352. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:48:48,100][00684] Avg episode reward: [(0, '4.546')] +[2023-02-25 03:48:53,093][00684] Fps is (10 sec: 3276.7, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 6807552. Throughput: 0: 995.1. Samples: 701256. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:48:53,101][00684] Avg episode reward: [(0, '4.669')] +[2023-02-25 03:48:57,737][19689] Updated weights for policy 0, policy_version 1668 (0.0021) +[2023-02-25 03:48:58,093][00684] Fps is (10 sec: 4096.2, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 6832128. Throughput: 0: 977.0. Samples: 704048. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:48:58,102][00684] Avg episode reward: [(0, '4.735')] +[2023-02-25 03:49:03,093][00684] Fps is (10 sec: 4915.4, 60 sec: 4096.0, 300 sec: 3860.0). Total num frames: 6856704. Throughput: 0: 982.6. Samples: 711456. 
Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:49:03,097][00684] Avg episode reward: [(0, '4.790')] +[2023-02-25 03:49:06,669][19689] Updated weights for policy 0, policy_version 1678 (0.0014) +[2023-02-25 03:49:08,095][00684] Fps is (10 sec: 4095.3, 60 sec: 4096.1, 300 sec: 3859.9). Total num frames: 6873088. Throughput: 0: 1017.3. Samples: 717996. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:49:08,098][00684] Avg episode reward: [(0, '4.731')] +[2023-02-25 03:49:13,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 6889472. Throughput: 0: 1017.0. Samples: 720472. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 03:49:13,100][00684] Avg episode reward: [(0, '4.654')] +[2023-02-25 03:49:18,094][00684] Fps is (10 sec: 3277.0, 60 sec: 3823.1, 300 sec: 3846.1). Total num frames: 6905856. Throughput: 0: 1003.6. Samples: 725260. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:49:18,097][00684] Avg episode reward: [(0, '4.744')] +[2023-02-25 03:49:19,367][19689] Updated weights for policy 0, policy_version 1688 (0.0013) +[2023-02-25 03:49:23,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4028.3, 300 sec: 3832.2). Total num frames: 6930432. Throughput: 0: 977.2. Samples: 731588. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:49:23,096][00684] Avg episode reward: [(0, '4.669')] +[2023-02-25 03:49:27,384][19689] Updated weights for policy 0, policy_version 1698 (0.0013) +[2023-02-25 03:49:28,094][00684] Fps is (10 sec: 4915.5, 60 sec: 4096.0, 300 sec: 3860.0). Total num frames: 6955008. Throughput: 0: 979.8. Samples: 735344. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:49:28,096][00684] Avg episode reward: [(0, '4.657')] +[2023-02-25 03:49:33,099][00684] Fps is (10 sec: 4093.6, 60 sec: 4027.3, 300 sec: 3859.9). Total num frames: 6971392. Throughput: 0: 1012.7. Samples: 741928. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 03:49:33,103][00684] Avg episode reward: [(0, '4.502')] +[2023-02-25 03:49:38,093][00684] Fps is (10 sec: 3686.5, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 6991872. Throughput: 0: 1016.5. Samples: 746996. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:49:38,100][00684] Avg episode reward: [(0, '4.372')] +[2023-02-25 03:49:39,139][19689] Updated weights for policy 0, policy_version 1708 (0.0014) +[2023-02-25 03:49:43,093][00684] Fps is (10 sec: 3688.5, 60 sec: 3891.2, 300 sec: 3832.2). Total num frames: 7008256. Throughput: 0: 1009.4. Samples: 749472. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:49:43,097][00684] Avg episode reward: [(0, '4.598')] +[2023-02-25 03:49:48,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4027.8, 300 sec: 3846.1). Total num frames: 7032832. Throughput: 0: 979.8. Samples: 755548. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:49:48,102][00684] Avg episode reward: [(0, '4.851')] +[2023-02-25 03:49:48,112][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001717_7032832.pth... +[2023-02-25 03:49:48,265][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001489_6098944.pth +[2023-02-25 03:49:49,087][19689] Updated weights for policy 0, policy_version 1718 (0.0016) +[2023-02-25 03:49:53,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4027.8, 300 sec: 3846.1). Total num frames: 7049216. Throughput: 0: 977.2. Samples: 761968. 
Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:49:53,098][00684] Avg episode reward: [(0, '4.499')] +[2023-02-25 03:49:58,095][00684] Fps is (10 sec: 3276.1, 60 sec: 3891.1, 300 sec: 3846.0). Total num frames: 7065600. Throughput: 0: 972.7. Samples: 764244. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:49:58,106][00684] Avg episode reward: [(0, '4.520')] +[2023-02-25 03:50:01,942][19689] Updated weights for policy 0, policy_version 1728 (0.0017) +[2023-02-25 03:50:03,093][00684] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3818.3). Total num frames: 7077888. Throughput: 0: 954.2. Samples: 768196. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:50:03,097][00684] Avg episode reward: [(0, '4.613')] +[2023-02-25 03:50:08,097][00684] Fps is (10 sec: 2457.2, 60 sec: 3618.0, 300 sec: 3776.6). Total num frames: 7090176. Throughput: 0: 901.2. Samples: 772144. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:50:08,099][00684] Avg episode reward: [(0, '4.753')] +[2023-02-25 03:50:13,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3762.8). Total num frames: 7110656. Throughput: 0: 868.5. Samples: 774424. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 03:50:13,097][00684] Avg episode reward: [(0, '4.731')] +[2023-02-25 03:50:14,809][19689] Updated weights for policy 0, policy_version 1738 (0.0020) +[2023-02-25 03:50:18,093][00684] Fps is (10 sec: 4507.3, 60 sec: 3823.0, 300 sec: 3804.5). Total num frames: 7135232. Throughput: 0: 870.1. Samples: 781076. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 03:50:18,096][00684] Avg episode reward: [(0, '4.551')] +[2023-02-25 03:50:23,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 7155712. Throughput: 0: 922.8. Samples: 788520. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:50:23,096][00684] Avg episode reward: [(0, '4.616')] +[2023-02-25 03:50:23,116][19689] Updated weights for policy 0, policy_version 1748 (0.0013) +[2023-02-25 03:50:28,095][00684] Fps is (10 sec: 4095.2, 60 sec: 3686.3, 300 sec: 3859.9). Total num frames: 7176192. Throughput: 0: 925.3. Samples: 791112. Policy #0 lag: (min: 0.0, avg: 0.8, max: 3.0) +[2023-02-25 03:50:28,098][00684] Avg episode reward: [(0, '4.624')] +[2023-02-25 03:50:33,094][00684] Fps is (10 sec: 3686.0, 60 sec: 3686.7, 300 sec: 3860.0). Total num frames: 7192576. Throughput: 0: 902.4. Samples: 796156. Policy #0 lag: (min: 0.0, avg: 0.8, max: 3.0) +[2023-02-25 03:50:33,105][00684] Avg episode reward: [(0, '4.625')] +[2023-02-25 03:50:35,053][19689] Updated weights for policy 0, policy_version 1758 (0.0012) +[2023-02-25 03:50:38,093][00684] Fps is (10 sec: 3277.4, 60 sec: 3618.1, 300 sec: 3846.1). Total num frames: 7208960. Throughput: 0: 875.6. Samples: 801372. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:50:38,095][00684] Avg episode reward: [(0, '4.749')] +[2023-02-25 03:50:43,093][00684] Fps is (10 sec: 4096.3, 60 sec: 3754.6, 300 sec: 3846.1). Total num frames: 7233536. Throughput: 0: 908.8. Samples: 805140. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:50:43,096][00684] Avg episode reward: [(0, '4.674')] +[2023-02-25 03:50:43,914][19689] Updated weights for policy 0, policy_version 1768 (0.0012) +[2023-02-25 03:50:48,093][00684] Fps is (10 sec: 4915.2, 60 sec: 3754.7, 300 sec: 3873.9). Total num frames: 7258112. Throughput: 0: 988.2. Samples: 812664. 
Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:50:48,099][00684] Avg episode reward: [(0, '4.508')] +[2023-02-25 03:50:53,096][00684] Fps is (10 sec: 4095.1, 60 sec: 3754.5, 300 sec: 3873.8). Total num frames: 7274496. Throughput: 0: 1017.2. Samples: 817916. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 03:50:53,104][00684] Avg episode reward: [(0, '4.714')] +[2023-02-25 03:50:54,629][19689] Updated weights for policy 0, policy_version 1778 (0.0011) +[2023-02-25 03:50:58,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3754.8, 300 sec: 3860.0). Total num frames: 7290880. Throughput: 0: 1022.0. Samples: 820412. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:50:58,100][00684] Avg episode reward: [(0, '4.830')] +[2023-02-25 03:51:03,095][00684] Fps is (10 sec: 3686.7, 60 sec: 3891.1, 300 sec: 3846.1). Total num frames: 7311360. Throughput: 0: 990.7. Samples: 825660. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:51:03,097][00684] Avg episode reward: [(0, '4.872')] +[2023-02-25 03:51:05,014][19689] Updated weights for policy 0, policy_version 1788 (0.0013) +[2023-02-25 03:51:08,095][00684] Fps is (10 sec: 4504.8, 60 sec: 4096.1, 300 sec: 3846.1). Total num frames: 7335936. Throughput: 0: 991.7. Samples: 833148. Policy #0 lag: (min: 0.0, avg: 0.8, max: 3.0) +[2023-02-25 03:51:08,101][00684] Avg episode reward: [(0, '4.793')] +[2023-02-25 03:51:13,096][00684] Fps is (10 sec: 4505.0, 60 sec: 4095.8, 300 sec: 3859.9). Total num frames: 7356416. Throughput: 0: 1017.8. Samples: 836912. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:51:13,099][00684] Avg episode reward: [(0, '4.732')] +[2023-02-25 03:51:14,618][19689] Updated weights for policy 0, policy_version 1798 (0.0012) +[2023-02-25 03:51:18,101][00684] Fps is (10 sec: 3684.4, 60 sec: 3959.0, 300 sec: 3859.9). Total num frames: 7372800. Throughput: 0: 1022.5. Samples: 842176. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 03:51:18,107][00684] Avg episode reward: [(0, '4.778')] +[2023-02-25 03:51:23,093][00684] Fps is (10 sec: 3277.6, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 7389184. Throughput: 0: 1016.9. Samples: 847132. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 03:51:23,097][00684] Avg episode reward: [(0, '4.784')] +[2023-02-25 03:51:26,154][19689] Updated weights for policy 0, policy_version 1808 (0.0017) +[2023-02-25 03:51:28,093][00684] Fps is (10 sec: 4099.0, 60 sec: 3959.6, 300 sec: 3846.1). Total num frames: 7413760. Throughput: 0: 993.0. Samples: 849824. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:51:28,102][00684] Avg episode reward: [(0, '4.669')] +[2023-02-25 03:51:33,093][00684] Fps is (10 sec: 4915.3, 60 sec: 4096.1, 300 sec: 3846.1). Total num frames: 7438336. Throughput: 0: 990.7. Samples: 857244. Policy #0 lag: (min: 0.0, avg: 0.8, max: 3.0) +[2023-02-25 03:51:33,100][00684] Avg episode reward: [(0, '4.669')] +[2023-02-25 03:51:34,590][19689] Updated weights for policy 0, policy_version 1818 (0.0012) +[2023-02-25 03:51:38,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4164.3, 300 sec: 3860.0). Total num frames: 7458816. Throughput: 0: 1020.0. Samples: 863812. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:51:38,098][00684] Avg episode reward: [(0, '4.645')] +[2023-02-25 03:51:43,094][00684] Fps is (10 sec: 3686.0, 60 sec: 4027.7, 300 sec: 3860.0). Total num frames: 7475200. Throughput: 0: 1021.2. Samples: 866368. 
Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:51:43,102][00684] Avg episode reward: [(0, '4.659')] +[2023-02-25 03:51:46,413][19689] Updated weights for policy 0, policy_version 1828 (0.0016) +[2023-02-25 03:51:48,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 7491584. Throughput: 0: 1017.3. Samples: 871436. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:51:48,095][00684] Avg episode reward: [(0, '4.633')] +[2023-02-25 03:51:48,118][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001829_7491584.pth... +[2023-02-25 03:51:48,360][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001602_6561792.pth +[2023-02-25 03:51:53,094][00684] Fps is (10 sec: 4096.3, 60 sec: 4027.9, 300 sec: 3832.2). Total num frames: 7516160. Throughput: 0: 990.0. Samples: 877696. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 03:51:53,096][00684] Avg episode reward: [(0, '4.807')] +[2023-02-25 03:51:55,533][19689] Updated weights for policy 0, policy_version 1838 (0.0012) +[2023-02-25 03:51:58,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4164.3, 300 sec: 3860.0). Total num frames: 7540736. Throughput: 0: 988.3. Samples: 881384. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:51:58,096][00684] Avg episode reward: [(0, '4.891')] +[2023-02-25 03:52:03,095][00684] Fps is (10 sec: 4095.4, 60 sec: 4096.0, 300 sec: 3846.0). Total num frames: 7557120. Throughput: 0: 1019.4. Samples: 888044. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:52:03,098][00684] Avg episode reward: [(0, '4.820')] +[2023-02-25 03:52:06,211][19689] Updated weights for policy 0, policy_version 1848 (0.0012) +[2023-02-25 03:52:08,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3959.6, 300 sec: 3846.1). Total num frames: 7573504. Throughput: 0: 1009.0. Samples: 892536. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 03:52:08,096][00684] Avg episode reward: [(0, '4.757')] +[2023-02-25 03:52:13,094][00684] Fps is (10 sec: 2867.4, 60 sec: 3823.0, 300 sec: 3818.3). Total num frames: 7585792. Throughput: 0: 992.6. Samples: 894492. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 03:52:13,099][00684] Avg episode reward: [(0, '4.632')] +[2023-02-25 03:52:18,093][00684] Fps is (10 sec: 2457.6, 60 sec: 3755.1, 300 sec: 3776.7). Total num frames: 7598080. Throughput: 0: 917.0. Samples: 898508. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:52:18,101][00684] Avg episode reward: [(0, '4.846')] +[2023-02-25 03:52:20,755][19689] Updated weights for policy 0, policy_version 1858 (0.0032) +[2023-02-25 03:52:23,093][00684] Fps is (10 sec: 3277.1, 60 sec: 3822.9, 300 sec: 3776.7). Total num frames: 7618560. Throughput: 0: 877.5. Samples: 903300. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:52:23,101][00684] Avg episode reward: [(0, '4.696')] +[2023-02-25 03:52:28,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3818.4). Total num frames: 7643136. Throughput: 0: 899.8. Samples: 906860. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 03:52:28,097][00684] Avg episode reward: [(0, '4.701')] +[2023-02-25 03:52:30,641][19689] Updated weights for policy 0, policy_version 1868 (0.0012) +[2023-02-25 03:52:33,101][00684] Fps is (10 sec: 4093.0, 60 sec: 3685.9, 300 sec: 3818.2). Total num frames: 7659520. Throughput: 0: 916.6. Samples: 912688. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:52:33,106][00684] Avg episode reward: [(0, '4.744')] +[2023-02-25 03:52:38,100][00684] Fps is (10 sec: 3274.6, 60 sec: 3617.7, 300 sec: 3832.2). Total num frames: 7675904. Throughput: 0: 888.9. Samples: 917704. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:52:38,103][00684] Avg episode reward: [(0, '4.665')] +[2023-02-25 03:52:42,570][19689] Updated weights for policy 0, policy_version 1878 (0.0021) +[2023-02-25 03:52:43,093][00684] Fps is (10 sec: 3279.3, 60 sec: 3618.2, 300 sec: 3832.2). Total num frames: 7692288. Throughput: 0: 862.5. Samples: 920196. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:52:43,098][00684] Avg episode reward: [(0, '4.692')] +[2023-02-25 03:52:48,093][00684] Fps is (10 sec: 4098.8, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 7716864. Throughput: 0: 866.8. Samples: 927048. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:52:48,096][00684] Avg episode reward: [(0, '4.596')] +[2023-02-25 03:52:50,953][19689] Updated weights for policy 0, policy_version 1888 (0.0012) +[2023-02-25 03:52:53,100][00684] Fps is (10 sec: 4911.9, 60 sec: 3754.3, 300 sec: 3887.6). Total num frames: 7741440. Throughput: 0: 930.7. Samples: 934424. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:52:53,103][00684] Avg episode reward: [(0, '4.897')] +[2023-02-25 03:52:58,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3887.7). Total num frames: 7757824. Throughput: 0: 943.8. Samples: 936960. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:52:58,096][00684] Avg episode reward: [(0, '4.759')] +[2023-02-25 03:53:02,785][19689] Updated weights for policy 0, policy_version 1898 (0.0013) +[2023-02-25 03:53:03,094][00684] Fps is (10 sec: 3278.8, 60 sec: 3618.2, 300 sec: 3887.8). Total num frames: 7774208. Throughput: 0: 966.9. Samples: 942020. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 03:53:03,096][00684] Avg episode reward: [(0, '4.512')] +[2023-02-25 03:53:08,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3860.0). Total num frames: 7794688. Throughput: 0: 981.8. Samples: 947480. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:53:08,096][00684] Avg episode reward: [(0, '4.552')] +[2023-02-25 03:53:11,956][19689] Updated weights for policy 0, policy_version 1908 (0.0012) +[2023-02-25 03:53:13,093][00684] Fps is (10 sec: 4505.9, 60 sec: 3891.3, 300 sec: 3873.9). Total num frames: 7819264. Throughput: 0: 985.7. Samples: 951216. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:53:13,096][00684] Avg episode reward: [(0, '4.650')] +[2023-02-25 03:53:18,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4027.7, 300 sec: 3901.7). Total num frames: 7839744. Throughput: 0: 1022.0. Samples: 958672. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:53:18,097][00684] Avg episode reward: [(0, '4.509')] +[2023-02-25 03:53:22,566][19689] Updated weights for policy 0, policy_version 1918 (0.0011) +[2023-02-25 03:53:23,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3887.7). Total num frames: 7856128. Throughput: 0: 1022.1. Samples: 963692. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:53:23,097][00684] Avg episode reward: [(0, '4.489')] +[2023-02-25 03:53:28,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 7876608. Throughput: 0: 1023.6. Samples: 966260. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:53:28,101][00684] Avg episode reward: [(0, '4.448')] +[2023-02-25 03:53:33,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3891.7, 300 sec: 3860.0). Total num frames: 7892992. Throughput: 0: 989.2. Samples: 971560. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:53:33,101][00684] Avg episode reward: [(0, '4.825')] +[2023-02-25 03:53:33,253][19689] Updated weights for policy 0, policy_version 1928 (0.0016) +[2023-02-25 03:53:38,099][00684] Fps is (10 sec: 4093.5, 60 sec: 4027.8, 300 sec: 3873.8). Total num frames: 7917568. Throughput: 0: 991.6. Samples: 979044. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:53:38,107][00684] Avg episode reward: [(0, '4.862')] +[2023-02-25 03:53:41,672][19689] Updated weights for policy 0, policy_version 1938 (0.0012) +[2023-02-25 03:53:43,095][00684] Fps is (10 sec: 4914.1, 60 sec: 4164.1, 300 sec: 3901.6). Total num frames: 7942144. Throughput: 0: 1017.7. Samples: 982760. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 03:53:43,098][00684] Avg episode reward: [(0, '4.530')] +[2023-02-25 03:53:48,096][00684] Fps is (10 sec: 3687.5, 60 sec: 3959.3, 300 sec: 3887.7). Total num frames: 7954432. Throughput: 0: 1020.5. Samples: 987944. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:53:48,100][00684] Avg episode reward: [(0, '4.474')] +[2023-02-25 03:53:48,150][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001943_7958528.pth... +[2023-02-25 03:53:48,417][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001717_7032832.pth +[2023-02-25 03:53:53,095][00684] Fps is (10 sec: 3276.9, 60 sec: 3891.5, 300 sec: 3873.8). Total num frames: 7974912. Throughput: 0: 1007.7. Samples: 992828. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:53:53,104][00684] Avg episode reward: [(0, '4.577')] +[2023-02-25 03:53:53,824][19689] Updated weights for policy 0, policy_version 1948 (0.0020) +[2023-02-25 03:53:58,093][00684] Fps is (10 sec: 4097.3, 60 sec: 3959.5, 300 sec: 3860.0). Total num frames: 7995392. Throughput: 0: 982.1. Samples: 995412. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:53:58,095][00684] Avg episode reward: [(0, '4.572')] +[2023-02-25 03:54:02,469][19689] Updated weights for policy 0, policy_version 1958 (0.0014) +[2023-02-25 03:54:03,093][00684] Fps is (10 sec: 4506.5, 60 sec: 4096.0, 300 sec: 3887.7). Total num frames: 8019968. Throughput: 0: 981.7. Samples: 1002848. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:54:03,099][00684] Avg episode reward: [(0, '4.869')] +[2023-02-25 03:54:08,094][00684] Fps is (10 sec: 4505.4, 60 sec: 4096.0, 300 sec: 3901.6). Total num frames: 8040448. Throughput: 0: 1016.5. Samples: 1009436. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:54:08,099][00684] Avg episode reward: [(0, '4.734')] +[2023-02-25 03:54:13,093][00684] Fps is (10 sec: 3686.3, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 8056832. Throughput: 0: 1016.9. Samples: 1012020. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:54:13,096][00684] Avg episode reward: [(0, '4.635')] +[2023-02-25 03:54:13,398][19689] Updated weights for policy 0, policy_version 1968 (0.0013) +[2023-02-25 03:54:18,093][00684] Fps is (10 sec: 3277.0, 60 sec: 3891.2, 300 sec: 3873.8). Total num frames: 8073216. Throughput: 0: 1012.2. Samples: 1017108. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 03:54:18,096][00684] Avg episode reward: [(0, '4.711')] +[2023-02-25 03:54:23,093][00684] Fps is (10 sec: 3276.9, 60 sec: 3891.2, 300 sec: 3846.1). Total num frames: 8089600. Throughput: 0: 956.8. Samples: 1022096. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:54:23,100][00684] Avg episode reward: [(0, '4.895')] +[2023-02-25 03:54:25,906][19689] Updated weights for policy 0, policy_version 1978 (0.0015) +[2023-02-25 03:54:28,097][00684] Fps is (10 sec: 3275.5, 60 sec: 3822.7, 300 sec: 3846.1). Total num frames: 8105984. Throughput: 0: 931.7. Samples: 1024688. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:54:28,102][00684] Avg episode reward: [(0, '4.661')] +[2023-02-25 03:54:33,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3832.2). Total num frames: 8122368. Throughput: 0: 922.8. Samples: 1029468. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:54:33,096][00684] Avg episode reward: [(0, '4.757')] +[2023-02-25 03:54:38,093][00684] Fps is (10 sec: 3278.1, 60 sec: 3686.8, 300 sec: 3832.2). Total num frames: 8138752. Throughput: 0: 910.0. Samples: 1033776. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:54:38,101][00684] Avg episode reward: [(0, '4.774')] +[2023-02-25 03:54:39,011][19689] Updated weights for policy 0, policy_version 1988 (0.0013) +[2023-02-25 03:54:43,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3550.0, 300 sec: 3804.4). Total num frames: 8155136. Throughput: 0: 908.5. Samples: 1036296. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:54:43,098][00684] Avg episode reward: [(0, '4.886')] +[2023-02-25 03:54:48,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3686.6, 300 sec: 3818.3). Total num frames: 8175616. Throughput: 0: 864.0. Samples: 1041728. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:54:48,099][00684] Avg episode reward: [(0, '4.810')] +[2023-02-25 03:54:49,243][19689] Updated weights for policy 0, policy_version 1998 (0.0014) +[2023-02-25 03:54:53,093][00684] Fps is (10 sec: 4505.5, 60 sec: 3754.8, 300 sec: 3846.1). Total num frames: 8200192. Throughput: 0: 883.9. Samples: 1049212. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 03:54:53,099][00684] Avg episode reward: [(0, '4.593')] +[2023-02-25 03:54:58,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3873.8). Total num frames: 8220672. Throughput: 0: 909.7. Samples: 1052956. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:54:58,096][00684] Avg episode reward: [(0, '4.580')] +[2023-02-25 03:54:58,243][19689] Updated weights for policy 0, policy_version 2008 (0.0011) +[2023-02-25 03:55:03,094][00684] Fps is (10 sec: 3686.2, 60 sec: 3618.1, 300 sec: 3887.8). Total num frames: 8237056. Throughput: 0: 911.3. Samples: 1058116. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:55:03,096][00684] Avg episode reward: [(0, '4.754')] +[2023-02-25 03:55:08,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3618.2, 300 sec: 3887.7). Total num frames: 8257536. Throughput: 0: 911.4. Samples: 1063108. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:55:08,100][00684] Avg episode reward: [(0, '4.587')] +[2023-02-25 03:55:10,232][19689] Updated weights for policy 0, policy_version 2018 (0.0021) +[2023-02-25 03:55:13,093][00684] Fps is (10 sec: 4096.3, 60 sec: 3686.4, 300 sec: 3873.8). Total num frames: 8278016. Throughput: 0: 915.9. Samples: 1065900. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 03:55:13,101][00684] Avg episode reward: [(0, '4.496')] +[2023-02-25 03:55:18,095][00684] Fps is (10 sec: 4504.7, 60 sec: 3822.8, 300 sec: 3887.7). Total num frames: 8302592. Throughput: 0: 977.6. Samples: 1073460. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:55:18,103][00684] Avg episode reward: [(0, '4.674')] +[2023-02-25 03:55:18,475][19689] Updated weights for policy 0, policy_version 2028 (0.0012) +[2023-02-25 03:55:23,100][00684] Fps is (10 sec: 4502.6, 60 sec: 3890.8, 300 sec: 3887.7). Total num frames: 8323072. Throughput: 0: 1024.9. Samples: 1079904. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:55:23,103][00684] Avg episode reward: [(0, '4.598')] +[2023-02-25 03:55:28,093][00684] Fps is (10 sec: 3687.1, 60 sec: 3891.5, 300 sec: 3887.7). Total num frames: 8339456. Throughput: 0: 1025.8. Samples: 1082456. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:55:28,096][00684] Avg episode reward: [(0, '4.592')] +[2023-02-25 03:55:30,018][19689] Updated weights for policy 0, policy_version 2038 (0.0023) +[2023-02-25 03:55:33,094][00684] Fps is (10 sec: 3278.8, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 8355840. Throughput: 0: 1016.8. Samples: 1087484. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:55:33,101][00684] Avg episode reward: [(0, '4.670')] +[2023-02-25 03:55:38,095][00684] Fps is (10 sec: 4095.3, 60 sec: 4027.6, 300 sec: 3887.7). Total num frames: 8380416. Throughput: 0: 990.8. Samples: 1093800. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:55:38,100][00684] Avg episode reward: [(0, '4.453')] +[2023-02-25 03:55:39,486][19689] Updated weights for policy 0, policy_version 2048 (0.0013) +[2023-02-25 03:55:43,093][00684] Fps is (10 sec: 4915.5, 60 sec: 4164.3, 300 sec: 3887.7). Total num frames: 8404992. Throughput: 0: 990.9. Samples: 1097548. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:55:43,096][00684] Avg episode reward: [(0, '4.636')] +[2023-02-25 03:55:48,093][00684] Fps is (10 sec: 4506.3, 60 sec: 4164.3, 300 sec: 3901.6). Total num frames: 8425472. Throughput: 0: 1022.4. Samples: 1104124. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:55:48,096][00684] Avg episode reward: [(0, '4.743')] +[2023-02-25 03:55:48,117][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002057_8425472.pth... +[2023-02-25 03:55:48,418][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001829_7491584.pth +[2023-02-25 03:55:49,449][19689] Updated weights for policy 0, policy_version 2058 (0.0015) +[2023-02-25 03:55:53,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 3887.7). Total num frames: 8437760. Throughput: 0: 1022.7. Samples: 1109128. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:55:53,096][00684] Avg episode reward: [(0, '4.741')] +[2023-02-25 03:55:58,095][00684] Fps is (10 sec: 2866.7, 60 sec: 3891.1, 300 sec: 3873.8). Total num frames: 8454144. Throughput: 0: 1016.5. Samples: 1111644. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:55:58,100][00684] Avg episode reward: [(0, '4.663')] +[2023-02-25 03:56:00,515][19689] Updated weights for policy 0, policy_version 2068 (0.0014) +[2023-02-25 03:56:03,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4027.8, 300 sec: 3873.9). Total num frames: 8478720. Throughput: 0: 987.4. Samples: 1117892. 
Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:56:03,100][00684] Avg episode reward: [(0, '4.935')] +[2023-02-25 03:56:08,093][00684] Fps is (10 sec: 4916.0, 60 sec: 4096.0, 300 sec: 3887.8). Total num frames: 8503296. Throughput: 0: 1009.9. Samples: 1125344. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:56:08,096][00684] Avg episode reward: [(0, '4.788')] +[2023-02-25 03:56:09,140][19689] Updated weights for policy 0, policy_version 2078 (0.0012) +[2023-02-25 03:56:13,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3887.8). Total num frames: 8519680. Throughput: 0: 1018.7. Samples: 1128296. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:56:13,102][00684] Avg episode reward: [(0, '4.600')] +[2023-02-25 03:56:18,093][00684] Fps is (10 sec: 3686.5, 60 sec: 3959.6, 300 sec: 3901.6). Total num frames: 8540160. Throughput: 0: 1019.3. Samples: 1133352. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 03:56:18,097][00684] Avg episode reward: [(0, '4.642')] +[2023-02-25 03:56:21,323][19689] Updated weights for policy 0, policy_version 2088 (0.0017) +[2023-02-25 03:56:23,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3891.6, 300 sec: 3873.8). Total num frames: 8556544. Throughput: 0: 989.3. Samples: 1138316. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:56:23,101][00684] Avg episode reward: [(0, '4.649')] +[2023-02-25 03:56:28,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3873.8). Total num frames: 8581120. Throughput: 0: 982.2. Samples: 1141748. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:56:28,102][00684] Avg episode reward: [(0, '4.508')] +[2023-02-25 03:56:30,343][19689] Updated weights for policy 0, policy_version 2098 (0.0012) +[2023-02-25 03:56:33,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4164.3, 300 sec: 3887.7). Total num frames: 8605696. Throughput: 0: 1002.8. Samples: 1149248. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:56:33,099][00684] Avg episode reward: [(0, '4.670')] +[2023-02-25 03:56:38,096][00684] Fps is (10 sec: 4094.9, 60 sec: 4027.7, 300 sec: 3887.7). Total num frames: 8622080. Throughput: 0: 1014.9. Samples: 1154800. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:56:38,099][00684] Avg episode reward: [(0, '4.657')] +[2023-02-25 03:56:42,682][19689] Updated weights for policy 0, policy_version 2108 (0.0018) +[2023-02-25 03:56:43,096][00684] Fps is (10 sec: 2866.4, 60 sec: 3822.8, 300 sec: 3873.8). Total num frames: 8634368. Throughput: 0: 1003.5. Samples: 1156804. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:56:43,098][00684] Avg episode reward: [(0, '4.715')] +[2023-02-25 03:56:48,098][00684] Fps is (10 sec: 2457.1, 60 sec: 3686.1, 300 sec: 3832.1). Total num frames: 8646656. Throughput: 0: 952.4. Samples: 1160756. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:56:48,100][00684] Avg episode reward: [(0, '4.721')] +[2023-02-25 03:56:53,094][00684] Fps is (10 sec: 2867.8, 60 sec: 3754.6, 300 sec: 3804.4). Total num frames: 8663040. Throughput: 0: 874.4. Samples: 1164692. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:56:53,096][00684] Avg episode reward: [(0, '4.727')] +[2023-02-25 03:56:56,381][19689] Updated weights for policy 0, policy_version 2118 (0.0018) +[2023-02-25 03:56:58,093][00684] Fps is (10 sec: 3688.2, 60 sec: 3823.1, 300 sec: 3818.3). Total num frames: 8683520. Throughput: 0: 866.3. Samples: 1167280. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:56:58,096][00684] Avg episode reward: [(0, '4.730')] +[2023-02-25 03:57:03,093][00684] Fps is (10 sec: 4505.9, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 8708096. Throughput: 0: 922.2. Samples: 1174852. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:57:03,095][00684] Avg episode reward: [(0, '4.780')] +[2023-02-25 03:57:04,747][19689] Updated weights for policy 0, policy_version 2128 (0.0011) +[2023-02-25 03:57:08,094][00684] Fps is (10 sec: 4095.7, 60 sec: 3686.4, 300 sec: 3860.0). Total num frames: 8724480. Throughput: 0: 936.5. Samples: 1180460. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:57:08,096][00684] Avg episode reward: [(0, '4.623')] +[2023-02-25 03:57:13,094][00684] Fps is (10 sec: 3276.4, 60 sec: 3686.3, 300 sec: 3873.8). Total num frames: 8740864. Throughput: 0: 914.7. Samples: 1182912. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 03:57:13,096][00684] Avg episode reward: [(0, '4.577')] +[2023-02-25 03:57:16,803][19689] Updated weights for policy 0, policy_version 2138 (0.0021) +[2023-02-25 03:57:18,093][00684] Fps is (10 sec: 3276.9, 60 sec: 3618.1, 300 sec: 3860.0). Total num frames: 8757248. Throughput: 0: 861.5. Samples: 1188016. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:57:18,098][00684] Avg episode reward: [(0, '4.682')] +[2023-02-25 03:57:23,093][00684] Fps is (10 sec: 4506.2, 60 sec: 3822.9, 300 sec: 3873.8). Total num frames: 8785920. Throughput: 0: 899.3. Samples: 1195268. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:57:23,096][00684] Avg episode reward: [(0, '4.758')] +[2023-02-25 03:57:25,078][19689] Updated weights for policy 0, policy_version 2148 (0.0020) +[2023-02-25 03:57:28,096][00684] Fps is (10 sec: 5323.5, 60 sec: 3822.8, 300 sec: 3901.7). Total num frames: 8810496. Throughput: 0: 938.6. Samples: 1199040. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 03:57:28,098][00684] Avg episode reward: [(0, '4.666')] +[2023-02-25 03:57:33,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3901.7). Total num frames: 8826880. Throughput: 0: 975.6. Samples: 1204652. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 03:57:33,096][00684] Avg episode reward: [(0, '4.784')] +[2023-02-25 03:57:37,338][19689] Updated weights for policy 0, policy_version 2158 (0.0012) +[2023-02-25 03:57:38,093][00684] Fps is (10 sec: 3277.7, 60 sec: 3686.6, 300 sec: 3901.6). Total num frames: 8843264. Throughput: 0: 999.6. Samples: 1209672. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 03:57:38,102][00684] Avg episode reward: [(0, '4.998')] +[2023-02-25 03:57:43,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3754.8, 300 sec: 3873.8). Total num frames: 8859648. Throughput: 0: 995.8. Samples: 1212092. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 03:57:43,107][00684] Avg episode reward: [(0, '4.780')] +[2023-02-25 03:57:46,710][19689] Updated weights for policy 0, policy_version 2168 (0.0017) +[2023-02-25 03:57:48,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3959.8, 300 sec: 3873.9). Total num frames: 8884224. Throughput: 0: 987.5. Samples: 1219288. Policy #0 lag: (min: 0.0, avg: 0.8, max: 3.0) +[2023-02-25 03:57:48,096][00684] Avg episode reward: [(0, '4.552')] +[2023-02-25 03:57:48,108][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002169_8884224.pth... 
+[2023-02-25 03:57:48,272][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001943_7958528.pth +[2023-02-25 03:57:53,096][00684] Fps is (10 sec: 4913.9, 60 sec: 4095.9, 300 sec: 3901.6). Total num frames: 8908800. Throughput: 0: 1011.0. Samples: 1225956. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:57:53,099][00684] Avg episode reward: [(0, '4.404')] +[2023-02-25 03:57:56,620][19689] Updated weights for policy 0, policy_version 2178 (0.0012) +[2023-02-25 03:57:58,094][00684] Fps is (10 sec: 4095.9, 60 sec: 4027.7, 300 sec: 3901.6). Total num frames: 8925184. Throughput: 0: 1013.5. Samples: 1228520. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:57:58,099][00684] Avg episode reward: [(0, '4.502')] +[2023-02-25 03:58:03,093][00684] Fps is (10 sec: 2867.9, 60 sec: 3822.9, 300 sec: 3873.8). Total num frames: 8937472. Throughput: 0: 1014.0. Samples: 1233644. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 03:58:03,100][00684] Avg episode reward: [(0, '4.680')] +[2023-02-25 03:58:07,240][19689] Updated weights for policy 0, policy_version 2188 (0.0018) +[2023-02-25 03:58:08,093][00684] Fps is (10 sec: 3686.5, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 8962048. Throughput: 0: 986.8. Samples: 1239676. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:58:08,096][00684] Avg episode reward: [(0, '4.714')] +[2023-02-25 03:58:13,093][00684] Fps is (10 sec: 4915.3, 60 sec: 4096.1, 300 sec: 3887.7). Total num frames: 8986624. Throughput: 0: 987.2. Samples: 1243460. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:58:13,095][00684] Avg episode reward: [(0, '4.728')] +[2023-02-25 03:58:15,997][19689] Updated weights for policy 0, policy_version 2198 (0.0011) +[2023-02-25 03:58:18,093][00684] Fps is (10 sec: 4505.7, 60 sec: 4164.3, 300 sec: 3901.6). Total num frames: 9007104. Throughput: 0: 1016.1. Samples: 1250376. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 03:58:18,098][00684] Avg episode reward: [(0, '4.758')] +[2023-02-25 03:58:23,100][00684] Fps is (10 sec: 3683.9, 60 sec: 3959.0, 300 sec: 3887.6). Total num frames: 9023488. Throughput: 0: 1018.1. Samples: 1255492. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 03:58:23,106][00684] Avg episode reward: [(0, '4.673')] +[2023-02-25 03:58:28,094][00684] Fps is (10 sec: 3686.1, 60 sec: 3891.3, 300 sec: 3901.6). Total num frames: 9043968. Throughput: 0: 1021.8. Samples: 1258076. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 03:58:28,103][00684] Avg episode reward: [(0, '4.750')] +[2023-02-25 03:58:28,092][19689] Updated weights for policy 0, policy_version 2208 (0.0017) +[2023-02-25 03:58:33,093][00684] Fps is (10 sec: 4098.8, 60 sec: 3959.5, 300 sec: 3887.8). Total num frames: 9064448. Throughput: 0: 993.9. Samples: 1264012. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 03:58:33,101][00684] Avg episode reward: [(0, '4.943')] +[2023-02-25 03:58:36,599][19689] Updated weights for policy 0, policy_version 2218 (0.0012) +[2023-02-25 03:58:38,093][00684] Fps is (10 sec: 4506.0, 60 sec: 4096.0, 300 sec: 3887.8). Total num frames: 9089024. Throughput: 0: 1011.9. Samples: 1271488. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:58:38,096][00684] Avg episode reward: [(0, '5.153')] +[2023-02-25 03:58:43,096][00684] Fps is (10 sec: 4504.2, 60 sec: 4164.1, 300 sec: 3915.5). Total num frames: 9109504. Throughput: 0: 1025.8. Samples: 1274684. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:58:43,102][00684] Avg episode reward: [(0, '5.077')] +[2023-02-25 03:58:47,845][19689] Updated weights for policy 0, policy_version 2228 (0.0017) +[2023-02-25 03:58:48,095][00684] Fps is (10 sec: 3685.7, 60 sec: 4027.6, 300 sec: 3901.6). Total num frames: 9125888. Throughput: 0: 1025.5. Samples: 1279792. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 03:58:48,100][00684] Avg episode reward: [(0, '5.023')] +[2023-02-25 03:58:53,098][00684] Fps is (10 sec: 3276.3, 60 sec: 3891.1, 300 sec: 3887.7). Total num frames: 9142272. Throughput: 0: 1004.5. Samples: 1284884. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 03:58:53,100][00684] Avg episode reward: [(0, '4.965')] +[2023-02-25 03:58:58,093][00684] Fps is (10 sec: 3277.4, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 9158656. Throughput: 0: 970.0. Samples: 1287112. Policy #0 lag: (min: 0.0, avg: 1.3, max: 2.0) +[2023-02-25 03:58:58,100][00684] Avg episode reward: [(0, '4.961')] +[2023-02-25 03:59:00,052][19689] Updated weights for policy 0, policy_version 2238 (0.0012) +[2023-02-25 03:59:03,093][00684] Fps is (10 sec: 3278.3, 60 sec: 3959.5, 300 sec: 3846.1). Total num frames: 9175040. Throughput: 0: 929.0. Samples: 1292180. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 03:59:03,099][00684] Avg episode reward: [(0, '4.791')] +[2023-02-25 03:59:08,100][00684] Fps is (10 sec: 2865.3, 60 sec: 3754.3, 300 sec: 3832.1). Total num frames: 9187328. Throughput: 0: 913.6. Samples: 1296604. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 03:59:08,114][00684] Avg episode reward: [(0, '4.948')] +[2023-02-25 03:59:12,962][19689] Updated weights for policy 0, policy_version 2248 (0.0013) +[2023-02-25 03:59:13,093][00684] Fps is (10 sec: 3276.9, 60 sec: 3686.4, 300 sec: 3846.1). Total num frames: 9207808. Throughput: 0: 912.5. Samples: 1299136. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 03:59:13,096][00684] Avg episode reward: [(0, '4.828')] +[2023-02-25 03:59:18,093][00684] Fps is (10 sec: 3688.9, 60 sec: 3618.1, 300 sec: 3846.1). Total num frames: 9224192. Throughput: 0: 892.5. Samples: 1304176. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 03:59:18,103][00684] Avg episode reward: [(0, '4.651')] +[2023-02-25 03:59:23,084][19689] Updated weights for policy 0, policy_version 2258 (0.0013) +[2023-02-25 03:59:23,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3755.1, 300 sec: 3873.9). Total num frames: 9248768. Throughput: 0: 867.0. Samples: 1310504. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 03:59:23,100][00684] Avg episode reward: [(0, '4.589')] +[2023-02-25 03:59:28,093][00684] Fps is (10 sec: 4915.2, 60 sec: 3823.0, 300 sec: 3901.6). Total num frames: 9273344. Throughput: 0: 880.5. Samples: 1314304. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:59:28,097][00684] Avg episode reward: [(0, '4.791')] +[2023-02-25 03:59:32,087][19689] Updated weights for policy 0, policy_version 2268 (0.0012) +[2023-02-25 03:59:33,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 9289728. Throughput: 0: 918.2. Samples: 1321108. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:59:33,099][00684] Avg episode reward: [(0, '4.865')] +[2023-02-25 03:59:38,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3901.6). Total num frames: 9306112. Throughput: 0: 917.5. Samples: 1326168. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 03:59:38,096][00684] Avg episode reward: [(0, '4.825')] +[2023-02-25 03:59:43,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3618.3, 300 sec: 3901.6). Total num frames: 9326592. Throughput: 0: 923.9. Samples: 1328688. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 03:59:43,102][00684] Avg episode reward: [(0, '4.961')] +[2023-02-25 03:59:43,951][19689] Updated weights for policy 0, policy_version 2278 (0.0015) +[2023-02-25 03:59:48,094][00684] Fps is (10 sec: 4505.5, 60 sec: 3754.8, 300 sec: 3901.6). Total num frames: 9351168. Throughput: 0: 951.4. Samples: 1334992. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 03:59:48,104][00684] Avg episode reward: [(0, '4.837')] +[2023-02-25 03:59:48,116][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002283_9351168.pth... +[2023-02-25 03:59:48,285][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002057_8425472.pth +[2023-02-25 03:59:52,238][19689] Updated weights for policy 0, policy_version 2288 (0.0013) +[2023-02-25 03:59:53,093][00684] Fps is (10 sec: 4505.7, 60 sec: 3823.2, 300 sec: 3901.6). Total num frames: 9371648. Throughput: 0: 1018.1. Samples: 1342412. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 03:59:53,096][00684] Avg episode reward: [(0, '4.614')] +[2023-02-25 03:59:58,093][00684] Fps is (10 sec: 4096.1, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 9392128. Throughput: 0: 1029.6. Samples: 1345468. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 03:59:58,095][00684] Avg episode reward: [(0, '4.587')] +[2023-02-25 04:00:03,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 9408512. Throughput: 0: 1032.8. Samples: 1350652. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:00:03,097][00684] Avg episode reward: [(0, '4.792')] +[2023-02-25 04:00:03,784][19689] Updated weights for policy 0, policy_version 2298 (0.0024) +[2023-02-25 04:00:08,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3959.9, 300 sec: 3887.7). Total num frames: 9424896. Throughput: 0: 1008.0. Samples: 1355864. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:00:08,095][00684] Avg episode reward: [(0, '4.938')] +[2023-02-25 04:00:12,889][19689] Updated weights for policy 0, policy_version 2308 (0.0016) +[2023-02-25 04:00:13,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3901.6). Total num frames: 9453568. Throughput: 0: 1002.2. Samples: 1359404. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:00:13,096][00684] Avg episode reward: [(0, '4.821')] +[2023-02-25 04:00:18,097][00684] Fps is (10 sec: 4913.3, 60 sec: 4164.0, 300 sec: 3901.7). Total num frames: 9474048. Throughput: 0: 1016.3. Samples: 1366844. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:00:18,100][00684] Avg episode reward: [(0, '4.656')] +[2023-02-25 04:00:22,840][19689] Updated weights for policy 0, policy_version 2318 (0.0011) +[2023-02-25 04:00:23,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4096.0, 300 sec: 3915.5). Total num frames: 9494528. Throughput: 0: 1028.0. Samples: 1372428. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:00:23,096][00684] Avg episode reward: [(0, '4.881')] +[2023-02-25 04:00:28,098][00684] Fps is (10 sec: 3686.2, 60 sec: 3959.2, 300 sec: 3915.4). Total num frames: 9510912. Throughput: 0: 1028.3. Samples: 1374968. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:00:28,100][00684] Avg episode reward: [(0, '5.003')] +[2023-02-25 04:00:33,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 3887.8). Total num frames: 9527296. Throughput: 0: 1003.7. Samples: 1380156. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:00:33,096][00684] Avg episode reward: [(0, '4.869')] +[2023-02-25 04:00:33,956][19689] Updated weights for policy 0, policy_version 2328 (0.0016) +[2023-02-25 04:00:38,093][00684] Fps is (10 sec: 4097.8, 60 sec: 4096.0, 300 sec: 3887.7). Total num frames: 9551872. Throughput: 0: 998.9. Samples: 1387364. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:00:38,096][00684] Avg episode reward: [(0, '4.693')] +[2023-02-25 04:00:42,050][19689] Updated weights for policy 0, policy_version 2338 (0.0012) +[2023-02-25 04:00:43,098][00684] Fps is (10 sec: 4912.8, 60 sec: 4163.9, 300 sec: 3901.6). Total num frames: 9576448. Throughput: 0: 1015.0. Samples: 1391148. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:00:43,101][00684] Avg episode reward: [(0, '4.729')] +[2023-02-25 04:00:48,100][00684] Fps is (10 sec: 4093.2, 60 sec: 4027.3, 300 sec: 3915.4). Total num frames: 9592832. Throughput: 0: 1029.8. Samples: 1397000. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:00:48,108][00684] Avg episode reward: [(0, '5.001')] +[2023-02-25 04:00:53,093][00684] Fps is (10 sec: 3278.4, 60 sec: 3959.5, 300 sec: 3915.5). Total num frames: 9609216. Throughput: 0: 1025.2. Samples: 1401996. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:00:53,100][00684] Avg episode reward: [(0, '4.829')] +[2023-02-25 04:00:54,261][19689] Updated weights for policy 0, policy_version 2348 (0.0015) +[2023-02-25 04:00:58,093][00684] Fps is (10 sec: 3688.9, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 9629696. Throughput: 0: 1005.4. Samples: 1404648. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:00:58,096][00684] Avg episode reward: [(0, '4.645')] +[2023-02-25 04:01:03,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3901.6). Total num frames: 9654272. Throughput: 0: 998.6. Samples: 1411776. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:01:03,102][00684] Avg episode reward: [(0, '4.696')] +[2023-02-25 04:01:03,158][19689] Updated weights for policy 0, policy_version 2358 (0.0019) +[2023-02-25 04:01:08,094][00684] Fps is (10 sec: 4914.8, 60 sec: 4232.5, 300 sec: 3929.4). Total num frames: 9678848. Throughput: 0: 1035.8. Samples: 1419040. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:01:08,097][00684] Avg episode reward: [(0, '4.605')] +[2023-02-25 04:01:13,096][00684] Fps is (10 sec: 3685.4, 60 sec: 3959.3, 300 sec: 3901.6). Total num frames: 9691136. Throughput: 0: 1023.5. Samples: 1421024. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:01:13,098][00684] Avg episode reward: [(0, '4.767')] +[2023-02-25 04:01:15,443][19689] Updated weights for policy 0, policy_version 2368 (0.0023) +[2023-02-25 04:01:18,093][00684] Fps is (10 sec: 2457.8, 60 sec: 3823.2, 300 sec: 3887.7). Total num frames: 9703424. Throughput: 0: 995.1. Samples: 1424936. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:01:18,101][00684] Avg episode reward: [(0, '4.679')] +[2023-02-25 04:01:23,096][00684] Fps is (10 sec: 2457.6, 60 sec: 3686.2, 300 sec: 3846.0). Total num frames: 9715712. Throughput: 0: 920.8. Samples: 1428804. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:01:23,099][00684] Avg episode reward: [(0, '4.553')] +[2023-02-25 04:01:28,093][00684] Fps is (10 sec: 2867.2, 60 sec: 3686.7, 300 sec: 3818.3). Total num frames: 9732096. Throughput: 0: 882.0. Samples: 1430832. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:01:28,096][00684] Avg episode reward: [(0, '4.649')] +[2023-02-25 04:01:29,159][19689] Updated weights for policy 0, policy_version 2378 (0.0030) +[2023-02-25 04:01:33,093][00684] Fps is (10 sec: 4097.1, 60 sec: 3822.9, 300 sec: 3846.1). Total num frames: 9756672. Throughput: 0: 892.8. Samples: 1437168. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:01:33,098][00684] Avg episode reward: [(0, '4.866')] +[2023-02-25 04:01:37,541][19689] Updated weights for policy 0, policy_version 2388 (0.0012) +[2023-02-25 04:01:38,103][00684] Fps is (10 sec: 4910.2, 60 sec: 3822.3, 300 sec: 3887.6). Total num frames: 9781248. Throughput: 0: 946.5. Samples: 1444600. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:01:38,106][00684] Avg episode reward: [(0, '4.866')] +[2023-02-25 04:01:43,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3686.7, 300 sec: 3901.7). Total num frames: 9797632. Throughput: 0: 945.3. Samples: 1447188. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:01:43,096][00684] Avg episode reward: [(0, '4.781')] +[2023-02-25 04:01:48,093][00684] Fps is (10 sec: 3280.2, 60 sec: 3686.8, 300 sec: 3901.6). Total num frames: 9814016. Throughput: 0: 899.3. Samples: 1452244. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:01:48,096][00684] Avg episode reward: [(0, '4.766')] +[2023-02-25 04:01:48,112][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002396_9814016.pth... +[2023-02-25 04:01:48,326][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002169_8884224.pth +[2023-02-25 04:01:49,926][19689] Updated weights for policy 0, policy_version 2398 (0.0013) +[2023-02-25 04:01:53,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 9834496. Throughput: 0: 853.1. Samples: 1457428. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:01:53,096][00684] Avg episode reward: [(0, '4.715')] +[2023-02-25 04:01:58,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3901.6). Total num frames: 9859072. Throughput: 0: 891.8. Samples: 1461152. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:01:58,096][00684] Avg episode reward: [(0, '4.727')] +[2023-02-25 04:01:58,557][19689] Updated weights for policy 0, policy_version 2408 (0.0013) +[2023-02-25 04:02:03,093][00684] Fps is (10 sec: 4915.2, 60 sec: 3822.9, 300 sec: 3929.4). Total num frames: 9883648. Throughput: 0: 971.7. Samples: 1468664. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:02:03,102][00684] Avg episode reward: [(0, '4.824')] +[2023-02-25 04:02:08,093][00684] Fps is (10 sec: 4095.9, 60 sec: 3686.4, 300 sec: 3929.4). Total num frames: 9900032. Throughput: 0: 1005.0. Samples: 1474028. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:02:08,098][00684] Avg episode reward: [(0, '4.703')] +[2023-02-25 04:02:09,232][19689] Updated weights for policy 0, policy_version 2418 (0.0014) +[2023-02-25 04:02:13,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3754.8, 300 sec: 3929.4). Total num frames: 9916416. Throughput: 0: 1014.9. Samples: 1476504. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:02:13,096][00684] Avg episode reward: [(0, '4.733')] +[2023-02-25 04:02:18,093][00684] Fps is (10 sec: 3686.5, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 9936896. Throughput: 0: 989.7. Samples: 1481704. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:02:18,096][00684] Avg episode reward: [(0, '4.880')] +[2023-02-25 04:02:19,563][19689] Updated weights for policy 0, policy_version 2428 (0.0014) +[2023-02-25 04:02:23,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4096.2, 300 sec: 3901.7). Total num frames: 9961472. Throughput: 0: 988.8. Samples: 1489084. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:02:23,095][00684] Avg episode reward: [(0, '4.391')] +[2023-02-25 04:02:28,093][00684] Fps is (10 sec: 4505.5, 60 sec: 4164.3, 300 sec: 3915.5). Total num frames: 9981952. Throughput: 0: 1015.7. Samples: 1492896. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:02:28,101][00684] Avg episode reward: [(0, '4.512')] +[2023-02-25 04:02:28,380][19689] Updated weights for policy 0, policy_version 2438 (0.0012) +[2023-02-25 04:02:33,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 9998336. Throughput: 0: 1023.7. Samples: 1498312. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:02:33,099][00684] Avg episode reward: [(0, '4.768')] +[2023-02-25 04:02:38,093][00684] Fps is (10 sec: 3686.5, 60 sec: 3960.1, 300 sec: 3929.4). Total num frames: 10018816. Throughput: 0: 1022.4. Samples: 1503436. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:02:38,101][00684] Avg episode reward: [(0, '4.746')] +[2023-02-25 04:02:40,224][19689] Updated weights for policy 0, policy_version 2448 (0.0037) +[2023-02-25 04:02:43,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 10039296. Throughput: 0: 997.6. Samples: 1506044. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:02:43,096][00684] Avg episode reward: [(0, '4.838')] +[2023-02-25 04:02:48,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4096.0, 300 sec: 3901.6). Total num frames: 10059776. Throughput: 0: 998.8. Samples: 1513612. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:02:48,095][00684] Avg episode reward: [(0, '4.893')] +[2023-02-25 04:02:48,522][19689] Updated weights for policy 0, policy_version 2458 (0.0012) +[2023-02-25 04:02:53,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4164.3, 300 sec: 3929.4). Total num frames: 10084352. Throughput: 0: 1027.3. Samples: 1520256. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:02:53,098][00684] Avg episode reward: [(0, '4.734')] +[2023-02-25 04:02:58,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3943.3). Total num frames: 10100736. Throughput: 0: 1028.5. Samples: 1522788. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:02:58,097][00684] Avg episode reward: [(0, '4.602')] +[2023-02-25 04:03:00,307][19689] Updated weights for policy 0, policy_version 2468 (0.0026) +[2023-02-25 04:03:03,094][00684] Fps is (10 sec: 3276.6, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 10117120. Throughput: 0: 1023.3. Samples: 1527752. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:03:03,100][00684] Avg episode reward: [(0, '4.518')] +[2023-02-25 04:03:08,093][00684] Fps is (10 sec: 4095.9, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 10141696. Throughput: 0: 997.5. Samples: 1533972. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:03:08,096][00684] Avg episode reward: [(0, '4.687')] +[2023-02-25 04:03:09,531][19689] Updated weights for policy 0, policy_version 2478 (0.0012) +[2023-02-25 04:03:13,093][00684] Fps is (10 sec: 4915.5, 60 sec: 4164.3, 300 sec: 3929.4). Total num frames: 10166272. Throughput: 0: 994.7. Samples: 1537656. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:03:13,096][00684] Avg episode reward: [(0, '4.782')] +[2023-02-25 04:03:18,096][00684] Fps is (10 sec: 4095.1, 60 sec: 4095.8, 300 sec: 3929.4). Total num frames: 10182656. Throughput: 0: 1023.4. Samples: 1544368. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:03:18,098][00684] Avg episode reward: [(0, '4.788')] +[2023-02-25 04:03:19,375][19689] Updated weights for policy 0, policy_version 2488 (0.0012) +[2023-02-25 04:03:23,100][00684] Fps is (10 sec: 3274.6, 60 sec: 3959.0, 300 sec: 3915.4). Total num frames: 10199040. Throughput: 0: 1022.2. Samples: 1549440. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:03:23,110][00684] Avg episode reward: [(0, '4.692')] +[2023-02-25 04:03:28,093][00684] Fps is (10 sec: 3277.6, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 10215424. Throughput: 0: 1009.6. Samples: 1551476. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:03:28,096][00684] Avg episode reward: [(0, '4.784')] +[2023-02-25 04:03:33,095][00684] Fps is (10 sec: 2868.7, 60 sec: 3822.8, 300 sec: 3859.9). Total num frames: 10227712. Throughput: 0: 929.7. Samples: 1555452. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:03:33,099][00684] Avg episode reward: [(0, '4.860')] +[2023-02-25 04:03:33,502][19689] Updated weights for policy 0, policy_version 2498 (0.0024) +[2023-02-25 04:03:38,094][00684] Fps is (10 sec: 2867.0, 60 sec: 3754.6, 300 sec: 3846.1). Total num frames: 10244096. Throughput: 0: 894.7. Samples: 1560516. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:03:38,097][00684] Avg episode reward: [(0, '4.949')] +[2023-02-25 04:03:43,093][00684] Fps is (10 sec: 3687.0, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 10264576. Throughput: 0: 903.2. Samples: 1563432. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:03:43,097][00684] Avg episode reward: [(0, '5.060')] +[2023-02-25 04:03:44,565][19689] Updated weights for policy 0, policy_version 2508 (0.0011) +[2023-02-25 04:03:48,093][00684] Fps is (10 sec: 3686.6, 60 sec: 3686.4, 300 sec: 3860.0). Total num frames: 10280960. Throughput: 0: 918.0. Samples: 1569060. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:03:48,100][00684] Avg episode reward: [(0, '4.940')] +[2023-02-25 04:03:48,122][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002510_10280960.pth... +[2023-02-25 04:03:48,318][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002283_9351168.pth +[2023-02-25 04:03:53,094][00684] Fps is (10 sec: 3685.9, 60 sec: 3618.1, 300 sec: 3873.8). Total num frames: 10301440. Throughput: 0: 890.9. Samples: 1574064. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:03:53,100][00684] Avg episode reward: [(0, '4.753')] +[2023-02-25 04:03:56,540][19689] Updated weights for policy 0, policy_version 2518 (0.0013) +[2023-02-25 04:03:58,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3873.8). Total num frames: 10317824. Throughput: 0: 866.7. Samples: 1576656. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:03:58,096][00684] Avg episode reward: [(0, '4.646')] +[2023-02-25 04:04:03,093][00684] Fps is (10 sec: 4096.5, 60 sec: 3754.7, 300 sec: 3915.6). Total num frames: 10342400. Throughput: 0: 879.2. Samples: 1583932. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:04:03,099][00684] Avg episode reward: [(0, '4.482')] +[2023-02-25 04:04:04,782][19689] Updated weights for policy 0, policy_version 2528 (0.0017) +[2023-02-25 04:04:08,093][00684] Fps is (10 sec: 4915.3, 60 sec: 3754.7, 300 sec: 3929.4). Total num frames: 10366976. Throughput: 0: 921.2. Samples: 1590888. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:04:08,096][00684] Avg episode reward: [(0, '4.418')] +[2023-02-25 04:04:13,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3618.1, 300 sec: 3929.4). Total num frames: 10383360. Throughput: 0: 932.2. Samples: 1593424. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:04:13,096][00684] Avg episode reward: [(0, '4.454')] +[2023-02-25 04:04:15,971][19689] Updated weights for policy 0, policy_version 2538 (0.0034) +[2023-02-25 04:04:18,095][00684] Fps is (10 sec: 3276.3, 60 sec: 3618.2, 300 sec: 3901.6). Total num frames: 10399744. Throughput: 0: 957.7. Samples: 1598548. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:04:18,101][00684] Avg episode reward: [(0, '4.561')] +[2023-02-25 04:04:23,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3755.1, 300 sec: 3901.6). Total num frames: 10424320. Throughput: 0: 976.8. Samples: 1604472. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:04:23,096][00684] Avg episode reward: [(0, '4.679')] +[2023-02-25 04:04:25,793][19689] Updated weights for policy 0, policy_version 2548 (0.0012) +[2023-02-25 04:04:28,093][00684] Fps is (10 sec: 4915.9, 60 sec: 3891.2, 300 sec: 3929.4). Total num frames: 10448896. Throughput: 0: 995.6. Samples: 1608232. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:04:28,102][00684] Avg episode reward: [(0, '4.640')] +[2023-02-25 04:04:33,093][00684] Fps is (10 sec: 4095.9, 60 sec: 3959.6, 300 sec: 3929.4). Total num frames: 10465280. Throughput: 0: 1028.0. Samples: 1615320. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:04:33,097][00684] Avg episode reward: [(0, '4.806')] +[2023-02-25 04:04:35,762][19689] Updated weights for policy 0, policy_version 2558 (0.0012) +[2023-02-25 04:04:38,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.8, 300 sec: 3929.4). Total num frames: 10485760. Throughput: 0: 1027.5. Samples: 1620300. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:04:38,096][00684] Avg episode reward: [(0, '5.007')] +[2023-02-25 04:04:43,094][00684] Fps is (10 sec: 3686.2, 60 sec: 3959.4, 300 sec: 3901.6). Total num frames: 10502144. Throughput: 0: 1026.0. Samples: 1622828. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:04:43,100][00684] Avg episode reward: [(0, '4.893')] +[2023-02-25 04:04:46,814][19689] Updated weights for policy 0, policy_version 2568 (0.0022) +[2023-02-25 04:04:48,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.7, 300 sec: 3901.6). Total num frames: 10522624. Throughput: 0: 992.2. Samples: 1628580. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:04:48,096][00684] Avg episode reward: [(0, '4.654')] +[2023-02-25 04:04:53,093][00684] Fps is (10 sec: 4505.9, 60 sec: 4096.1, 300 sec: 3915.5). Total num frames: 10547200. Throughput: 0: 1003.3. Samples: 1636036. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:04:53,096][00684] Avg episode reward: [(0, '4.648')] +[2023-02-25 04:04:54,973][19689] Updated weights for policy 0, policy_version 2578 (0.0012) +[2023-02-25 04:04:58,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4164.3, 300 sec: 3929.4). Total num frames: 10567680. Throughput: 0: 1025.4. Samples: 1639568. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:04:58,098][00684] Avg episode reward: [(0, '4.550')] +[2023-02-25 04:05:03,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.7, 300 sec: 3929.4). Total num frames: 10584064. Throughput: 0: 1025.5. Samples: 1644696. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:05:03,097][00684] Avg episode reward: [(0, '4.394')] +[2023-02-25 04:05:06,916][19689] Updated weights for policy 0, policy_version 2588 (0.0011) +[2023-02-25 04:05:08,094][00684] Fps is (10 sec: 3276.6, 60 sec: 3891.1, 300 sec: 3887.7). Total num frames: 10600448. Throughput: 0: 1008.2. Samples: 1649844. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:05:08,100][00684] Avg episode reward: [(0, '4.460')] +[2023-02-25 04:05:13,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3901.7). Total num frames: 10625024. Throughput: 0: 991.9. Samples: 1652868. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:05:13,096][00684] Avg episode reward: [(0, '4.481')] +[2023-02-25 04:05:15,772][19689] Updated weights for policy 0, policy_version 2598 (0.0018) +[2023-02-25 04:05:18,094][00684] Fps is (10 sec: 4915.4, 60 sec: 4164.3, 300 sec: 3915.5). Total num frames: 10649600. Throughput: 0: 1002.6. Samples: 1660436. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:05:18,102][00684] Avg episode reward: [(0, '4.680')] +[2023-02-25 04:05:23,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3929.4). Total num frames: 10670080. Throughput: 0: 1025.8. Samples: 1666460. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:05:23,099][00684] Avg episode reward: [(0, '4.735')] +[2023-02-25 04:05:26,496][19689] Updated weights for policy 0, policy_version 2608 (0.0027) +[2023-02-25 04:05:28,093][00684] Fps is (10 sec: 3686.6, 60 sec: 3959.5, 300 sec: 3929.4). Total num frames: 10686464. Throughput: 0: 1024.4. Samples: 1668924. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:05:28,099][00684] Avg episode reward: [(0, '4.603')] +[2023-02-25 04:05:33,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 10702848. Throughput: 0: 1009.2. Samples: 1673996. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:05:33,096][00684] Avg episode reward: [(0, '4.689')] +[2023-02-25 04:05:37,082][19689] Updated weights for policy 0, policy_version 2618 (0.0019) +[2023-02-25 04:05:38,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3901.7). Total num frames: 10727424. Throughput: 0: 993.0. Samples: 1680720. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:05:38,099][00684] Avg episode reward: [(0, '5.073')] +[2023-02-25 04:05:43,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4027.8, 300 sec: 3901.7). Total num frames: 10743808. Throughput: 0: 981.7. Samples: 1683744. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:05:43,098][00684] Avg episode reward: [(0, '4.971')] +[2023-02-25 04:05:48,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 10760192. Throughput: 0: 968.9. Samples: 1688296. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:05:48,097][00684] Avg episode reward: [(0, '4.878')] +[2023-02-25 04:05:48,112][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002627_10760192.pth... +[2023-02-25 04:05:48,429][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002396_9814016.pth +[2023-02-25 04:05:49,946][19689] Updated weights for policy 0, policy_version 2628 (0.0012) +[2023-02-25 04:05:53,093][00684] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3873.8). Total num frames: 10772480. Throughput: 0: 939.5. Samples: 1692120. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:05:53,096][00684] Avg episode reward: [(0, '4.724')] +[2023-02-25 04:05:58,093][00684] Fps is (10 sec: 2457.6, 60 sec: 3618.1, 300 sec: 3832.2). Total num frames: 10784768. Throughput: 0: 915.9. Samples: 1694084. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:05:58,097][00684] Avg episode reward: [(0, '4.757')] +[2023-02-25 04:06:03,093][00684] Fps is (10 sec: 2867.2, 60 sec: 3618.1, 300 sec: 3804.4). Total num frames: 10801152. Throughput: 0: 856.1. Samples: 1698960. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:06:03,102][00684] Avg episode reward: [(0, '4.694')] +[2023-02-25 04:06:03,153][19689] Updated weights for policy 0, policy_version 2638 (0.0014) +[2023-02-25 04:06:08,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3823.0, 300 sec: 3860.0). Total num frames: 10829824. Throughput: 0: 883.9. Samples: 1706236. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:06:08,096][00684] Avg episode reward: [(0, '4.695')] +[2023-02-25 04:06:11,181][19689] Updated weights for policy 0, policy_version 2648 (0.0012) +[2023-02-25 04:06:13,093][00684] Fps is (10 sec: 4915.2, 60 sec: 3754.7, 300 sec: 3887.7). Total num frames: 10850304. Throughput: 0: 911.9. Samples: 1709960. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:06:13,096][00684] Avg episode reward: [(0, '5.080')] +[2023-02-25 04:06:18,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3915.5). Total num frames: 10870784. Throughput: 0: 929.8. Samples: 1715836. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:06:18,096][00684] Avg episode reward: [(0, '5.007')] +[2023-02-25 04:06:22,930][19689] Updated weights for policy 0, policy_version 2658 (0.0011) +[2023-02-25 04:06:23,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3915.5). Total num frames: 10887168. Throughput: 0: 891.3. Samples: 1720828. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:06:23,099][00684] Avg episode reward: [(0, '4.727')] +[2023-02-25 04:06:28,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3887.7). Total num frames: 10903552. Throughput: 0: 881.2. Samples: 1723400. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:06:28,098][00684] Avg episode reward: [(0, '4.569')] +[2023-02-25 04:06:32,292][19689] Updated weights for policy 0, policy_version 2668 (0.0012) +[2023-02-25 04:06:33,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3901.7). Total num frames: 10932224. Throughput: 0: 937.3. Samples: 1730476. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:06:33,096][00684] Avg episode reward: [(0, '4.541')] +[2023-02-25 04:06:38,093][00684] Fps is (10 sec: 4915.2, 60 sec: 3754.7, 300 sec: 3915.5). Total num frames: 10952704. Throughput: 0: 1013.3. Samples: 1737720. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:06:38,100][00684] Avg episode reward: [(0, '4.618')] +[2023-02-25 04:06:42,586][19689] Updated weights for policy 0, policy_version 2678 (0.0027) +[2023-02-25 04:06:43,093][00684] Fps is (10 sec: 4095.9, 60 sec: 3822.9, 300 sec: 3929.4). Total num frames: 10973184. Throughput: 0: 1023.8. Samples: 1740156. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:06:43,096][00684] Avg episode reward: [(0, '4.702')] +[2023-02-25 04:06:48,095][00684] Fps is (10 sec: 3685.9, 60 sec: 3822.8, 300 sec: 3915.5). Total num frames: 10989568. Throughput: 0: 1026.7. Samples: 1745164. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:06:48,099][00684] Avg episode reward: [(0, '4.652')] +[2023-02-25 04:06:53,093][00684] Fps is (10 sec: 3276.9, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 11005952. Throughput: 0: 990.7. Samples: 1750816. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:06:53,096][00684] Avg episode reward: [(0, '4.588')] +[2023-02-25 04:06:53,391][19689] Updated weights for policy 0, policy_version 2688 (0.0013) +[2023-02-25 04:06:58,093][00684] Fps is (10 sec: 4096.6, 60 sec: 4096.0, 300 sec: 3887.7). Total num frames: 11030528. Throughput: 0: 989.6. Samples: 1754492. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:06:58,096][00684] Avg episode reward: [(0, '4.657')] +[2023-02-25 04:07:01,892][19689] Updated weights for policy 0, policy_version 2698 (0.0019) +[2023-02-25 04:07:03,097][00684] Fps is (10 sec: 4913.4, 60 sec: 4232.3, 300 sec: 3915.5). Total num frames: 11055104. Throughput: 0: 1022.2. Samples: 1761840. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:07:03,102][00684] Avg episode reward: [(0, '4.718')] +[2023-02-25 04:07:08,094][00684] Fps is (10 sec: 4095.5, 60 sec: 4027.6, 300 sec: 3915.5). Total num frames: 11071488. Throughput: 0: 1023.4. Samples: 1766884. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:07:08,096][00684] Avg episode reward: [(0, '4.433')] +[2023-02-25 04:07:13,093][00684] Fps is (10 sec: 3278.0, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 11087872. Throughput: 0: 1021.9. Samples: 1769384. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:07:13,096][00684] Avg episode reward: [(0, '4.505')] +[2023-02-25 04:07:13,568][19689] Updated weights for policy 0, policy_version 2708 (0.0017) +[2023-02-25 04:07:18,093][00684] Fps is (10 sec: 3686.9, 60 sec: 3959.5, 300 sec: 3887.7). Total num frames: 11108352. Throughput: 0: 987.2. Samples: 1774900. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:07:18,096][00684] Avg episode reward: [(0, '4.816')] +[2023-02-25 04:07:22,528][19689] Updated weights for policy 0, policy_version 2718 (0.0012) +[2023-02-25 04:07:23,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3901.6). Total num frames: 11132928. Throughput: 0: 990.2. Samples: 1782280. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:07:23,101][00684] Avg episode reward: [(0, '4.760')] +[2023-02-25 04:07:28,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4164.3, 300 sec: 3915.5). Total num frames: 11153408. Throughput: 0: 1021.0. Samples: 1786100. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:07:28,102][00684] Avg episode reward: [(0, '4.569')] +[2023-02-25 04:07:33,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 11169792. Throughput: 0: 1020.7. Samples: 1791096. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:07:33,096][00684] Avg episode reward: [(0, '4.664')] +[2023-02-25 04:07:33,212][19689] Updated weights for policy 0, policy_version 2728 (0.0011) +[2023-02-25 04:07:38,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3887.7). Total num frames: 11186176. Throughput: 0: 1009.2. Samples: 1796232. Policy #0 lag: (min: 0.0, avg: 0.8, max: 3.0) +[2023-02-25 04:07:38,101][00684] Avg episode reward: [(0, '4.691')] +[2023-02-25 04:07:43,093][00684] Fps is (10 sec: 4095.9, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 11210752. Throughput: 0: 992.0. Samples: 1799132. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:07:43,095][00684] Avg episode reward: [(0, '4.968')] +[2023-02-25 04:07:43,477][19689] Updated weights for policy 0, policy_version 2738 (0.0012) +[2023-02-25 04:07:48,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4096.1, 300 sec: 3901.6). Total num frames: 11235328. Throughput: 0: 996.1. Samples: 1806660. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:07:48,096][00684] Avg episode reward: [(0, '4.831')] +[2023-02-25 04:07:48,115][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002743_11235328.pth... +[2023-02-25 04:07:48,273][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002510_10280960.pth +[2023-02-25 04:07:52,734][19689] Updated weights for policy 0, policy_version 2748 (0.0014) +[2023-02-25 04:07:53,095][00684] Fps is (10 sec: 4504.9, 60 sec: 4164.1, 300 sec: 3915.5). Total num frames: 11255808. Throughput: 0: 1023.5. Samples: 1812944. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:07:53,098][00684] Avg episode reward: [(0, '4.708')] +[2023-02-25 04:07:58,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 11268096. Throughput: 0: 1026.9. Samples: 1815596. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:07:58,101][00684] Avg episode reward: [(0, '4.745')] +[2023-02-25 04:08:03,093][00684] Fps is (10 sec: 2867.8, 60 sec: 3823.2, 300 sec: 3873.8). Total num frames: 11284480. Throughput: 0: 994.5. Samples: 1819652. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:08:03,102][00684] Avg episode reward: [(0, '4.774')] +[2023-02-25 04:08:07,441][19689] Updated weights for policy 0, policy_version 2758 (0.0014) +[2023-02-25 04:08:08,098][00684] Fps is (10 sec: 2865.8, 60 sec: 3754.4, 300 sec: 3832.1). Total num frames: 11296768. Throughput: 0: 917.4. Samples: 1823568. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:08:08,101][00684] Avg episode reward: [(0, '4.552')] +[2023-02-25 04:08:13,093][00684] Fps is (10 sec: 2867.2, 60 sec: 3754.7, 300 sec: 3832.2). Total num frames: 11313152. Throughput: 0: 888.2. Samples: 1826068. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:08:13,096][00684] Avg episode reward: [(0, '4.542')] +[2023-02-25 04:08:17,875][19689] Updated weights for policy 0, policy_version 2768 (0.0013) +[2023-02-25 04:08:18,097][00684] Fps is (10 sec: 4096.4, 60 sec: 3822.7, 300 sec: 3860.0). Total num frames: 11337728. Throughput: 0: 917.3. Samples: 1832376. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:08:18,102][00684] Avg episode reward: [(0, '4.557')] +[2023-02-25 04:08:23,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3860.0). Total num frames: 11354112. Throughput: 0: 921.2. Samples: 1837684. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:08:23,097][00684] Avg episode reward: [(0, '4.480')] +[2023-02-25 04:08:28,093][00684] Fps is (10 sec: 3278.1, 60 sec: 3618.1, 300 sec: 3873.9). Total num frames: 11370496. Throughput: 0: 913.6. Samples: 1840244. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:08:28,101][00684] Avg episode reward: [(0, '4.351')] +[2023-02-25 04:08:29,812][19689] Updated weights for policy 0, policy_version 2778 (0.0016) +[2023-02-25 04:08:33,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3887.7). Total num frames: 11390976. Throughput: 0: 860.1. Samples: 1845364. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:08:33,101][00684] Avg episode reward: [(0, '4.558')] +[2023-02-25 04:08:38,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3901.6). Total num frames: 11415552. Throughput: 0: 880.3. Samples: 1852556. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:08:38,100][00684] Avg episode reward: [(0, '4.720')] +[2023-02-25 04:08:38,781][19689] Updated weights for policy 0, policy_version 2788 (0.0023) +[2023-02-25 04:08:43,098][00684] Fps is (10 sec: 4503.4, 60 sec: 3754.4, 300 sec: 3915.4). Total num frames: 11436032. Throughput: 0: 904.3. Samples: 1856292. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:08:43,101][00684] Avg episode reward: [(0, '4.439')] +[2023-02-25 04:08:48,099][00684] Fps is (10 sec: 3684.3, 60 sec: 3617.8, 300 sec: 3901.6). Total num frames: 11452416. Throughput: 0: 938.5. Samples: 1861888. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:08:48,102][00684] Avg episode reward: [(0, '4.272')] +[2023-02-25 04:08:49,656][19689] Updated weights for policy 0, policy_version 2798 (0.0018) +[2023-02-25 04:08:53,093][00684] Fps is (10 sec: 3278.4, 60 sec: 3550.0, 300 sec: 3901.6). Total num frames: 11468800. Throughput: 0: 964.9. Samples: 1866984. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:08:53,096][00684] Avg episode reward: [(0, '4.381')] +[2023-02-25 04:08:58,093][00684] Fps is (10 sec: 3688.5, 60 sec: 3686.4, 300 sec: 3887.7). Total num frames: 11489280. Throughput: 0: 965.1. Samples: 1869496. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:08:58,095][00684] Avg episode reward: [(0, '4.590')] +[2023-02-25 04:08:59,928][19689] Updated weights for policy 0, policy_version 2808 (0.0012) +[2023-02-25 04:09:03,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3887.7). Total num frames: 11513856. Throughput: 0: 985.1. Samples: 1876700. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:09:03,100][00684] Avg episode reward: [(0, '4.706')] +[2023-02-25 04:09:08,096][00684] Fps is (10 sec: 4913.8, 60 sec: 4027.9, 300 sec: 3915.5). Total num frames: 11538432. Throughput: 0: 1024.0. Samples: 1883768. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:09:08,099][00684] Avg episode reward: [(0, '4.673')] +[2023-02-25 04:09:08,677][19689] Updated weights for policy 0, policy_version 2818 (0.0011) +[2023-02-25 04:09:13,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 11554816. Throughput: 0: 1023.7. Samples: 1886312. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:09:13,096][00684] Avg episode reward: [(0, '4.572')] +[2023-02-25 04:09:18,095][00684] Fps is (10 sec: 3277.1, 60 sec: 3891.3, 300 sec: 3887.7). Total num frames: 11571200. Throughput: 0: 1021.6. Samples: 1891340. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:09:18,098][00684] Avg episode reward: [(0, '4.558')] +[2023-02-25 04:09:20,811][19689] Updated weights for policy 0, policy_version 2828 (0.0012) +[2023-02-25 04:09:23,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3873.8). Total num frames: 11591680. Throughput: 0: 993.1. Samples: 1897244. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:09:23,102][00684] Avg episode reward: [(0, '4.777')] +[2023-02-25 04:09:28,093][00684] Fps is (10 sec: 4506.4, 60 sec: 4096.0, 300 sec: 3901.6). Total num frames: 11616256. Throughput: 0: 991.5. Samples: 1900904. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:09:28,098][00684] Avg episode reward: [(0, '4.659')] +[2023-02-25 04:09:29,217][19689] Updated weights for policy 0, policy_version 2838 (0.0015) +[2023-02-25 04:09:33,097][00684] Fps is (10 sec: 4503.9, 60 sec: 4095.7, 300 sec: 3901.6). Total num frames: 11636736. Throughput: 0: 1023.6. Samples: 1907948. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:09:33,100][00684] Avg episode reward: [(0, '4.501')] +[2023-02-25 04:09:38,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 11653120. Throughput: 0: 1023.9. Samples: 1913060. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:09:38,097][00684] Avg episode reward: [(0, '4.546')] +[2023-02-25 04:09:40,708][19689] Updated weights for policy 0, policy_version 2848 (0.0012) +[2023-02-25 04:09:43,093][00684] Fps is (10 sec: 3687.8, 60 sec: 3959.8, 300 sec: 3901.6). Total num frames: 11673600. Throughput: 0: 1025.0. Samples: 1915620. Policy #0 lag: (min: 0.0, avg: 0.8, max: 3.0) +[2023-02-25 04:09:43,098][00684] Avg episode reward: [(0, '4.561')] +[2023-02-25 04:09:48,095][00684] Fps is (10 sec: 4095.3, 60 sec: 4028.0, 300 sec: 3887.7). Total num frames: 11694080. Throughput: 0: 992.1. Samples: 1921348. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:09:48,096][00684] Avg episode reward: [(0, '4.432')] +[2023-02-25 04:09:48,114][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002855_11694080.pth... +[2023-02-25 04:09:48,241][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002627_10760192.pth +[2023-02-25 04:09:50,252][19689] Updated weights for policy 0, policy_version 2858 (0.0029) +[2023-02-25 04:09:53,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4164.3, 300 sec: 3901.6). Total num frames: 11718656. Throughput: 0: 999.2. Samples: 1928728. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:09:53,095][00684] Avg episode reward: [(0, '4.618')] +[2023-02-25 04:09:58,094][00684] Fps is (10 sec: 4505.9, 60 sec: 4164.2, 300 sec: 3915.5). Total num frames: 11739136. Throughput: 0: 1019.7. Samples: 1932200. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:09:58,097][00684] Avg episode reward: [(0, '4.839')] +[2023-02-25 04:10:00,243][19689] Updated weights for policy 0, policy_version 2868 (0.0022) +[2023-02-25 04:10:03,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 11755520. Throughput: 0: 1018.8. Samples: 1937184. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:10:03,101][00684] Avg episode reward: [(0, '4.689')] +[2023-02-25 04:10:08,093][00684] Fps is (10 sec: 3277.1, 60 sec: 3891.4, 300 sec: 3887.7). Total num frames: 11771904. Throughput: 0: 999.6. Samples: 1942228. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:10:08,096][00684] Avg episode reward: [(0, '4.787')] +[2023-02-25 04:10:11,095][19689] Updated weights for policy 0, policy_version 2878 (0.0015) +[2023-02-25 04:10:13,093][00684] Fps is (10 sec: 4095.9, 60 sec: 4027.7, 300 sec: 3887.7). Total num frames: 11796480. Throughput: 0: 988.4. Samples: 1945384. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:10:13,098][00684] Avg episode reward: [(0, '4.781')] +[2023-02-25 04:10:18,093][00684] Fps is (10 sec: 4095.9, 60 sec: 4027.8, 300 sec: 3873.8). Total num frames: 11812864. Throughput: 0: 968.3. Samples: 1951516. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:10:18,098][00684] Avg episode reward: [(0, '4.590')] +[2023-02-25 04:10:23,093][00684] Fps is (10 sec: 2867.3, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 11825152. Throughput: 0: 951.8. Samples: 1955892. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:10:23,096][00684] Avg episode reward: [(0, '4.526')] +[2023-02-25 04:10:23,384][19689] Updated weights for policy 0, policy_version 2888 (0.0020) +[2023-02-25 04:10:28,095][00684] Fps is (10 sec: 2457.3, 60 sec: 3686.3, 300 sec: 3846.1). Total num frames: 11837440. Throughput: 0: 939.4. Samples: 1957896. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:10:28,097][00684] Avg episode reward: [(0, '4.650')] +[2023-02-25 04:10:33,095][00684] Fps is (10 sec: 2866.6, 60 sec: 3618.2, 300 sec: 3818.3). Total num frames: 11853824. Throughput: 0: 904.1. Samples: 1962032. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:10:33,108][00684] Avg episode reward: [(0, '4.594')] +[2023-02-25 04:10:36,948][19689] Updated weights for policy 0, policy_version 2898 (0.0015) +[2023-02-25 04:10:38,093][00684] Fps is (10 sec: 3687.0, 60 sec: 3686.4, 300 sec: 3832.2). Total num frames: 11874304. Throughput: 0: 859.4. Samples: 1967400. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:10:38,096][00684] Avg episode reward: [(0, '4.595')] +[2023-02-25 04:10:43,093][00684] Fps is (10 sec: 4506.5, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 11898880. Throughput: 0: 865.7. Samples: 1971156. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:10:43,096][00684] Avg episode reward: [(0, '4.547')] +[2023-02-25 04:10:45,055][19689] Updated weights for policy 0, policy_version 2908 (0.0012) +[2023-02-25 04:10:48,098][00684] Fps is (10 sec: 4503.4, 60 sec: 3754.5, 300 sec: 3887.7). Total num frames: 11919360. Throughput: 0: 924.3. Samples: 1978784. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:10:48,108][00684] Avg episode reward: [(0, '4.547')] +[2023-02-25 04:10:53,095][00684] Fps is (10 sec: 4095.4, 60 sec: 3686.3, 300 sec: 3915.5). Total num frames: 11939840. Throughput: 0: 927.2. Samples: 1983952. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:10:53,097][00684] Avg episode reward: [(0, '4.497')] +[2023-02-25 04:10:56,388][19689] Updated weights for policy 0, policy_version 2918 (0.0012) +[2023-02-25 04:10:58,093][00684] Fps is (10 sec: 3688.2, 60 sec: 3618.2, 300 sec: 3915.5). Total num frames: 11956224. Throughput: 0: 914.1. Samples: 1986520. Policy #0 lag: (min: 0.0, avg: 0.8, max: 3.0) +[2023-02-25 04:10:58,096][00684] Avg episode reward: [(0, '4.549')] +[2023-02-25 04:11:03,093][00684] Fps is (10 sec: 3687.0, 60 sec: 3686.4, 300 sec: 3887.7). Total num frames: 11976704. Throughput: 0: 897.7. Samples: 1991912. 
Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:11:03,096][00684] Avg episode reward: [(0, '4.642')] +[2023-02-25 04:11:06,043][19689] Updated weights for policy 0, policy_version 2928 (0.0033) +[2023-02-25 04:11:08,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3901.6). Total num frames: 12001280. Throughput: 0: 967.2. Samples: 1999416. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:11:08,101][00684] Avg episode reward: [(0, '4.592')] +[2023-02-25 04:11:13,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 12021760. Throughput: 0: 1004.2. Samples: 2003084. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:11:13,096][00684] Avg episode reward: [(0, '4.775')] +[2023-02-25 04:11:15,623][19689] Updated weights for policy 0, policy_version 2938 (0.0012) +[2023-02-25 04:11:18,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 12038144. Throughput: 0: 1026.8. Samples: 2008236. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:11:18,095][00684] Avg episode reward: [(0, '4.755')] +[2023-02-25 04:11:23,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3901.6). Total num frames: 12054528. Throughput: 0: 1017.6. Samples: 2013192. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:11:23,098][00684] Avg episode reward: [(0, '4.492')] +[2023-02-25 04:11:27,092][19689] Updated weights for policy 0, policy_version 2948 (0.0023) +[2023-02-25 04:11:28,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4027.8, 300 sec: 3887.7). Total num frames: 12079104. Throughput: 0: 997.5. Samples: 2016044. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:11:28,096][00684] Avg episode reward: [(0, '4.608')] +[2023-02-25 04:11:33,093][00684] Fps is (10 sec: 4915.1, 60 sec: 4164.4, 300 sec: 3901.6). Total num frames: 12103680. Throughput: 0: 996.2. Samples: 2023608. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:11:33,096][00684] Avg episode reward: [(0, '4.656')] +[2023-02-25 04:11:35,596][19689] Updated weights for policy 0, policy_version 2958 (0.0011) +[2023-02-25 04:11:38,096][00684] Fps is (10 sec: 4504.4, 60 sec: 4164.1, 300 sec: 3901.6). Total num frames: 12124160. Throughput: 0: 1022.2. Samples: 2029952. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:11:38,099][00684] Avg episode reward: [(0, '4.550')] +[2023-02-25 04:11:43,094][00684] Fps is (10 sec: 3686.3, 60 sec: 4027.7, 300 sec: 3901.6). Total num frames: 12140544. Throughput: 0: 1022.8. Samples: 2032548. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 04:11:43,105][00684] Avg episode reward: [(0, '4.707')] +[2023-02-25 04:11:47,310][19689] Updated weights for policy 0, policy_version 2968 (0.0016) +[2023-02-25 04:11:48,094][00684] Fps is (10 sec: 3277.6, 60 sec: 3959.8, 300 sec: 3901.6). Total num frames: 12156928. Throughput: 0: 1015.7. Samples: 2037620. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:11:48,096][00684] Avg episode reward: [(0, '4.612')] +[2023-02-25 04:11:48,117][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002968_12156928.pth... +[2023-02-25 04:11:48,413][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002743_11235328.pth +[2023-02-25 04:11:53,093][00684] Fps is (10 sec: 4096.3, 60 sec: 4027.8, 300 sec: 3901.6). Total num frames: 12181504. Throughput: 0: 992.6. Samples: 2044084. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:11:53,096][00684] Avg episode reward: [(0, '4.423')] +[2023-02-25 04:11:55,960][19689] Updated weights for policy 0, policy_version 2978 (0.0013) +[2023-02-25 04:11:58,093][00684] Fps is (10 sec: 4915.4, 60 sec: 4164.3, 300 sec: 3901.7). Total num frames: 12206080. Throughput: 0: 994.9. Samples: 2047856. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 04:11:58,095][00684] Avg episode reward: [(0, '4.429')] +[2023-02-25 04:12:03,097][00684] Fps is (10 sec: 4094.4, 60 sec: 4095.7, 300 sec: 3901.6). Total num frames: 12222464. Throughput: 0: 1024.3. Samples: 2054332. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:12:03,100][00684] Avg episode reward: [(0, '4.500')] +[2023-02-25 04:12:06,882][19689] Updated weights for policy 0, policy_version 2988 (0.0011) +[2023-02-25 04:12:08,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 12242944. Throughput: 0: 1028.5. Samples: 2059476. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:12:08,095][00684] Avg episode reward: [(0, '4.498')] +[2023-02-25 04:12:13,093][00684] Fps is (10 sec: 3687.7, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 12259328. Throughput: 0: 1020.8. Samples: 2061980. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:12:13,096][00684] Avg episode reward: [(0, '4.564')] +[2023-02-25 04:12:17,058][19689] Updated weights for policy 0, policy_version 2998 (0.0011) +[2023-02-25 04:12:18,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4096.0, 300 sec: 3901.6). Total num frames: 12283904. Throughput: 0: 997.1. Samples: 2068476. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:12:18,096][00684] Avg episode reward: [(0, '4.822')] +[2023-02-25 04:12:23,093][00684] Fps is (10 sec: 4915.3, 60 sec: 4232.5, 300 sec: 3915.5). Total num frames: 12308480. Throughput: 0: 1021.2. Samples: 2075904. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:12:23,096][00684] Avg episode reward: [(0, '4.797')] +[2023-02-25 04:12:26,615][19689] Updated weights for policy 0, policy_version 3008 (0.0013) +[2023-02-25 04:12:28,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4096.0, 300 sec: 3915.5). Total num frames: 12324864. Throughput: 0: 1025.8. Samples: 2078708. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0) +[2023-02-25 04:12:28,101][00684] Avg episode reward: [(0, '4.822')] +[2023-02-25 04:12:33,093][00684] Fps is (10 sec: 2867.2, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 12337152. Throughput: 0: 1018.9. Samples: 2083468. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:12:33,096][00684] Avg episode reward: [(0, '4.665')] +[2023-02-25 04:12:38,094][00684] Fps is (10 sec: 2457.4, 60 sec: 3754.8, 300 sec: 3860.0). Total num frames: 12349440. Throughput: 0: 959.9. Samples: 2087280. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:12:38,097][00684] Avg episode reward: [(0, '4.772')] +[2023-02-25 04:12:41,404][19689] Updated weights for policy 0, policy_version 3018 (0.0012) +[2023-02-25 04:12:43,096][00684] Fps is (10 sec: 2866.5, 60 sec: 3754.5, 300 sec: 3832.2). Total num frames: 12365824. Throughput: 0: 919.9. Samples: 2089256. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:12:43,103][00684] Avg episode reward: [(0, '4.651')] +[2023-02-25 04:12:48,093][00684] Fps is (10 sec: 3686.7, 60 sec: 3823.0, 300 sec: 3832.2). Total num frames: 12386304. Throughput: 0: 887.7. Samples: 2094276. 
Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:12:48,096][00684] Avg episode reward: [(0, '4.654')] +[2023-02-25 04:12:51,556][19689] Updated weights for policy 0, policy_version 3028 (0.0020) +[2023-02-25 04:12:53,093][00684] Fps is (10 sec: 4097.0, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 12406784. Throughput: 0: 922.8. Samples: 2101000. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:12:53,100][00684] Avg episode reward: [(0, '4.724')] +[2023-02-25 04:12:58,098][00684] Fps is (10 sec: 3684.7, 60 sec: 3617.8, 300 sec: 3859.9). Total num frames: 12423168. Throughput: 0: 924.8. Samples: 2103600. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:12:58,100][00684] Avg episode reward: [(0, '4.629')] +[2023-02-25 04:13:03,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3618.4, 300 sec: 3873.9). Total num frames: 12439552. Throughput: 0: 893.1. Samples: 2108664. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:13:03,101][00684] Avg episode reward: [(0, '4.579')] +[2023-02-25 04:13:03,577][19689] Updated weights for policy 0, policy_version 3038 (0.0011) +[2023-02-25 04:13:08,093][00684] Fps is (10 sec: 4097.9, 60 sec: 3686.4, 300 sec: 3901.6). Total num frames: 12464128. Throughput: 0: 863.5. Samples: 2114760. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:13:08,102][00684] Avg episode reward: [(0, '4.659')] +[2023-02-25 04:13:12,314][19689] Updated weights for policy 0, policy_version 3048 (0.0017) +[2023-02-25 04:13:13,093][00684] Fps is (10 sec: 4915.1, 60 sec: 3822.9, 300 sec: 3901.7). Total num frames: 12488704. Throughput: 0: 883.3. Samples: 2118456. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:13:13,097][00684] Avg episode reward: [(0, '4.576')] +[2023-02-25 04:13:18,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3754.7, 300 sec: 3915.5). Total num frames: 12509184. Throughput: 0: 934.0. Samples: 2125496. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:13:18,096][00684] Avg episode reward: [(0, '4.560')] +[2023-02-25 04:13:22,713][19689] Updated weights for policy 0, policy_version 3058 (0.0011) +[2023-02-25 04:13:23,095][00684] Fps is (10 sec: 3685.9, 60 sec: 3618.0, 300 sec: 3915.5). Total num frames: 12525568. Throughput: 0: 958.9. Samples: 2130432. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:13:23,097][00684] Avg episode reward: [(0, '4.490')] +[2023-02-25 04:13:28,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3901.6). Total num frames: 12541952. Throughput: 0: 971.2. Samples: 2132956. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:13:28,101][00684] Avg episode reward: [(0, '4.447')] +[2023-02-25 04:13:33,093][00684] Fps is (10 sec: 3687.0, 60 sec: 3754.7, 300 sec: 3887.7). Total num frames: 12562432. Throughput: 0: 990.7. Samples: 2138856. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:13:33,098][00684] Avg episode reward: [(0, '4.524')] +[2023-02-25 04:13:33,334][19689] Updated weights for policy 0, policy_version 3068 (0.0014) +[2023-02-25 04:13:38,093][00684] Fps is (10 sec: 4915.1, 60 sec: 4027.8, 300 sec: 3915.6). Total num frames: 12591104. Throughput: 0: 1008.4. Samples: 2146376. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:13:38,095][00684] Avg episode reward: [(0, '4.582')] +[2023-02-25 04:13:42,115][19689] Updated weights for policy 0, policy_version 3078 (0.0012) +[2023-02-25 04:13:43,093][00684] Fps is (10 sec: 4505.5, 60 sec: 4027.9, 300 sec: 3915.6). Total num frames: 12607488. 
Throughput: 0: 1024.5. Samples: 2149696. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:13:43,098][00684] Avg episode reward: [(0, '4.553')] +[2023-02-25 04:13:48,096][00684] Fps is (10 sec: 3275.9, 60 sec: 3959.3, 300 sec: 3915.5). Total num frames: 12623872. Throughput: 0: 1025.4. Samples: 2154808. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:13:48,099][00684] Avg episode reward: [(0, '4.533')] +[2023-02-25 04:13:48,115][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003082_12623872.pth... +[2023-02-25 04:13:48,363][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002855_11694080.pth +[2023-02-25 04:13:53,093][00684] Fps is (10 sec: 3276.9, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 12640256. Throughput: 0: 1001.4. Samples: 2159824. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:13:53,097][00684] Avg episode reward: [(0, '4.428')] +[2023-02-25 04:13:54,078][19689] Updated weights for policy 0, policy_version 3088 (0.0012) +[2023-02-25 04:13:58,095][00684] Fps is (10 sec: 4096.5, 60 sec: 4027.9, 300 sec: 3901.6). Total num frames: 12664832. Throughput: 0: 990.5. Samples: 2163028. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:13:58,097][00684] Avg episode reward: [(0, '4.523')] +[2023-02-25 04:14:02,138][19689] Updated weights for policy 0, policy_version 3098 (0.0012) +[2023-02-25 04:14:03,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4164.3, 300 sec: 3901.7). Total num frames: 12689408. Throughput: 0: 998.2. Samples: 2170416. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:14:03,096][00684] Avg episode reward: [(0, '4.902')] +[2023-02-25 04:14:08,093][00684] Fps is (10 sec: 4506.4, 60 sec: 4096.0, 300 sec: 3915.5). Total num frames: 12709888. Throughput: 0: 1024.8. Samples: 2176548. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:14:08,096][00684] Avg episode reward: [(0, '4.748')] +[2023-02-25 04:14:13,100][00684] Fps is (10 sec: 3683.9, 60 sec: 3959.0, 300 sec: 3915.4). Total num frames: 12726272. Throughput: 0: 1022.5. Samples: 2178976. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:14:13,103][00684] Avg episode reward: [(0, '4.681')] +[2023-02-25 04:14:13,582][19689] Updated weights for policy 0, policy_version 3108 (0.0013) +[2023-02-25 04:14:18,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 12742656. Throughput: 0: 1006.3. Samples: 2184140. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:14:18,096][00684] Avg episode reward: [(0, '4.583')] +[2023-02-25 04:14:23,002][19689] Updated weights for policy 0, policy_version 3118 (0.0012) +[2023-02-25 04:14:23,093][00684] Fps is (10 sec: 4508.7, 60 sec: 4096.1, 300 sec: 3915.5). Total num frames: 12771328. Throughput: 0: 991.3. Samples: 2190984. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:14:23,096][00684] Avg episode reward: [(0, '4.778')] +[2023-02-25 04:14:28,093][00684] Fps is (10 sec: 5324.8, 60 sec: 4232.5, 300 sec: 3929.4). Total num frames: 12795904. Throughput: 0: 1001.9. Samples: 2194780. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:14:28,098][00684] Avg episode reward: [(0, '4.536')] +[2023-02-25 04:14:33,065][19689] Updated weights for policy 0, policy_version 3128 (0.0016) +[2023-02-25 04:14:33,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4164.3, 300 sec: 3929.4). Total num frames: 12812288. Throughput: 0: 1023.9. Samples: 2200880. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:14:33,099][00684] Avg episode reward: [(0, '4.354')] +[2023-02-25 04:14:38,096][00684] Fps is (10 sec: 3275.9, 60 sec: 3959.3, 300 sec: 3915.5). Total num frames: 12828672. Throughput: 0: 1026.5. Samples: 2206020. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:14:38,106][00684] Avg episode reward: [(0, '4.510')] +[2023-02-25 04:14:43,095][00684] Fps is (10 sec: 3276.2, 60 sec: 3959.4, 300 sec: 3901.6). Total num frames: 12845056. Throughput: 0: 1010.1. Samples: 2208484. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:14:43,097][00684] Avg episode reward: [(0, '4.641')] +[2023-02-25 04:14:44,048][19689] Updated weights for policy 0, policy_version 3138 (0.0012) +[2023-02-25 04:14:48,093][00684] Fps is (10 sec: 3687.4, 60 sec: 4027.9, 300 sec: 3887.7). Total num frames: 12865536. Throughput: 0: 995.5. Samples: 2215212. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:14:48,101][00684] Avg episode reward: [(0, '4.580')] +[2023-02-25 04:14:53,093][00684] Fps is (10 sec: 3687.0, 60 sec: 4027.7, 300 sec: 3873.9). Total num frames: 12881920. Throughput: 0: 971.1. Samples: 2220248. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:14:53,096][00684] Avg episode reward: [(0, '4.656')] +[2023-02-25 04:14:56,516][19689] Updated weights for policy 0, policy_version 3148 (0.0014) +[2023-02-25 04:14:58,095][00684] Fps is (10 sec: 2866.7, 60 sec: 3822.9, 300 sec: 3859.9). Total num frames: 12894208. Throughput: 0: 962.2. Samples: 2222272. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:14:58,098][00684] Avg episode reward: [(0, '4.865')] +[2023-02-25 04:15:03,093][00684] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3860.0). Total num frames: 12910592. Throughput: 0: 937.0. Samples: 2226304. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:15:03,100][00684] Avg episode reward: [(0, '4.798')] +[2023-02-25 04:15:08,094][00684] Fps is (10 sec: 3277.0, 60 sec: 3618.1, 300 sec: 3832.2). Total num frames: 12926976. Throughput: 0: 891.0. Samples: 2231080. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:15:08,102][00684] Avg episode reward: [(0, '4.680')] +[2023-02-25 04:15:09,627][19689] Updated weights for policy 0, policy_version 3158 (0.0012) +[2023-02-25 04:15:13,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3755.1, 300 sec: 3860.0). Total num frames: 12951552. Throughput: 0: 870.0. Samples: 2233932. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:15:13,096][00684] Avg episode reward: [(0, '4.664')] +[2023-02-25 04:15:17,975][19689] Updated weights for policy 0, policy_version 3168 (0.0012) +[2023-02-25 04:15:18,093][00684] Fps is (10 sec: 4915.7, 60 sec: 3891.2, 300 sec: 3901.6). Total num frames: 12976128. Throughput: 0: 900.4. Samples: 2241400. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:15:18,096][00684] Avg episode reward: [(0, '4.725')] +[2023-02-25 04:15:23,094][00684] Fps is (10 sec: 4095.8, 60 sec: 3686.4, 300 sec: 3915.5). Total num frames: 12992512. Throughput: 0: 927.8. Samples: 2247768. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:15:23,102][00684] Avg episode reward: [(0, '4.849')] +[2023-02-25 04:15:28,096][00684] Fps is (10 sec: 3276.0, 60 sec: 3549.7, 300 sec: 3915.5). Total num frames: 13008896. Throughput: 0: 928.7. Samples: 2250276. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:15:28,106][00684] Avg episode reward: [(0, '4.841')] +[2023-02-25 04:15:28,990][19689] Updated weights for policy 0, policy_version 3178 (0.0014) +[2023-02-25 04:15:33,093][00684] Fps is (10 sec: 3277.0, 60 sec: 3549.9, 300 sec: 3901.6). Total num frames: 13025280. Throughput: 0: 893.6. Samples: 2255424. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:15:33,107][00684] Avg episode reward: [(0, '4.680')] +[2023-02-25 04:15:38,093][00684] Fps is (10 sec: 4097.0, 60 sec: 3686.6, 300 sec: 3901.6). Total num frames: 13049856. Throughput: 0: 924.5. Samples: 2261852. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:15:38,096][00684] Avg episode reward: [(0, '4.410')] +[2023-02-25 04:15:39,047][19689] Updated weights for policy 0, policy_version 3188 (0.0017) +[2023-02-25 04:15:43,093][00684] Fps is (10 sec: 4915.2, 60 sec: 3823.0, 300 sec: 3915.6). Total num frames: 13074432. Throughput: 0: 960.2. Samples: 2265480. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:15:43,096][00684] Avg episode reward: [(0, '4.483')] +[2023-02-25 04:15:48,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 3915.5). Total num frames: 13094912. Throughput: 0: 1018.2. Samples: 2272124. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:15:48,096][00684] Avg episode reward: [(0, '4.656')] +[2023-02-25 04:15:48,111][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003197_13094912.pth... +[2023-02-25 04:15:48,352][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002968_12156928.pth +[2023-02-25 04:15:48,898][19689] Updated weights for policy 0, policy_version 3198 (0.0012) +[2023-02-25 04:15:53,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3915.5). Total num frames: 13111296. Throughput: 0: 1023.8. Samples: 2277152. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:15:53,096][00684] Avg episode reward: [(0, '4.724')] +[2023-02-25 04:15:58,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3891.3, 300 sec: 3901.6). Total num frames: 13127680. Throughput: 0: 1016.4. Samples: 2279672. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:15:58,096][00684] Avg episode reward: [(0, '4.756')] +[2023-02-25 04:16:00,002][19689] Updated weights for policy 0, policy_version 3208 (0.0014) +[2023-02-25 04:16:03,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3901.6). Total num frames: 13152256. Throughput: 0: 991.6. Samples: 2286024. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:16:03,099][00684] Avg episode reward: [(0, '4.615')] +[2023-02-25 04:16:07,963][19689] Updated weights for policy 0, policy_version 3218 (0.0011) +[2023-02-25 04:16:08,093][00684] Fps is (10 sec: 5324.8, 60 sec: 4232.6, 300 sec: 3929.4). Total num frames: 13180928. Throughput: 0: 1020.2. Samples: 2293676. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:16:08,102][00684] Avg episode reward: [(0, '4.670')] +[2023-02-25 04:16:13,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4096.0, 300 sec: 3929.4). Total num frames: 13197312. Throughput: 0: 1030.7. Samples: 2296656. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:16:13,102][00684] Avg episode reward: [(0, '4.589')] +[2023-02-25 04:16:18,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 3929.4). Total num frames: 13213696. Throughput: 0: 1031.8. Samples: 2301856. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:16:18,100][00684] Avg episode reward: [(0, '4.662')] +[2023-02-25 04:16:19,887][19689] Updated weights for policy 0, policy_version 3228 (0.0014) +[2023-02-25 04:16:23,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 13230080. Throughput: 0: 1001.5. Samples: 2306920. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:16:23,103][00684] Avg episode reward: [(0, '4.745')] +[2023-02-25 04:16:28,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4096.2, 300 sec: 3901.6). Total num frames: 13254656. Throughput: 0: 1000.4. Samples: 2310500. Policy #0 lag: (min: 0.0, avg: 0.8, max: 3.0) +[2023-02-25 04:16:28,096][00684] Avg episode reward: [(0, '4.773')] +[2023-02-25 04:16:28,958][19689] Updated weights for policy 0, policy_version 3238 (0.0016) +[2023-02-25 04:16:33,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4232.5, 300 sec: 3915.5). Total num frames: 13279232. Throughput: 0: 1023.3. Samples: 2318172. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:16:33,100][00684] Avg episode reward: [(0, '4.740')] +[2023-02-25 04:16:38,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4164.3, 300 sec: 3929.4). Total num frames: 13299712. Throughput: 0: 1035.0. Samples: 2323728. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:16:38,101][00684] Avg episode reward: [(0, '4.726')] +[2023-02-25 04:16:39,597][19689] Updated weights for policy 0, policy_version 3248 (0.0025) +[2023-02-25 04:16:43,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.7, 300 sec: 3929.4). Total num frames: 13316096. Throughput: 0: 1036.6. Samples: 2326320. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:16:43,099][00684] Avg episode reward: [(0, '4.624')] +[2023-02-25 04:16:48,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 3901.6). Total num frames: 13332480. Throughput: 0: 1007.7. Samples: 2331372. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:16:48,095][00684] Avg episode reward: [(0, '4.672')] +[2023-02-25 04:16:50,004][19689] Updated weights for policy 0, policy_version 3258 (0.0026) +[2023-02-25 04:16:53,094][00684] Fps is (10 sec: 4095.7, 60 sec: 4096.0, 300 sec: 3901.6). Total num frames: 13357056. Throughput: 0: 996.8. Samples: 2338532. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:16:53,096][00684] Avg episode reward: [(0, '4.826')] +[2023-02-25 04:16:58,095][00684] Fps is (10 sec: 4914.3, 60 sec: 4232.4, 300 sec: 3929.4). Total num frames: 13381632. Throughput: 0: 1013.7. Samples: 2342276. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:16:58,098][00684] Avg episode reward: [(0, '4.778')] +[2023-02-25 04:16:58,920][19689] Updated weights for policy 0, policy_version 3268 (0.0024) +[2023-02-25 04:17:03,095][00684] Fps is (10 sec: 4095.5, 60 sec: 4095.9, 300 sec: 3915.5). Total num frames: 13398016. Throughput: 0: 1030.3. Samples: 2348220. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:17:03,098][00684] Avg episode reward: [(0, '4.718')] +[2023-02-25 04:17:08,093][00684] Fps is (10 sec: 2867.7, 60 sec: 3822.9, 300 sec: 3901.6). Total num frames: 13410304. Throughput: 0: 1010.3. Samples: 2352384. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:17:08,096][00684] Avg episode reward: [(0, '4.788')] +[2023-02-25 04:17:13,093][00684] Fps is (10 sec: 2458.0, 60 sec: 3754.7, 300 sec: 3860.0). Total num frames: 13422592. Throughput: 0: 975.0. Samples: 2354376. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:17:13,098][00684] Avg episode reward: [(0, '4.955')] +[2023-02-25 04:17:13,396][19689] Updated weights for policy 0, policy_version 3278 (0.0013) +[2023-02-25 04:17:18,094][00684] Fps is (10 sec: 2867.1, 60 sec: 3754.6, 300 sec: 3832.2). Total num frames: 13438976. Throughput: 0: 897.4. Samples: 2358556. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:17:18,096][00684] Avg episode reward: [(0, '4.865')] +[2023-02-25 04:17:23,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3860.0). Total num frames: 13463552. Throughput: 0: 906.4. Samples: 2364516. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:17:23,103][00684] Avg episode reward: [(0, '4.863')] +[2023-02-25 04:17:23,804][19689] Updated weights for policy 0, policy_version 3288 (0.0023) +[2023-02-25 04:17:28,093][00684] Fps is (10 sec: 4505.7, 60 sec: 3822.9, 300 sec: 3887.7). Total num frames: 13484032. Throughput: 0: 932.1. Samples: 2368264. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:17:28,098][00684] Avg episode reward: [(0, '4.948')] +[2023-02-25 04:17:33,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3901.6). Total num frames: 13500416. Throughput: 0: 937.4. Samples: 2373556. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:17:33,096][00684] Avg episode reward: [(0, '4.921')] +[2023-02-25 04:17:34,796][19689] Updated weights for policy 0, policy_version 3298 (0.0018) +[2023-02-25 04:17:38,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 3901.6). Total num frames: 13516800. Throughput: 0: 891.6. Samples: 2378652. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:17:38,095][00684] Avg episode reward: [(0, '5.078')] +[2023-02-25 04:17:43,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3915.5). Total num frames: 13541376. Throughput: 0: 874.8. Samples: 2381640. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:17:43,096][00684] Avg episode reward: [(0, '4.963')] +[2023-02-25 04:17:44,273][19689] Updated weights for policy 0, policy_version 3308 (0.0014) +[2023-02-25 04:17:48,093][00684] Fps is (10 sec: 4915.2, 60 sec: 3891.2, 300 sec: 3929.4). Total num frames: 13565952. Throughput: 0: 912.2. Samples: 2389268. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:17:48,096][00684] Avg episode reward: [(0, '5.131')] +[2023-02-25 04:17:48,110][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003312_13565952.pth... +[2023-02-25 04:17:48,277][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003082_12623872.pth +[2023-02-25 04:17:53,094][00684] Fps is (10 sec: 4505.1, 60 sec: 3822.9, 300 sec: 3943.3). Total num frames: 13586432. Throughput: 0: 966.0. Samples: 2395856. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:17:53,097][00684] Avg episode reward: [(0, '5.267')] +[2023-02-25 04:17:53,717][19689] Updated weights for policy 0, policy_version 3318 (0.0022) +[2023-02-25 04:17:58,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3686.5, 300 sec: 3943.3). Total num frames: 13602816. Throughput: 0: 980.1. Samples: 2398480. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:17:58,095][00684] Avg episode reward: [(0, '5.212')] +[2023-02-25 04:18:03,096][00684] Fps is (10 sec: 3276.1, 60 sec: 3686.3, 300 sec: 3915.5). Total num frames: 13619200. Throughput: 0: 1002.1. Samples: 2403652. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:18:03,104][00684] Avg episode reward: [(0, '5.516')] +[2023-02-25 04:18:04,968][19689] Updated weights for policy 0, policy_version 3328 (0.0012) +[2023-02-25 04:18:08,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 13643776. Throughput: 0: 1016.6. Samples: 2410264. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:18:08,096][00684] Avg episode reward: [(0, '5.355')] +[2023-02-25 04:18:13,084][19689] Updated weights for policy 0, policy_version 3338 (0.0012) +[2023-02-25 04:18:13,096][00684] Fps is (10 sec: 5325.1, 60 sec: 4164.1, 300 sec: 3943.2). Total num frames: 13672448. Throughput: 0: 1019.0. Samples: 2414120. Policy #0 lag: (min: 0.0, avg: 0.8, max: 3.0) +[2023-02-25 04:18:13,099][00684] Avg episode reward: [(0, '5.259')] +[2023-02-25 04:18:18,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4232.6, 300 sec: 3957.2). Total num frames: 13692928. Throughput: 0: 1052.4. Samples: 2420916. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:18:18,096][00684] Avg episode reward: [(0, '5.181')] +[2023-02-25 04:18:23,093][00684] Fps is (10 sec: 3687.4, 60 sec: 4096.0, 300 sec: 3957.2). Total num frames: 13709312. Throughput: 0: 1053.8. Samples: 2426072. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:18:23,098][00684] Avg episode reward: [(0, '5.447')] +[2023-02-25 04:18:24,474][19689] Updated weights for policy 0, policy_version 3348 (0.0015) +[2023-02-25 04:18:28,093][00684] Fps is (10 sec: 3276.8, 60 sec: 4027.7, 300 sec: 3943.3). Total num frames: 13725696. Throughput: 0: 1045.3. Samples: 2428680. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:18:28,099][00684] Avg episode reward: [(0, '5.386')] +[2023-02-25 04:18:33,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4164.3, 300 sec: 3929.4). Total num frames: 13750272. Throughput: 0: 1016.7. Samples: 2435020. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:18:33,096][00684] Avg episode reward: [(0, '5.244')] +[2023-02-25 04:18:33,744][19689] Updated weights for policy 0, policy_version 3358 (0.0018) +[2023-02-25 04:18:38,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4300.8, 300 sec: 3957.2). Total num frames: 13774848. Throughput: 0: 1040.7. Samples: 2442688. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:18:38,096][00684] Avg episode reward: [(0, '5.451')] +[2023-02-25 04:18:43,093][00684] Fps is (10 sec: 4096.1, 60 sec: 4164.3, 300 sec: 3957.2). Total num frames: 13791232. Throughput: 0: 1051.1. Samples: 2445780. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:18:43,095][00684] Avg episode reward: [(0, '5.625')] +[2023-02-25 04:18:43,129][19689] Updated weights for policy 0, policy_version 3368 (0.0014) +[2023-02-25 04:18:48,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4096.0, 300 sec: 3971.0). Total num frames: 13811712. Throughput: 0: 1051.7. Samples: 2450976. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:18:48,099][00684] Avg episode reward: [(0, '5.674')] +[2023-02-25 04:18:53,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.8, 300 sec: 3943.3). Total num frames: 13828096. Throughput: 0: 1021.0. Samples: 2456208. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:18:53,096][00684] Avg episode reward: [(0, '5.736')] +[2023-02-25 04:18:54,031][19689] Updated weights for policy 0, policy_version 3378 (0.0016) +[2023-02-25 04:18:58,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4164.3, 300 sec: 3943.3). Total num frames: 13852672. 
Throughput: 0: 1017.1. Samples: 2459888. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:18:58,096][00684] Avg episode reward: [(0, '5.610')] +[2023-02-25 04:19:02,092][19689] Updated weights for policy 0, policy_version 3388 (0.0012) +[2023-02-25 04:19:03,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4301.0, 300 sec: 3957.2). Total num frames: 13877248. Throughput: 0: 1037.9. Samples: 2467620. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:19:03,096][00684] Avg episode reward: [(0, '5.544')] +[2023-02-25 04:19:08,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4232.5, 300 sec: 3971.1). Total num frames: 13897728. Throughput: 0: 1051.7. Samples: 2473400. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:19:08,098][00684] Avg episode reward: [(0, '5.548')] +[2023-02-25 04:19:13,095][00684] Fps is (10 sec: 3685.7, 60 sec: 4027.8, 300 sec: 3971.0). Total num frames: 13914112. Throughput: 0: 1050.5. Samples: 2475956. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:19:13,098][00684] Avg episode reward: [(0, '5.909')] +[2023-02-25 04:19:13,103][19675] Saving new best policy, reward=5.909! +[2023-02-25 04:19:14,439][19689] Updated weights for policy 0, policy_version 3398 (0.0027) +[2023-02-25 04:19:18,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 3929.4). Total num frames: 13930496. Throughput: 0: 1024.0. Samples: 2481100. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:19:18,096][00684] Avg episode reward: [(0, '6.153')] +[2023-02-25 04:19:18,182][19675] Saving new best policy, reward=6.153! +[2023-02-25 04:19:23,093][00684] Fps is (10 sec: 4096.8, 60 sec: 4096.0, 300 sec: 3929.4). Total num frames: 13955072. Throughput: 0: 1000.2. Samples: 2487696. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:19:23,095][00684] Avg episode reward: [(0, '5.872')] +[2023-02-25 04:19:24,180][19689] Updated weights for policy 0, policy_version 3408 (0.0030) +[2023-02-25 04:19:28,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.7, 300 sec: 3915.5). Total num frames: 13967360. Throughput: 0: 989.0. Samples: 2490284. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:19:28,100][00684] Avg episode reward: [(0, '5.828')] +[2023-02-25 04:19:33,097][00684] Fps is (10 sec: 2866.1, 60 sec: 3891.0, 300 sec: 3915.5). Total num frames: 13983744. Throughput: 0: 968.5. Samples: 2494560. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:19:33,108][00684] Avg episode reward: [(0, '5.580')] +[2023-02-25 04:19:38,093][00684] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3901.6). Total num frames: 13996032. Throughput: 0: 943.8. Samples: 2498680. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:19:38,097][00684] Avg episode reward: [(0, '5.714')] +[2023-02-25 04:19:38,395][19689] Updated weights for policy 0, policy_version 3418 (0.0012) +[2023-02-25 04:19:43,093][00684] Fps is (10 sec: 3278.0, 60 sec: 3754.7, 300 sec: 3901.6). Total num frames: 14016512. Throughput: 0: 916.9. Samples: 2501148. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:19:43,096][00684] Avg episode reward: [(0, '5.746')] +[2023-02-25 04:19:48,093][00684] Fps is (10 sec: 4095.9, 60 sec: 3754.7, 300 sec: 3915.5). Total num frames: 14036992. Throughput: 0: 879.2. Samples: 2507184. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:19:48,102][00684] Avg episode reward: [(0, '6.296')] +[2023-02-25 04:19:48,116][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003427_14036992.pth... +[2023-02-25 04:19:48,248][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003197_13094912.pth +[2023-02-25 04:19:48,263][19675] Saving new best policy, reward=6.296! +[2023-02-25 04:19:48,582][19689] Updated weights for policy 0, policy_version 3428 (0.0030) +[2023-02-25 04:19:53,095][00684] Fps is (10 sec: 4914.5, 60 sec: 3959.4, 300 sec: 3971.0). Total num frames: 14065664. Throughput: 0: 917.5. Samples: 2514688. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:19:53,097][00684] Avg episode reward: [(0, '6.378')] +[2023-02-25 04:19:53,104][19675] Saving new best policy, reward=6.378! +[2023-02-25 04:19:57,114][19689] Updated weights for policy 0, policy_version 3438 (0.0013) +[2023-02-25 04:19:58,093][00684] Fps is (10 sec: 4505.7, 60 sec: 3822.9, 300 sec: 3971.0). Total num frames: 14082048. Throughput: 0: 936.7. Samples: 2518104. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:19:58,100][00684] Avg episode reward: [(0, '6.228')] +[2023-02-25 04:20:03,093][00684] Fps is (10 sec: 3277.3, 60 sec: 3686.4, 300 sec: 3971.1). Total num frames: 14098432. Throughput: 0: 938.6. Samples: 2523336. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:20:03,104][00684] Avg episode reward: [(0, '6.204')] +[2023-02-25 04:20:08,094][00684] Fps is (10 sec: 3276.6, 60 sec: 3618.1, 300 sec: 3943.3). Total num frames: 14114816. Throughput: 0: 908.4. Samples: 2528576. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-25 04:20:08,099][00684] Avg episode reward: [(0, '5.903')] +[2023-02-25 04:20:09,003][19689] Updated weights for policy 0, policy_version 3448 (0.0018) +[2023-02-25 04:20:13,097][00684] Fps is (10 sec: 4094.5, 60 sec: 3754.5, 300 sec: 3943.2). Total num frames: 14139392. Throughput: 0: 921.3. Samples: 2531744. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:20:13,100][00684] Avg episode reward: [(0, '6.137')] +[2023-02-25 04:20:17,506][19689] Updated weights for policy 0, policy_version 3458 (0.0011) +[2023-02-25 04:20:18,093][00684] Fps is (10 sec: 5325.0, 60 sec: 3959.5, 300 sec: 3984.9). Total num frames: 14168064. Throughput: 0: 995.4. Samples: 2539348. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:20:18,100][00684] Avg episode reward: [(0, '6.846')] +[2023-02-25 04:20:18,118][19675] Saving new best policy, reward=6.846! +[2023-02-25 04:20:23,093][00684] Fps is (10 sec: 4507.3, 60 sec: 3822.9, 300 sec: 3985.0). Total num frames: 14184448. Throughput: 0: 1039.1. Samples: 2545440. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:20:23,099][00684] Avg episode reward: [(0, '6.680')] +[2023-02-25 04:20:28,094][00684] Fps is (10 sec: 3276.7, 60 sec: 3891.2, 300 sec: 3984.9). Total num frames: 14200832. Throughput: 0: 1040.7. Samples: 2547980. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:20:28,096][00684] Avg episode reward: [(0, '6.817')] +[2023-02-25 04:20:28,343][19689] Updated weights for policy 0, policy_version 3468 (0.0013) +[2023-02-25 04:20:33,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3959.7, 300 sec: 3971.0). Total num frames: 14221312. Throughput: 0: 1019.7. Samples: 2553072. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:20:33,096][00684] Avg episode reward: [(0, '6.554')] +[2023-02-25 04:20:38,093][00684] Fps is (10 sec: 4096.2, 60 sec: 4096.0, 300 sec: 3957.2). Total num frames: 14241792. Throughput: 0: 1006.3. Samples: 2559968. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:20:38,096][00684] Avg episode reward: [(0, '6.175')] +[2023-02-25 04:20:38,122][19689] Updated weights for policy 0, policy_version 3478 (0.0013) +[2023-02-25 04:20:43,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4232.5, 300 sec: 3984.9). Total num frames: 14270464. Throughput: 0: 1014.8. Samples: 2563772. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:20:43,096][00684] Avg episode reward: [(0, '6.609')] +[2023-02-25 04:20:47,677][19689] Updated weights for policy 0, policy_version 3488 (0.0018) +[2023-02-25 04:20:48,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4164.3, 300 sec: 3984.9). Total num frames: 14286848. Throughput: 0: 1038.0. Samples: 2570048. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:20:48,096][00684] Avg episode reward: [(0, '6.787')] +[2023-02-25 04:20:53,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3959.6, 300 sec: 3984.9). Total num frames: 14303232. Throughput: 0: 1036.2. Samples: 2575204. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:20:53,097][00684] Avg episode reward: [(0, '6.702')] +[2023-02-25 04:20:58,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 3957.2). Total num frames: 14319616. Throughput: 0: 1023.2. Samples: 2577784. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:20:58,095][00684] Avg episode reward: [(0, '7.048')] +[2023-02-25 04:20:58,113][19675] Saving new best policy, reward=7.048! +[2023-02-25 04:20:58,784][19689] Updated weights for policy 0, policy_version 3498 (0.0026) +[2023-02-25 04:21:03,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4164.3, 300 sec: 3957.2). Total num frames: 14348288. Throughput: 0: 1004.6. Samples: 2584556. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:21:03,099][00684] Avg episode reward: [(0, '7.195')] +[2023-02-25 04:21:03,102][19675] Saving new best policy, reward=7.195! +[2023-02-25 04:21:07,060][19689] Updated weights for policy 0, policy_version 3508 (0.0022) +[2023-02-25 04:21:08,096][00684] Fps is (10 sec: 5323.3, 60 sec: 4300.6, 300 sec: 3984.9). Total num frames: 14372864. Throughput: 0: 1039.4. Samples: 2592216. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-25 04:21:08,103][00684] Avg episode reward: [(0, '6.659')] +[2023-02-25 04:21:13,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4164.5, 300 sec: 3984.9). Total num frames: 14389248. Throughput: 0: 1042.7. Samples: 2594900. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2023-02-25 04:21:13,096][00684] Avg episode reward: [(0, '7.022')] +[2023-02-25 04:21:18,093][00684] Fps is (10 sec: 3277.7, 60 sec: 3959.5, 300 sec: 3984.9). Total num frames: 14405632. Throughput: 0: 1045.8. Samples: 2600132. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:21:18,095][00684] Avg episode reward: [(0, '6.699')] +[2023-02-25 04:21:18,756][19689] Updated weights for policy 0, policy_version 3518 (0.0043) +[2023-02-25 04:21:23,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.7, 300 sec: 3971.0). Total num frames: 14426112. Throughput: 0: 1008.4. Samples: 2605344. 
Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2023-02-25 04:21:23,103][00684] Avg episode reward: [(0, '7.450')] +[2023-02-25 04:21:23,107][19675] Saving new best policy, reward=7.450! +[2023-02-25 04:21:27,735][19689] Updated weights for policy 0, policy_version 3528 (0.0021) +[2023-02-25 04:21:28,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4164.3, 300 sec: 3971.0). Total num frames: 14450688. Throughput: 0: 1007.6. Samples: 2609112. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:21:28,098][00684] Avg episode reward: [(0, '6.779')] +[2023-02-25 04:21:33,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4164.3, 300 sec: 3971.0). Total num frames: 14471168. Throughput: 0: 1038.1. Samples: 2616764. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:21:33,099][00684] Avg episode reward: [(0, '6.841')] +[2023-02-25 04:21:37,787][19689] Updated weights for policy 0, policy_version 3538 (0.0012) +[2023-02-25 04:21:38,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4164.3, 300 sec: 3984.9). Total num frames: 14491648. Throughput: 0: 1047.5. Samples: 2622340. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2023-02-25 04:21:38,096][00684] Avg episode reward: [(0, '6.996')] +[2023-02-25 04:21:43,097][00684] Fps is (10 sec: 3275.6, 60 sec: 3891.0, 300 sec: 3971.0). Total num frames: 14503936. Throughput: 0: 1040.3. Samples: 2624600. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:21:43,101][00684] Avg episode reward: [(0, '7.067')] +[2023-02-25 04:21:48,096][00684] Fps is (10 sec: 2866.4, 60 sec: 3891.0, 300 sec: 3943.2). Total num frames: 14520320. Throughput: 0: 981.4. Samples: 2628720. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:21:48,099][00684] Avg episode reward: [(0, '7.203')] +[2023-02-25 04:21:48,113][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003545_14520320.pth... +[2023-02-25 04:21:48,325][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003312_13565952.pth +[2023-02-25 04:21:52,276][19689] Updated weights for policy 0, policy_version 3548 (0.0014) +[2023-02-25 04:21:53,093][00684] Fps is (10 sec: 3278.0, 60 sec: 3891.2, 300 sec: 3915.5). Total num frames: 14536704. Throughput: 0: 909.7. Samples: 2633152. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:21:53,101][00684] Avg episode reward: [(0, '7.453')] +[2023-02-25 04:21:53,103][19675] Saving new best policy, reward=7.453! +[2023-02-25 04:21:58,095][00684] Fps is (10 sec: 3277.2, 60 sec: 3891.1, 300 sec: 3915.5). Total num frames: 14553088. Throughput: 0: 905.5. Samples: 2635648. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-25 04:21:58,104][00684] Avg episode reward: [(0, '7.569')] +[2023-02-25 04:21:58,117][19675] Saving new best policy, reward=7.569! +[2023-02-25 04:22:02,331][19689] Updated weights for policy 0, policy_version 3558 (0.0012) +[2023-02-25 04:22:03,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3943.3). Total num frames: 14573568. Throughput: 0: 935.6. Samples: 2642236. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:22:03,099][00684] Avg episode reward: [(0, '7.764')] +[2023-02-25 04:22:03,146][19675] Saving new best policy, reward=7.764! +[2023-02-25 04:22:08,093][00684] Fps is (10 sec: 4096.6, 60 sec: 3686.6, 300 sec: 3971.0). Total num frames: 14594048. Throughput: 0: 934.5. Samples: 2647396. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:22:08,101][00684] Avg episode reward: [(0, '7.421')] +[2023-02-25 04:22:13,096][00684] Fps is (10 sec: 3685.4, 60 sec: 3686.2, 300 sec: 3971.0). Total num frames: 14610432. Throughput: 0: 908.2. Samples: 2649984. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:22:13,104][00684] Avg episode reward: [(0, '7.562')] +[2023-02-25 04:22:14,011][19689] Updated weights for policy 0, policy_version 3568 (0.0031) +[2023-02-25 04:22:18,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3971.0). Total num frames: 14635008. Throughput: 0: 871.7. Samples: 2655992. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:22:18,095][00684] Avg episode reward: [(0, '8.086')] +[2023-02-25 04:22:18,113][19675] Saving new best policy, reward=8.086! +[2023-02-25 04:22:22,286][19689] Updated weights for policy 0, policy_version 3578 (0.0022) +[2023-02-25 04:22:23,093][00684] Fps is (10 sec: 4916.5, 60 sec: 3891.2, 300 sec: 3984.9). Total num frames: 14659584. Throughput: 0: 916.4. Samples: 2663580. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:22:23,095][00684] Avg episode reward: [(0, '8.484')] +[2023-02-25 04:22:23,105][19675] Saving new best policy, reward=8.484! +[2023-02-25 04:22:28,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3984.9). Total num frames: 14675968. Throughput: 0: 938.4. Samples: 2666824. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:22:28,095][00684] Avg episode reward: [(0, '8.496')] +[2023-02-25 04:22:28,108][19675] Saving new best policy, reward=8.496! +[2023-02-25 04:22:33,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3984.9). Total num frames: 14692352. Throughput: 0: 960.8. Samples: 2671952. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:22:33,109][00684] Avg episode reward: [(0, '7.642')] +[2023-02-25 04:22:33,369][19689] Updated weights for policy 0, policy_version 3588 (0.0012) +[2023-02-25 04:22:38,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3971.0). Total num frames: 14712832. Throughput: 0: 977.9. Samples: 2677156. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:22:38,102][00684] Avg episode reward: [(0, '8.038')] +[2023-02-25 04:22:42,781][19689] Updated weights for policy 0, policy_version 3598 (0.0014) +[2023-02-25 04:22:43,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3891.4, 300 sec: 3971.0). Total num frames: 14737408. Throughput: 0: 998.3. Samples: 2680572. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:22:43,102][00684] Avg episode reward: [(0, '7.987')] +[2023-02-25 04:22:48,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4027.9, 300 sec: 3984.9). Total num frames: 14761984. Throughput: 0: 1023.3. Samples: 2688284. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:22:48,102][00684] Avg episode reward: [(0, '8.349')] +[2023-02-25 04:22:51,954][19689] Updated weights for policy 0, policy_version 3608 (0.0012) +[2023-02-25 04:22:53,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4027.7, 300 sec: 3984.9). Total num frames: 14778368. Throughput: 0: 1043.7. Samples: 2694364. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:22:53,102][00684] Avg episode reward: [(0, '9.322')] +[2023-02-25 04:22:53,104][19675] Saving new best policy, reward=9.322! +[2023-02-25 04:22:58,095][00684] Fps is (10 sec: 3685.8, 60 sec: 4096.0, 300 sec: 3998.8). Total num frames: 14798848. Throughput: 0: 1044.3. Samples: 2696976. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:22:58,098][00684] Avg episode reward: [(0, '9.587')] +[2023-02-25 04:22:58,113][19675] Saving new best policy, reward=9.587! +[2023-02-25 04:23:03,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.7, 300 sec: 3971.0). Total num frames: 14815232. Throughput: 0: 1026.5. Samples: 2702184. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:23:03,095][00684] Avg episode reward: [(0, '9.920')] +[2023-02-25 04:23:03,104][19675] Saving new best policy, reward=9.920! +[2023-02-25 04:23:03,425][19689] Updated weights for policy 0, policy_version 3618 (0.0014) +[2023-02-25 04:23:08,093][00684] Fps is (10 sec: 4096.7, 60 sec: 4096.0, 300 sec: 3957.2). Total num frames: 14839808. Throughput: 0: 1014.5. Samples: 2709232. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:23:08,096][00684] Avg episode reward: [(0, '9.407')] +[2023-02-25 04:23:11,298][19689] Updated weights for policy 0, policy_version 3628 (0.0012) +[2023-02-25 04:23:13,093][00684] Fps is (10 sec: 5324.8, 60 sec: 4301.0, 300 sec: 3984.9). Total num frames: 14868480. Throughput: 0: 1027.5. Samples: 2713060. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:23:13,096][00684] Avg episode reward: [(0, '10.195')] +[2023-02-25 04:23:13,098][19675] Saving new best policy, reward=10.195! +[2023-02-25 04:23:18,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4164.3, 300 sec: 3984.9). Total num frames: 14884864. Throughput: 0: 1053.4. Samples: 2719356. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:23:18,095][00684] Avg episode reward: [(0, '11.120')] +[2023-02-25 04:23:18,113][19675] Saving new best policy, reward=11.120! +[2023-02-25 04:23:22,769][19689] Updated weights for policy 0, policy_version 3638 (0.0014) +[2023-02-25 04:23:23,095][00684] Fps is (10 sec: 3276.1, 60 sec: 4027.6, 300 sec: 3984.9). Total num frames: 14901248. Throughput: 0: 1050.4. Samples: 2724424. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:23:23,098][00684] Avg episode reward: [(0, '11.814')] +[2023-02-25 04:23:23,103][19675] Saving new best policy, reward=11.814! +[2023-02-25 04:23:28,093][00684] Fps is (10 sec: 3276.8, 60 sec: 4027.7, 300 sec: 3957.2). Total num frames: 14917632. Throughput: 0: 1028.4. Samples: 2726852. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:23:28,100][00684] Avg episode reward: [(0, '12.906')] +[2023-02-25 04:23:28,115][19675] Saving new best policy, reward=12.906! +[2023-02-25 04:23:32,457][19689] Updated weights for policy 0, policy_version 3648 (0.0012) +[2023-02-25 04:23:33,094][00684] Fps is (10 sec: 4506.3, 60 sec: 4232.5, 300 sec: 3971.0). Total num frames: 14946304. Throughput: 0: 1005.9. Samples: 2733552. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:23:33,098][00684] Avg episode reward: [(0, '13.053')] +[2023-02-25 04:23:33,105][19675] Saving new best policy, reward=13.053! +[2023-02-25 04:23:38,093][00684] Fps is (10 sec: 4915.1, 60 sec: 4232.5, 300 sec: 3984.9). Total num frames: 14966784. Throughput: 0: 1037.9. Samples: 2741072. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:23:38,100][00684] Avg episode reward: [(0, '11.539')] +[2023-02-25 04:23:42,289][19689] Updated weights for policy 0, policy_version 3658 (0.0013) +[2023-02-25 04:23:43,093][00684] Fps is (10 sec: 3686.6, 60 sec: 4096.0, 300 sec: 3971.0). Total num frames: 14983168. Throughput: 0: 1038.2. Samples: 2743692. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:23:43,097][00684] Avg episode reward: [(0, '10.612')] +[2023-02-25 04:23:48,093][00684] Fps is (10 sec: 3686.5, 60 sec: 4027.7, 300 sec: 3984.9). Total num frames: 15003648. Throughput: 0: 1035.9. Samples: 2748800. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:23:48,095][00684] Avg episode reward: [(0, '11.467')] +[2023-02-25 04:23:48,115][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003663_15003648.pth... +[2023-02-25 04:23:48,350][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003427_14036992.pth +[2023-02-25 04:23:53,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.7, 300 sec: 3957.2). Total num frames: 15020032. Throughput: 0: 998.4. Samples: 2754160. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:23:53,096][00684] Avg episode reward: [(0, '11.094')] +[2023-02-25 04:23:53,347][19689] Updated weights for policy 0, policy_version 3668 (0.0013) +[2023-02-25 04:23:58,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4096.1, 300 sec: 3957.2). Total num frames: 15044608. Throughput: 0: 999.4. Samples: 2758032. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:23:58,100][00684] Avg episode reward: [(0, '10.823')] +[2023-02-25 04:24:03,096][00684] Fps is (10 sec: 4094.9, 60 sec: 4095.8, 300 sec: 3943.2). Total num frames: 15060992. Throughput: 0: 995.5. Samples: 2764156. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:24:03,098][00684] Avg episode reward: [(0, '10.538')] +[2023-02-25 04:24:03,545][19689] Updated weights for policy 0, policy_version 3678 (0.0012) +[2023-02-25 04:24:08,093][00684] Fps is (10 sec: 3276.7, 60 sec: 3959.4, 300 sec: 3943.3). Total num frames: 15077376. Throughput: 0: 975.1. Samples: 2768300. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:24:08,101][00684] Avg episode reward: [(0, '10.129')] +[2023-02-25 04:24:13,093][00684] Fps is (10 sec: 2867.9, 60 sec: 3686.4, 300 sec: 3929.4). Total num frames: 15089664. Throughput: 0: 964.4. Samples: 2770252. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:24:13,096][00684] Avg episode reward: [(0, '9.850')] +[2023-02-25 04:24:17,909][19689] Updated weights for policy 0, policy_version 3688 (0.0027) +[2023-02-25 04:24:18,093][00684] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3901.6). Total num frames: 15106048. Throughput: 0: 913.0. Samples: 2774636. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:24:18,096][00684] Avg episode reward: [(0, '10.833')] +[2023-02-25 04:24:23,093][00684] Fps is (10 sec: 3686.5, 60 sec: 3754.8, 300 sec: 3929.4). Total num frames: 15126528. Throughput: 0: 882.0. Samples: 2780764. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:24:23,102][00684] Avg episode reward: [(0, '11.463')] +[2023-02-25 04:24:26,502][19689] Updated weights for policy 0, policy_version 3698 (0.0029) +[2023-02-25 04:24:28,095][00684] Fps is (10 sec: 4504.9, 60 sec: 3891.1, 300 sec: 3957.2). Total num frames: 15151104. Throughput: 0: 908.9. Samples: 2784592. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:24:28,098][00684] Avg episode reward: [(0, '12.050')] +[2023-02-25 04:24:33,093][00684] Fps is (10 sec: 4915.1, 60 sec: 3823.0, 300 sec: 3998.8). Total num frames: 15175680. Throughput: 0: 958.0. Samples: 2791908. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:24:33,097][00684] Avg episode reward: [(0, '11.688')] +[2023-02-25 04:24:36,512][19689] Updated weights for policy 0, policy_version 3708 (0.0015) +[2023-02-25 04:24:38,093][00684] Fps is (10 sec: 4096.6, 60 sec: 3754.7, 300 sec: 3984.9). Total num frames: 15192064. Throughput: 0: 953.1. Samples: 2797048. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:24:38,101][00684] Avg episode reward: [(0, '11.846')] +[2023-02-25 04:24:43,093][00684] Fps is (10 sec: 3276.9, 60 sec: 3754.7, 300 sec: 3971.0). Total num frames: 15208448. Throughput: 0: 926.0. Samples: 2799704. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:24:43,096][00684] Avg episode reward: [(0, '12.108')] +[2023-02-25 04:24:47,073][19689] Updated weights for policy 0, policy_version 3718 (0.0021) +[2023-02-25 04:24:48,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3822.9, 300 sec: 3957.2). Total num frames: 15233024. Throughput: 0: 923.2. Samples: 2805696. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:24:48,095][00684] Avg episode reward: [(0, '12.388')] +[2023-02-25 04:24:53,093][00684] Fps is (10 sec: 4915.2, 60 sec: 3959.5, 300 sec: 3984.9). Total num frames: 15257600. Throughput: 0: 999.8. Samples: 2813292. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2023-02-25 04:24:53,096][00684] Avg episode reward: [(0, '11.984')] +[2023-02-25 04:24:55,513][19689] Updated weights for policy 0, policy_version 3728 (0.0014) +[2023-02-25 04:24:58,094][00684] Fps is (10 sec: 4505.1, 60 sec: 3891.1, 300 sec: 3998.8). Total num frames: 15278080. Throughput: 0: 1034.6. Samples: 2816808. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:24:58,099][00684] Avg episode reward: [(0, '12.638')] +[2023-02-25 04:25:03,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3891.4, 300 sec: 3998.8). Total num frames: 15294464. Throughput: 0: 1053.6. Samples: 2822048. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:25:03,097][00684] Avg episode reward: [(0, '12.902')] +[2023-02-25 04:25:07,014][19689] Updated weights for policy 0, policy_version 3738 (0.0011) +[2023-02-25 04:25:08,093][00684] Fps is (10 sec: 3686.8, 60 sec: 3959.5, 300 sec: 3985.0). Total num frames: 15314944. Throughput: 0: 1036.0. Samples: 2827384. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:25:08,096][00684] Avg episode reward: [(0, '13.953')] +[2023-02-25 04:25:08,114][19675] Saving new best policy, reward=13.953! +[2023-02-25 04:25:13,093][00684] Fps is (10 sec: 4095.9, 60 sec: 4096.0, 300 sec: 3957.2). Total num frames: 15335424. Throughput: 0: 1023.4. Samples: 2830644. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:25:13,103][00684] Avg episode reward: [(0, '13.503')] +[2023-02-25 04:25:15,418][19689] Updated weights for policy 0, policy_version 3748 (0.0017) +[2023-02-25 04:25:18,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4300.8, 300 sec: 3998.8). Total num frames: 15364096. Throughput: 0: 1033.3. Samples: 2838404. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2023-02-25 04:25:18,096][00684] Avg episode reward: [(0, '14.020')] +[2023-02-25 04:25:18,109][19675] Saving new best policy, reward=14.020! +[2023-02-25 04:25:23,096][00684] Fps is (10 sec: 4504.5, 60 sec: 4232.3, 300 sec: 3998.8). Total num frames: 15380480. Throughput: 0: 1057.8. Samples: 2844652. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:25:23,099][00684] Avg episode reward: [(0, '14.379')] +[2023-02-25 04:25:23,107][19675] Saving new best policy, reward=14.379! +[2023-02-25 04:25:25,734][19689] Updated weights for policy 0, policy_version 3758 (0.0012) +[2023-02-25 04:25:28,094][00684] Fps is (10 sec: 3276.5, 60 sec: 4096.1, 300 sec: 3984.9). Total num frames: 15396864. Throughput: 0: 1053.6. Samples: 2847116. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:25:28,097][00684] Avg episode reward: [(0, '14.592')] +[2023-02-25 04:25:28,116][19675] Saving new best policy, reward=14.592! +[2023-02-25 04:25:33,094][00684] Fps is (10 sec: 3686.9, 60 sec: 4027.7, 300 sec: 3984.9). Total num frames: 15417344. Throughput: 0: 1035.1. Samples: 2852276. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:25:33,102][00684] Avg episode reward: [(0, '14.330')] +[2023-02-25 04:25:36,174][19689] Updated weights for policy 0, policy_version 3768 (0.0017) +[2023-02-25 04:25:38,093][00684] Fps is (10 sec: 4505.9, 60 sec: 4164.3, 300 sec: 3971.0). Total num frames: 15441920. Throughput: 0: 1020.9. Samples: 2859232. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:25:38,102][00684] Avg episode reward: [(0, '13.961')] +[2023-02-25 04:25:43,095][00684] Fps is (10 sec: 4914.7, 60 sec: 4300.6, 300 sec: 3998.8). Total num frames: 15466496. Throughput: 0: 1028.9. Samples: 2863108. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-25 04:25:43,097][00684] Avg episode reward: [(0, '14.298')] +[2023-02-25 04:25:44,768][19689] Updated weights for policy 0, policy_version 3778 (0.0012) +[2023-02-25 04:25:48,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4164.3, 300 sec: 3998.8). Total num frames: 15482880. Throughput: 0: 1056.0. Samples: 2869568. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:25:48,098][00684] Avg episode reward: [(0, '15.134')] +[2023-02-25 04:25:48,181][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003781_15486976.pth... +[2023-02-25 04:25:48,353][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003545_14520320.pth +[2023-02-25 04:25:48,369][19675] Saving new best policy, reward=15.134! +[2023-02-25 04:25:53,095][00684] Fps is (10 sec: 3277.0, 60 sec: 4027.6, 300 sec: 3998.8). Total num frames: 15499264. Throughput: 0: 1048.0. Samples: 2874544. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:25:53,098][00684] Avg episode reward: [(0, '15.376')] +[2023-02-25 04:25:53,104][19675] Saving new best policy, reward=15.376! +[2023-02-25 04:25:56,789][19689] Updated weights for policy 0, policy_version 3788 (0.0012) +[2023-02-25 04:25:58,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.8, 300 sec: 3971.0). Total num frames: 15519744. Throughput: 0: 1031.9. Samples: 2877080. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:25:58,096][00684] Avg episode reward: [(0, '16.800')] +[2023-02-25 04:25:58,110][19675] Saving new best policy, reward=16.800! +[2023-02-25 04:26:03,093][00684] Fps is (10 sec: 4506.4, 60 sec: 4164.3, 300 sec: 3971.1). Total num frames: 15544320. Throughput: 0: 1007.0. Samples: 2883720. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:26:03,098][00684] Avg episode reward: [(0, '17.039')] +[2023-02-25 04:26:03,105][19675] Saving new best policy, reward=17.039! 
+[2023-02-25 04:26:05,192][19689] Updated weights for policy 0, policy_version 3798 (0.0012) +[2023-02-25 04:26:08,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4232.5, 300 sec: 3998.8). Total num frames: 15568896. Throughput: 0: 1037.7. Samples: 2891344. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:26:08,096][00684] Avg episode reward: [(0, '18.367')] +[2023-02-25 04:26:08,108][19675] Saving new best policy, reward=18.367! +[2023-02-25 04:26:13,095][00684] Fps is (10 sec: 4095.2, 60 sec: 4164.1, 300 sec: 3998.8). Total num frames: 15585280. Throughput: 0: 1040.4. Samples: 2893936. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:26:13,102][00684] Avg episode reward: [(0, '17.858')] +[2023-02-25 04:26:15,755][19689] Updated weights for policy 0, policy_version 3808 (0.0014) +[2023-02-25 04:26:18,095][00684] Fps is (10 sec: 3685.7, 60 sec: 4027.6, 300 sec: 3998.8). Total num frames: 15605760. Throughput: 0: 1042.7. Samples: 2899196. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:26:18,100][00684] Avg episode reward: [(0, '18.985')] +[2023-02-25 04:26:18,117][19675] Saving new best policy, reward=18.985! +[2023-02-25 04:26:23,094][00684] Fps is (10 sec: 2867.5, 60 sec: 3891.3, 300 sec: 3943.3). Total num frames: 15613952. Throughput: 0: 985.9. Samples: 2903596. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:26:23,099][00684] Avg episode reward: [(0, '19.003')] +[2023-02-25 04:26:23,104][19675] Saving new best policy, reward=19.003! +[2023-02-25 04:26:28,093][00684] Fps is (10 sec: 2867.7, 60 sec: 3959.5, 300 sec: 3943.3). Total num frames: 15634432. Throughput: 0: 949.8. Samples: 2905848. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:26:28,098][00684] Avg episode reward: [(0, '19.524')] +[2023-02-25 04:26:28,111][19675] Saving new best policy, reward=19.524! +[2023-02-25 04:26:29,249][19689] Updated weights for policy 0, policy_version 3818 (0.0017) +[2023-02-25 04:26:33,093][00684] Fps is (10 sec: 3686.7, 60 sec: 3891.3, 300 sec: 3929.4). Total num frames: 15650816. Throughput: 0: 919.6. Samples: 2910948. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:26:33,096][00684] Avg episode reward: [(0, '19.173')] +[2023-02-25 04:26:38,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3754.7, 300 sec: 3943.3). Total num frames: 15667200. Throughput: 0: 925.2. Samples: 2916176. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:26:38,097][00684] Avg episode reward: [(0, '19.071')] +[2023-02-25 04:26:40,558][19689] Updated weights for policy 0, policy_version 3828 (0.0012) +[2023-02-25 04:26:43,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3686.5, 300 sec: 3957.2). Total num frames: 15687680. Throughput: 0: 926.8. Samples: 2918788. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:26:43,096][00684] Avg episode reward: [(0, '17.827')] +[2023-02-25 04:26:48,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3957.2). Total num frames: 15704064. Throughput: 0: 895.8. Samples: 2924032. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:26:48,100][00684] Avg episode reward: [(0, '16.210')] +[2023-02-25 04:26:51,029][19689] Updated weights for policy 0, policy_version 3838 (0.0015) +[2023-02-25 04:26:53,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3823.0, 300 sec: 3984.9). Total num frames: 15728640. Throughput: 0: 881.2. Samples: 2930996. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:26:53,096][00684] Avg episode reward: [(0, '17.195')] +[2023-02-25 04:26:58,093][00684] Fps is (10 sec: 4915.2, 60 sec: 3891.2, 300 sec: 3998.8). Total num frames: 15753216. Throughput: 0: 910.2. Samples: 2934892. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:26:58,101][00684] Avg episode reward: [(0, '17.806')] +[2023-02-25 04:26:59,491][19689] Updated weights for policy 0, policy_version 3848 (0.0015) +[2023-02-25 04:27:03,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3754.7, 300 sec: 3984.9). Total num frames: 15769600. Throughput: 0: 933.9. Samples: 2941220. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:27:03,098][00684] Avg episode reward: [(0, '18.172')] +[2023-02-25 04:27:08,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 3998.8). Total num frames: 15790080. Throughput: 0: 954.2. Samples: 2946536. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:27:08,100][00684] Avg episode reward: [(0, '17.230')] +[2023-02-25 04:27:11,224][19689] Updated weights for policy 0, policy_version 3858 (0.0012) +[2023-02-25 04:27:13,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3754.8, 300 sec: 3984.9). Total num frames: 15810560. Throughput: 0: 961.7. Samples: 2949124. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:27:13,096][00684] Avg episode reward: [(0, '16.953')] +[2023-02-25 04:27:18,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3823.1, 300 sec: 3984.9). Total num frames: 15835136. Throughput: 0: 1004.9. Samples: 2956168. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:27:18,096][00684] Avg episode reward: [(0, '17.582')] +[2023-02-25 04:27:19,427][19689] Updated weights for policy 0, policy_version 3868 (0.0012) +[2023-02-25 04:27:23,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4027.8, 300 sec: 3998.8). Total num frames: 15855616. Throughput: 0: 1057.2. Samples: 2963748. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:27:23,102][00684] Avg episode reward: [(0, '17.758')] +[2023-02-25 04:27:28,095][00684] Fps is (10 sec: 3685.6, 60 sec: 3959.3, 300 sec: 3998.8). Total num frames: 15872000. Throughput: 0: 1056.0. Samples: 2966312. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:27:28,106][00684] Avg episode reward: [(0, '18.371')] +[2023-02-25 04:27:30,600][19689] Updated weights for policy 0, policy_version 3878 (0.0014) +[2023-02-25 04:27:33,093][00684] Fps is (10 sec: 3686.3, 60 sec: 4027.7, 300 sec: 3998.8). Total num frames: 15892480. Throughput: 0: 1053.5. Samples: 2971440. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:27:33,096][00684] Avg episode reward: [(0, '20.217')] +[2023-02-25 04:27:33,098][19675] Saving new best policy, reward=20.217! +[2023-02-25 04:27:38,093][00684] Fps is (10 sec: 4096.9, 60 sec: 4096.0, 300 sec: 3984.9). Total num frames: 15912960. Throughput: 0: 1024.1. Samples: 2977080. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:27:38,096][00684] Avg episode reward: [(0, '19.672')] +[2023-02-25 04:27:40,324][19689] Updated weights for policy 0, policy_version 3888 (0.0020) +[2023-02-25 04:27:43,095][00684] Fps is (10 sec: 4504.7, 60 sec: 4164.1, 300 sec: 3984.9). Total num frames: 15937536. Throughput: 0: 1022.2. Samples: 2980892. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:27:43,098][00684] Avg episode reward: [(0, '18.742')] +[2023-02-25 04:27:48,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4300.8, 300 sec: 4012.7). Total num frames: 15962112. 
Throughput: 0: 1052.3. Samples: 2988572. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:27:48,096][00684] Avg episode reward: [(0, '19.156')] +[2023-02-25 04:27:48,107][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003897_15962112.pth... +[2023-02-25 04:27:48,266][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003663_15003648.pth +[2023-02-25 04:27:49,447][19689] Updated weights for policy 0, policy_version 3898 (0.0013) +[2023-02-25 04:27:53,098][00684] Fps is (10 sec: 4095.0, 60 sec: 4163.9, 300 sec: 3998.8). Total num frames: 15978496. Throughput: 0: 1047.6. Samples: 2993684. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:27:53,100][00684] Avg episode reward: [(0, '18.626')] +[2023-02-25 04:27:58,093][00684] Fps is (10 sec: 3276.7, 60 sec: 4027.7, 300 sec: 3998.8). Total num frames: 15994880. Throughput: 0: 1048.0. Samples: 2996284. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:27:58,097][00684] Avg episode reward: [(0, '18.994')] +[2023-02-25 04:28:00,884][19689] Updated weights for policy 0, policy_version 3908 (0.0029) +[2023-02-25 04:28:03,093][00684] Fps is (10 sec: 4098.0, 60 sec: 4164.3, 300 sec: 3998.8). Total num frames: 16019456. Throughput: 0: 1017.8. Samples: 3001968. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:28:03,096][00684] Avg episode reward: [(0, '18.809')] +[2023-02-25 04:28:08,093][00684] Fps is (10 sec: 4915.3, 60 sec: 4232.5, 300 sec: 3984.9). Total num frames: 16044032. Throughput: 0: 1022.1. Samples: 3009744. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:28:08,096][00684] Avg episode reward: [(0, '20.071')] +[2023-02-25 04:28:08,570][19689] Updated weights for policy 0, policy_version 3918 (0.0012) +[2023-02-25 04:28:13,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4232.5, 300 sec: 3998.8). Total num frames: 16064512. Throughput: 0: 1050.8. Samples: 3013596. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:28:13,100][00684] Avg episode reward: [(0, '20.455')] +[2023-02-25 04:28:13,106][19675] Saving new best policy, reward=20.455! +[2023-02-25 04:28:18,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4096.0, 300 sec: 3998.8). Total num frames: 16080896. Throughput: 0: 1051.0. Samples: 3018736. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:28:18,095][00684] Avg episode reward: [(0, '21.088')] +[2023-02-25 04:28:18,115][19675] Saving new best policy, reward=21.088! +[2023-02-25 04:28:19,762][19689] Updated weights for policy 0, policy_version 3928 (0.0011) +[2023-02-25 04:28:23,093][00684] Fps is (10 sec: 3276.8, 60 sec: 4027.7, 300 sec: 3998.8). Total num frames: 16097280. Throughput: 0: 1037.4. Samples: 3023764. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:28:23,101][00684] Avg episode reward: [(0, '21.660')] +[2023-02-25 04:28:23,108][19675] Saving new best policy, reward=21.660! +[2023-02-25 04:28:28,093][00684] Fps is (10 sec: 4095.9, 60 sec: 4164.4, 300 sec: 3984.9). Total num frames: 16121856. Throughput: 0: 1016.8. Samples: 3026648. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:28:28,096][00684] Avg episode reward: [(0, '21.652')] +[2023-02-25 04:28:29,324][19689] Updated weights for policy 0, policy_version 3938 (0.0026) +[2023-02-25 04:28:33,093][00684] Fps is (10 sec: 4915.3, 60 sec: 4232.5, 300 sec: 3998.8). Total num frames: 16146432. Throughput: 0: 1015.9. Samples: 3034288. 
Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:28:33,103][00684] Avg episode reward: [(0, '22.106')] +[2023-02-25 04:28:33,106][19675] Saving new best policy, reward=22.106! +[2023-02-25 04:28:38,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4232.5, 300 sec: 4012.7). Total num frames: 16166912. Throughput: 0: 1045.9. Samples: 3040744. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:28:38,099][00684] Avg episode reward: [(0, '21.871')] +[2023-02-25 04:28:39,463][19689] Updated weights for policy 0, policy_version 3948 (0.0013) +[2023-02-25 04:28:43,093][00684] Fps is (10 sec: 3276.8, 60 sec: 4027.9, 300 sec: 3984.9). Total num frames: 16179200. Throughput: 0: 1038.9. Samples: 3043036. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:28:43,098][00684] Avg episode reward: [(0, '22.102')] +[2023-02-25 04:28:48,093][00684] Fps is (10 sec: 2457.6, 60 sec: 3822.9, 300 sec: 3971.0). Total num frames: 16191488. Throughput: 0: 1002.9. Samples: 3047100. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:28:48,101][00684] Avg episode reward: [(0, '21.912')] +[2023-02-25 04:28:53,094][00684] Fps is (10 sec: 2867.0, 60 sec: 3823.2, 300 sec: 3943.3). Total num frames: 16207872. Throughput: 0: 918.7. Samples: 3051088. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:28:53,096][00684] Avg episode reward: [(0, '22.072')] +[2023-02-25 04:28:54,512][19689] Updated weights for policy 0, policy_version 3958 (0.0017) +[2023-02-25 04:28:58,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3823.0, 300 sec: 3943.3). Total num frames: 16224256. Throughput: 0: 885.2. Samples: 3053428. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:28:58,095][00684] Avg episode reward: [(0, '22.480')] +[2023-02-25 04:28:58,109][19675] Saving new best policy, reward=22.480! +[2023-02-25 04:29:03,095][00684] Fps is (10 sec: 4095.7, 60 sec: 3822.8, 300 sec: 3971.0). Total num frames: 16248832. Throughput: 0: 925.4. Samples: 3060380. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2023-02-25 04:29:03,097][00684] Avg episode reward: [(0, '23.474')] +[2023-02-25 04:29:03,108][19675] Saving new best policy, reward=23.474! +[2023-02-25 04:29:03,442][19689] Updated weights for policy 0, policy_version 3968 (0.0013) +[2023-02-25 04:29:08,093][00684] Fps is (10 sec: 4095.9, 60 sec: 3686.4, 300 sec: 3984.9). Total num frames: 16265216. Throughput: 0: 948.4. Samples: 3066444. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:29:08,099][00684] Avg episode reward: [(0, '23.377')] +[2023-02-25 04:29:13,093][00684] Fps is (10 sec: 3686.9, 60 sec: 3686.4, 300 sec: 3998.8). Total num frames: 16285696. Throughput: 0: 942.8. Samples: 3069076. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:29:13,096][00684] Avg episode reward: [(0, '23.254')] +[2023-02-25 04:29:14,836][19689] Updated weights for policy 0, policy_version 3978 (0.0011) +[2023-02-25 04:29:18,093][00684] Fps is (10 sec: 3686.5, 60 sec: 3686.4, 300 sec: 3984.9). Total num frames: 16302080. Throughput: 0: 889.7. Samples: 3074324. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:29:18,098][00684] Avg episode reward: [(0, '21.993')] +[2023-02-25 04:29:23,094][00684] Fps is (10 sec: 4095.7, 60 sec: 3822.9, 300 sec: 3984.9). Total num frames: 16326656. Throughput: 0: 902.6. Samples: 3081360. 
Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2023-02-25 04:29:23,105][00684] Avg episode reward: [(0, '18.447')] +[2023-02-25 04:29:23,670][19689] Updated weights for policy 0, policy_version 3988 (0.0012) +[2023-02-25 04:29:28,093][00684] Fps is (10 sec: 5324.8, 60 sec: 3891.2, 300 sec: 3998.8). Total num frames: 16355328. Throughput: 0: 939.2. Samples: 3085300. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:29:28,100][00684] Avg episode reward: [(0, '17.336')] +[2023-02-25 04:29:33,100][00684] Fps is (10 sec: 4502.8, 60 sec: 3754.2, 300 sec: 3998.7). Total num frames: 16371712. Throughput: 0: 988.2. Samples: 3091576. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2023-02-25 04:29:33,107][00684] Avg episode reward: [(0, '17.769')] +[2023-02-25 04:29:33,571][19689] Updated weights for policy 0, policy_version 3998 (0.0012) +[2023-02-25 04:29:38,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 3998.8). Total num frames: 16388096. Throughput: 0: 1015.9. Samples: 3096804. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:29:38,098][00684] Avg episode reward: [(0, '18.904')] +[2023-02-25 04:29:43,093][00684] Fps is (10 sec: 3688.9, 60 sec: 3822.9, 300 sec: 3984.9). Total num frames: 16408576. Throughput: 0: 1021.2. Samples: 3099380. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:29:43,096][00684] Avg episode reward: [(0, '19.393')] +[2023-02-25 04:29:44,077][19689] Updated weights for policy 0, policy_version 4008 (0.0013) +[2023-02-25 04:29:48,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4096.0, 300 sec: 3998.8). Total num frames: 16437248. Throughput: 0: 1024.0. Samples: 3106460. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:29:48,096][00684] Avg episode reward: [(0, '21.009')] +[2023-02-25 04:29:48,105][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004013_16437248.pth... +[2023-02-25 04:29:48,250][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003781_15486976.pth +[2023-02-25 04:29:52,227][19689] Updated weights for policy 0, policy_version 4018 (0.0012) +[2023-02-25 04:29:53,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4164.3, 300 sec: 3998.8). Total num frames: 16457728. Throughput: 0: 1057.2. Samples: 3114020. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:29:53,102][00684] Avg episode reward: [(0, '21.928')] +[2023-02-25 04:29:58,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4232.5, 300 sec: 4012.7). Total num frames: 16478208. Throughput: 0: 1055.0. Samples: 3116552. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:29:58,099][00684] Avg episode reward: [(0, '22.131')] +[2023-02-25 04:30:03,096][00684] Fps is (10 sec: 3685.2, 60 sec: 4095.9, 300 sec: 3998.8). Total num frames: 16494592. Throughput: 0: 1053.8. Samples: 3121748. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) +[2023-02-25 04:30:03,101][00684] Avg episode reward: [(0, '22.553')] +[2023-02-25 04:30:04,716][19689] Updated weights for policy 0, policy_version 4028 (0.0013) +[2023-02-25 04:30:08,093][00684] Fps is (10 sec: 3276.7, 60 sec: 4096.0, 300 sec: 3984.9). Total num frames: 16510976. Throughput: 0: 1022.1. Samples: 3127352. Policy #0 lag: (min: 0.0, avg: 1.5, max: 3.0) +[2023-02-25 04:30:08,096][00684] Avg episode reward: [(0, '21.500')] +[2023-02-25 04:30:12,831][19689] Updated weights for policy 0, policy_version 4038 (0.0013) +[2023-02-25 04:30:13,093][00684] Fps is (10 sec: 4507.0, 60 sec: 4232.5, 300 sec: 3984.9). Total num frames: 16539648. 
Throughput: 0: 1021.4. Samples: 3131264. Policy #0 lag: (min: 0.0, avg: 1.4, max: 3.0) +[2023-02-25 04:30:13,096][00684] Avg episode reward: [(0, '22.444')] +[2023-02-25 04:30:18,093][00684] Fps is (10 sec: 4915.3, 60 sec: 4300.8, 300 sec: 3998.8). Total num frames: 16560128. Throughput: 0: 1052.6. Samples: 3138936. Policy #0 lag: (min: 0.0, avg: 1.3, max: 3.0) +[2023-02-25 04:30:18,101][00684] Avg episode reward: [(0, '22.401')] +[2023-02-25 04:30:23,014][19689] Updated weights for policy 0, policy_version 4048 (0.0011) +[2023-02-25 04:30:23,093][00684] Fps is (10 sec: 4096.1, 60 sec: 4232.6, 300 sec: 4012.7). Total num frames: 16580608. Throughput: 0: 1052.4. Samples: 3144164. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:30:23,096][00684] Avg episode reward: [(0, '22.314')] +[2023-02-25 04:30:28,094][00684] Fps is (10 sec: 3686.3, 60 sec: 4027.7, 300 sec: 3998.8). Total num frames: 16596992. Throughput: 0: 1052.5. Samples: 3146744. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-25 04:30:28,099][00684] Avg episode reward: [(0, '21.295')] +[2023-02-25 04:30:33,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4096.5, 300 sec: 3984.9). Total num frames: 16617472. Throughput: 0: 1018.4. Samples: 3152288. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:30:33,105][00684] Avg episode reward: [(0, '22.401')] +[2023-02-25 04:30:33,300][19689] Updated weights for policy 0, policy_version 4058 (0.0022) +[2023-02-25 04:30:38,096][00684] Fps is (10 sec: 4504.6, 60 sec: 4232.4, 300 sec: 3984.9). Total num frames: 16642048. Throughput: 0: 1020.5. Samples: 3159944. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:30:38,102][00684] Avg episode reward: [(0, '21.942')] +[2023-02-25 04:30:41,719][19689] Updated weights for policy 0, policy_version 4068 (0.0013) +[2023-02-25 04:30:43,094][00684] Fps is (10 sec: 4914.6, 60 sec: 4300.7, 300 sec: 4012.7). Total num frames: 16666624. Throughput: 0: 1047.2. Samples: 3163676. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:30:43,097][00684] Avg episode reward: [(0, '20.022')] +[2023-02-25 04:30:48,093][00684] Fps is (10 sec: 4097.0, 60 sec: 4096.0, 300 sec: 4012.7). Total num frames: 16683008. Throughput: 0: 1053.4. Samples: 3169148. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:30:48,099][00684] Avg episode reward: [(0, '20.463')] +[2023-02-25 04:30:53,093][00684] Fps is (10 sec: 3277.2, 60 sec: 4027.7, 300 sec: 3998.8). Total num frames: 16699392. Throughput: 0: 1045.4. Samples: 3174396. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:30:53,096][00684] Avg episode reward: [(0, '21.061')] +[2023-02-25 04:30:53,483][19689] Updated weights for policy 0, policy_version 4078 (0.0017) +[2023-02-25 04:30:58,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.7, 300 sec: 3984.9). Total num frames: 16719872. Throughput: 0: 1017.1. Samples: 3177032. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:30:58,096][00684] Avg episode reward: [(0, '20.992')] +[2023-02-25 04:31:02,432][19689] Updated weights for policy 0, policy_version 4088 (0.0012) +[2023-02-25 04:31:03,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4164.5, 300 sec: 3984.9). Total num frames: 16744448. Throughput: 0: 1011.3. Samples: 3184444. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:31:03,096][00684] Avg episode reward: [(0, '20.941')] +[2023-02-25 04:31:08,095][00684] Fps is (10 sec: 4095.2, 60 sec: 4164.1, 300 sec: 3984.9). Total num frames: 16760832. Throughput: 0: 998.9. Samples: 3189116. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:31:08,101][00684] Avg episode reward: [(0, '22.386')] +[2023-02-25 04:31:13,093][00684] Fps is (10 sec: 2867.2, 60 sec: 3891.2, 300 sec: 3957.2). Total num frames: 16773120. Throughput: 0: 987.7. Samples: 3191188. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:31:13,101][00684] Avg episode reward: [(0, '24.152')] +[2023-02-25 04:31:13,107][19675] Saving new best policy, reward=24.152! +[2023-02-25 04:31:16,751][19689] Updated weights for policy 0, policy_version 4098 (0.0019) +[2023-02-25 04:31:18,093][00684] Fps is (10 sec: 2458.1, 60 sec: 3754.7, 300 sec: 3971.0). Total num frames: 16785408. Throughput: 0: 953.5. Samples: 3195196. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:31:18,096][00684] Avg episode reward: [(0, '23.117')] +[2023-02-25 04:31:23,093][00684] Fps is (10 sec: 2867.2, 60 sec: 3686.4, 300 sec: 3957.2). Total num frames: 16801792. Throughput: 0: 891.7. Samples: 3200068. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:31:23,100][00684] Avg episode reward: [(0, '23.792')] +[2023-02-25 04:31:27,300][19689] Updated weights for policy 0, policy_version 4108 (0.0026) +[2023-02-25 04:31:28,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3823.0, 300 sec: 3984.9). Total num frames: 16826368. Throughput: 0: 883.0. Samples: 3203412. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:31:28,098][00684] Avg episode reward: [(0, '23.544')] +[2023-02-25 04:31:33,093][00684] Fps is (10 sec: 5324.8, 60 sec: 3959.5, 300 sec: 4026.6). Total num frames: 16855040. Throughput: 0: 933.8. Samples: 3211168. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 04:31:33,096][00684] Avg episode reward: [(0, '22.182')] +[2023-02-25 04:31:36,247][19689] Updated weights for policy 0, policy_version 4118 (0.0015) +[2023-02-25 04:31:38,101][00684] Fps is (10 sec: 4502.3, 60 sec: 3822.6, 300 sec: 4012.6). Total num frames: 16871424. Throughput: 0: 951.6. Samples: 3217224. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:31:38,103][00684] Avg episode reward: [(0, '20.606')] +[2023-02-25 04:31:43,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3686.5, 300 sec: 4012.7). Total num frames: 16887808. Throughput: 0: 950.5. Samples: 3219804. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:31:43,096][00684] Avg episode reward: [(0, '20.843')] +[2023-02-25 04:31:47,945][19689] Updated weights for policy 0, policy_version 4128 (0.0012) +[2023-02-25 04:31:48,093][00684] Fps is (10 sec: 3689.1, 60 sec: 3754.7, 300 sec: 3998.8). Total num frames: 16908288. Throughput: 0: 902.5. Samples: 3225056. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 04:31:48,096][00684] Avg episode reward: [(0, '22.585')] +[2023-02-25 04:31:48,110][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004128_16908288.pth... +[2023-02-25 04:31:48,308][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000003897_15962112.pth +[2023-02-25 04:31:53,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 3998.8). Total num frames: 16932864. Throughput: 0: 955.2. Samples: 3232096. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:31:53,096][00684] Avg episode reward: [(0, '22.447')] +[2023-02-25 04:31:55,637][19689] Updated weights for policy 0, policy_version 4138 (0.0015) +[2023-02-25 04:31:58,093][00684] Fps is (10 sec: 4915.3, 60 sec: 3959.5, 300 sec: 4026.6). Total num frames: 16957440. Throughput: 0: 995.3. 
Samples: 3235976. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 04:31:58,096][00684] Avg episode reward: [(0, '23.956')] +[2023-02-25 04:32:03,097][00684] Fps is (10 sec: 4094.4, 60 sec: 3822.7, 300 sec: 4012.6). Total num frames: 16973824. Throughput: 0: 1044.5. Samples: 3242204. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:32:03,100][00684] Avg episode reward: [(0, '24.412')] +[2023-02-25 04:32:03,102][19675] Saving new best policy, reward=24.412! +[2023-02-25 04:32:06,570][19689] Updated weights for policy 0, policy_version 4148 (0.0017) +[2023-02-25 04:32:08,095][00684] Fps is (10 sec: 3685.8, 60 sec: 3891.2, 300 sec: 4012.7). Total num frames: 16994304. Throughput: 0: 1051.8. Samples: 3247400. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:32:08,099][00684] Avg episode reward: [(0, '24.025')] +[2023-02-25 04:32:13,093][00684] Fps is (10 sec: 4097.6, 60 sec: 4027.7, 300 sec: 3998.8). Total num frames: 17014784. Throughput: 0: 1036.5. Samples: 3250056. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:32:13,102][00684] Avg episode reward: [(0, '23.233')] +[2023-02-25 04:32:15,990][19689] Updated weights for policy 0, policy_version 4158 (0.0014) +[2023-02-25 04:32:18,093][00684] Fps is (10 sec: 4506.4, 60 sec: 4232.5, 300 sec: 4012.7). Total num frames: 17039360. Throughput: 0: 1021.2. Samples: 3257124. Policy #0 lag: (min: 0.0, avg: 0.8, max: 3.0) +[2023-02-25 04:32:18,096][00684] Avg episode reward: [(0, '22.732')] +[2023-02-25 04:32:23,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4369.1, 300 sec: 4040.5). Total num frames: 17063936. Throughput: 0: 1060.5. Samples: 3264940. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 04:32:23,098][00684] Avg episode reward: [(0, '21.915')] +[2023-02-25 04:32:25,383][19689] Updated weights for policy 0, policy_version 4168 (0.0016) +[2023-02-25 04:32:28,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4232.5, 300 sec: 4026.6). Total num frames: 17080320. Throughput: 0: 1061.1. Samples: 3267552. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:32:28,096][00684] Avg episode reward: [(0, '20.334')] +[2023-02-25 04:32:33,102][00684] Fps is (10 sec: 3683.2, 60 sec: 4095.4, 300 sec: 4026.5). Total num frames: 17100800. Throughput: 0: 1059.1. Samples: 3272724. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 04:32:33,109][00684] Avg episode reward: [(0, '20.356')] +[2023-02-25 04:32:36,812][19689] Updated weights for policy 0, policy_version 4178 (0.0012) +[2023-02-25 04:32:38,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4096.5, 300 sec: 3998.8). Total num frames: 17117184. Throughput: 0: 1026.9. Samples: 3278308. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 04:32:38,096][00684] Avg episode reward: [(0, '21.138')] +[2023-02-25 04:32:43,093][00684] Fps is (10 sec: 4099.6, 60 sec: 4232.5, 300 sec: 3998.8). Total num frames: 17141760. Throughput: 0: 1027.2. Samples: 3282200. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 04:32:43,100][00684] Avg episode reward: [(0, '22.584')] +[2023-02-25 04:32:44,767][19689] Updated weights for policy 0, policy_version 4188 (0.0011) +[2023-02-25 04:32:48,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4300.8, 300 sec: 4026.6). Total num frames: 17166336. Throughput: 0: 1062.1. Samples: 3289996. Policy #0 lag: (min: 0.0, avg: 0.3, max: 2.0) +[2023-02-25 04:32:48,096][00684] Avg episode reward: [(0, '23.722')] +[2023-02-25 04:32:53,095][00684] Fps is (10 sec: 4095.3, 60 sec: 4164.1, 300 sec: 4026.6). 
Total num frames: 17182720. Throughput: 0: 1061.3. Samples: 3295160. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:32:53,097][00684] Avg episode reward: [(0, '23.964')] +[2023-02-25 04:32:55,901][19689] Updated weights for policy 0, policy_version 4198 (0.0015) +[2023-02-25 04:32:58,093][00684] Fps is (10 sec: 3276.7, 60 sec: 4027.7, 300 sec: 3998.8). Total num frames: 17199104. Throughput: 0: 1060.8. Samples: 3297792. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:32:58,096][00684] Avg episode reward: [(0, '23.151')] +[2023-02-25 04:33:03,093][00684] Fps is (10 sec: 4096.7, 60 sec: 4164.5, 300 sec: 3998.8). Total num frames: 17223680. Throughput: 0: 1025.5. Samples: 3303272. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 04:33:03,101][00684] Avg episode reward: [(0, '23.045')] +[2023-02-25 04:33:05,264][19689] Updated weights for policy 0, policy_version 4208 (0.0019) +[2023-02-25 04:33:08,093][00684] Fps is (10 sec: 4915.3, 60 sec: 4232.7, 300 sec: 4012.7). Total num frames: 17248256. Throughput: 0: 1021.8. Samples: 3310920. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:33:08,095][00684] Avg episode reward: [(0, '23.209')] +[2023-02-25 04:33:13,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4300.8, 300 sec: 4040.5). Total num frames: 17272832. Throughput: 0: 1050.6. Samples: 3314828. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 04:33:13,095][00684] Avg episode reward: [(0, '22.582')] +[2023-02-25 04:33:14,566][19689] Updated weights for policy 0, policy_version 4218 (0.0012) +[2023-02-25 04:33:18,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4164.3, 300 sec: 4040.5). Total num frames: 17289216. Throughput: 0: 1055.5. Samples: 3320212. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:33:18,096][00684] Avg episode reward: [(0, '23.175')] +[2023-02-25 04:33:23,094][00684] Fps is (10 sec: 2867.0, 60 sec: 3959.4, 300 sec: 3998.8). Total num frames: 17301504. Throughput: 0: 1041.3. Samples: 3325168. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:33:23,100][00684] Avg episode reward: [(0, '23.565')] +[2023-02-25 04:33:27,710][19689] Updated weights for policy 0, policy_version 4228 (0.0030) +[2023-02-25 04:33:28,094][00684] Fps is (10 sec: 2867.1, 60 sec: 3959.4, 300 sec: 3971.0). Total num frames: 17317888. Throughput: 0: 1000.8. Samples: 3327236. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:33:28,098][00684] Avg episode reward: [(0, '23.581')] +[2023-02-25 04:33:33,093][00684] Fps is (10 sec: 3277.1, 60 sec: 3891.8, 300 sec: 3957.2). Total num frames: 17334272. Throughput: 0: 936.2. Samples: 3332124. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:33:33,098][00684] Avg episode reward: [(0, '25.125')] +[2023-02-25 04:33:33,103][19675] Saving new best policy, reward=25.125! +[2023-02-25 04:33:38,096][00684] Fps is (10 sec: 3685.5, 60 sec: 3959.3, 300 sec: 3984.9). Total num frames: 17354752. Throughput: 0: 943.9. Samples: 3337636. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:33:38,099][00684] Avg episode reward: [(0, '23.917')] +[2023-02-25 04:33:38,885][19689] Updated weights for policy 0, policy_version 4238 (0.0012) +[2023-02-25 04:33:43,096][00684] Fps is (10 sec: 3685.5, 60 sec: 3822.8, 300 sec: 3998.8). Total num frames: 17371136. Throughput: 0: 943.8. Samples: 3340264. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:33:43,098][00684] Avg episode reward: [(0, '24.087')] +[2023-02-25 04:33:48,102][00684] Fps is (10 sec: 3274.8, 60 sec: 3685.9, 300 sec: 3998.7). Total num frames: 17387520. Throughput: 0: 939.3. Samples: 3345548. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:33:48,105][00684] Avg episode reward: [(0, '24.122')] +[2023-02-25 04:33:48,114][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004245_17387520.pth... +[2023-02-25 04:33:48,376][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004013_16437248.pth +[2023-02-25 04:33:50,808][19689] Updated weights for policy 0, policy_version 4248 (0.0037) +[2023-02-25 04:33:53,093][00684] Fps is (10 sec: 3687.2, 60 sec: 3754.8, 300 sec: 4012.7). Total num frames: 17408000. Throughput: 0: 890.6. Samples: 3350996. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:33:53,105][00684] Avg episode reward: [(0, '24.714')] +[2023-02-25 04:33:58,093][00684] Fps is (10 sec: 4919.5, 60 sec: 3959.5, 300 sec: 4026.6). Total num frames: 17436672. Throughput: 0: 890.1. Samples: 3354884. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:33:58,095][00684] Avg episode reward: [(0, '24.669')] +[2023-02-25 04:33:59,280][19689] Updated weights for policy 0, policy_version 4258 (0.0011) +[2023-02-25 04:34:03,093][00684] Fps is (10 sec: 4915.3, 60 sec: 3891.2, 300 sec: 4040.5). Total num frames: 17457152. Throughput: 0: 939.2. Samples: 3362476. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:34:03,100][00684] Avg episode reward: [(0, '24.969')] +[2023-02-25 04:34:08,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 4026.6). Total num frames: 17473536. Throughput: 0: 947.8. Samples: 3367820. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:34:08,096][00684] Avg episode reward: [(0, '25.858')] +[2023-02-25 04:34:08,115][19675] Saving new best policy, reward=25.858! +[2023-02-25 04:34:09,798][19689] Updated weights for policy 0, policy_version 4268 (0.0012) +[2023-02-25 04:34:13,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3618.1, 300 sec: 4026.6). Total num frames: 17489920. Throughput: 0: 957.5. Samples: 3370324. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:34:13,096][00684] Avg episode reward: [(0, '26.613')] +[2023-02-25 04:34:13,101][19675] Saving new best policy, reward=26.613! +[2023-02-25 04:34:18,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3686.4, 300 sec: 4012.7). Total num frames: 17510400. Throughput: 0: 967.2. Samples: 3375648. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:34:18,099][00684] Avg episode reward: [(0, '24.420')] +[2023-02-25 04:34:19,862][19689] Updated weights for policy 0, policy_version 4278 (0.0021) +[2023-02-25 04:34:23,093][00684] Fps is (10 sec: 4915.2, 60 sec: 3959.5, 300 sec: 4012.7). Total num frames: 17539072. Throughput: 0: 1013.1. Samples: 3383224. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:34:23,095][00684] Avg episode reward: [(0, '24.625')] +[2023-02-25 04:34:28,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4027.8, 300 sec: 4026.7). Total num frames: 17559552. Throughput: 0: 1041.8. Samples: 3387144. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:34:28,102][00684] Avg episode reward: [(0, '23.453')] +[2023-02-25 04:34:28,561][19689] Updated weights for policy 0, policy_version 4288 (0.0012) +[2023-02-25 04:34:33,093][00684] Fps is (10 sec: 3686.3, 60 sec: 4027.7, 300 sec: 4026.6). Total num frames: 17575936. Throughput: 0: 1047.3. Samples: 3392668. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:34:33,096][00684] Avg episode reward: [(0, '23.363')] +[2023-02-25 04:34:38,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.9, 300 sec: 4026.6). Total num frames: 17596416. Throughput: 0: 1044.4. Samples: 3397992. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:34:38,098][00684] Avg episode reward: [(0, '21.939')] +[2023-02-25 04:34:40,355][19689] Updated weights for policy 0, policy_version 4298 (0.0021) +[2023-02-25 04:34:43,093][00684] Fps is (10 sec: 4096.1, 60 sec: 4096.2, 300 sec: 3998.8). Total num frames: 17616896. Throughput: 0: 1016.5. Samples: 3400628. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:34:43,096][00684] Avg episode reward: [(0, '24.336')] +[2023-02-25 04:34:48,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4233.2, 300 sec: 4012.7). Total num frames: 17641472. Throughput: 0: 1019.5. Samples: 3408352. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:34:48,101][00684] Avg episode reward: [(0, '24.681')] +[2023-02-25 04:34:48,172][19689] Updated weights for policy 0, policy_version 4308 (0.0019) +[2023-02-25 04:34:53,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4300.8, 300 sec: 4026.6). Total num frames: 17666048. Throughput: 0: 1055.9. Samples: 3415336. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 04:34:53,097][00684] Avg episode reward: [(0, '24.396')] +[2023-02-25 04:34:58,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4096.0, 300 sec: 4026.6). Total num frames: 17682432. Throughput: 0: 1059.0. Samples: 3417980. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:34:58,096][00684] Avg episode reward: [(0, '23.231')] +[2023-02-25 04:34:58,837][19689] Updated weights for policy 0, policy_version 4318 (0.0011) +[2023-02-25 04:35:03,093][00684] Fps is (10 sec: 3276.8, 60 sec: 4027.7, 300 sec: 4026.6). Total num frames: 17698816. Throughput: 0: 1056.3. Samples: 3423180. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 04:35:03,098][00684] Avg episode reward: [(0, '24.060')] +[2023-02-25 04:35:08,093][00684] Fps is (10 sec: 4095.9, 60 sec: 4164.3, 300 sec: 4012.7). Total num frames: 17723392. Throughput: 0: 1031.2. Samples: 3429628. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:35:08,100][00684] Avg episode reward: [(0, '21.615')] +[2023-02-25 04:35:08,793][19689] Updated weights for policy 0, policy_version 4328 (0.0016) +[2023-02-25 04:35:13,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4300.8, 300 sec: 4026.6). Total num frames: 17747968. Throughput: 0: 1029.7. Samples: 3433480. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:35:13,095][00684] Avg episode reward: [(0, '22.454')] +[2023-02-25 04:35:17,507][19689] Updated weights for policy 0, policy_version 4338 (0.0012) +[2023-02-25 04:35:18,093][00684] Fps is (10 sec: 4505.7, 60 sec: 4300.8, 300 sec: 4026.6). Total num frames: 17768448. Throughput: 0: 1064.2. Samples: 3440556. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:35:18,098][00684] Avg episode reward: [(0, '23.416')] +[2023-02-25 04:35:23,099][00684] Fps is (10 sec: 3684.2, 60 sec: 4095.6, 300 sec: 4026.5). 
Total num frames: 17784832. Throughput: 0: 1059.2. Samples: 3445664. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:35:23,102][00684] Avg episode reward: [(0, '24.643')] +[2023-02-25 04:35:28,094][00684] Fps is (10 sec: 3686.1, 60 sec: 4095.9, 300 sec: 4026.6). Total num frames: 17805312. Throughput: 0: 1058.3. Samples: 3448252. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 04:35:28,099][00684] Avg episode reward: [(0, '25.096')] +[2023-02-25 04:35:29,335][19689] Updated weights for policy 0, policy_version 4348 (0.0026) +[2023-02-25 04:35:33,093][00684] Fps is (10 sec: 4508.3, 60 sec: 4232.6, 300 sec: 4026.6). Total num frames: 17829888. Throughput: 0: 1024.2. Samples: 3454440. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:35:33,096][00684] Avg episode reward: [(0, '24.613')] +[2023-02-25 04:35:37,238][19689] Updated weights for policy 0, policy_version 4358 (0.0017) +[2023-02-25 04:35:38,093][00684] Fps is (10 sec: 4915.6, 60 sec: 4300.8, 300 sec: 4026.6). Total num frames: 17854464. Throughput: 0: 1040.4. Samples: 3462152. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 04:35:38,101][00684] Avg episode reward: [(0, '25.155')] +[2023-02-25 04:35:43,097][00684] Fps is (10 sec: 4094.4, 60 sec: 4232.2, 300 sec: 4026.5). Total num frames: 17870848. Throughput: 0: 1051.6. Samples: 3465304. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:35:43,100][00684] Avg episode reward: [(0, '24.074')] +[2023-02-25 04:35:48,093][00684] Fps is (10 sec: 2867.2, 60 sec: 4027.7, 300 sec: 4012.7). Total num frames: 17883136. Throughput: 0: 1027.7. Samples: 3469428. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:35:48,098][00684] Avg episode reward: [(0, '24.098')] +[2023-02-25 04:35:48,111][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004366_17883136.pth... +[2023-02-25 04:35:48,412][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004128_16908288.pth +[2023-02-25 04:35:50,670][19689] Updated weights for policy 0, policy_version 4368 (0.0011) +[2023-02-25 04:35:53,095][00684] Fps is (10 sec: 2458.2, 60 sec: 3822.8, 300 sec: 3984.9). Total num frames: 17895424. Throughput: 0: 972.9. Samples: 3473412. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:35:53,103][00684] Avg episode reward: [(0, '23.864')] +[2023-02-25 04:35:58,093][00684] Fps is (10 sec: 2867.2, 60 sec: 3822.9, 300 sec: 3957.2). Total num frames: 17911808. Throughput: 0: 932.7. Samples: 3475452. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0) +[2023-02-25 04:35:58,097][00684] Avg episode reward: [(0, '23.927')] +[2023-02-25 04:36:02,902][19689] Updated weights for policy 0, policy_version 4378 (0.0012) +[2023-02-25 04:36:03,093][00684] Fps is (10 sec: 3687.0, 60 sec: 3891.2, 300 sec: 3971.1). Total num frames: 17932288. Throughput: 0: 890.0. Samples: 3480608. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:36:03,102][00684] Avg episode reward: [(0, '24.370')] +[2023-02-25 04:36:08,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3891.2, 300 sec: 4012.7). Total num frames: 17956864. Throughput: 0: 951.6. Samples: 3488480. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:36:08,096][00684] Avg episode reward: [(0, '23.188')] +[2023-02-25 04:36:11,580][19689] Updated weights for policy 0, policy_version 4388 (0.0012) +[2023-02-25 04:36:13,096][00684] Fps is (10 sec: 4504.2, 60 sec: 3822.7, 300 sec: 4040.4). Total num frames: 17977344. Throughput: 0: 965.2. 
Samples: 3491688. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:36:13,099][00684] Avg episode reward: [(0, '24.105')] +[2023-02-25 04:36:18,093][00684] Fps is (10 sec: 3686.3, 60 sec: 3754.7, 300 sec: 4040.5). Total num frames: 17993728. Throughput: 0: 943.6. Samples: 3496904. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:36:18,101][00684] Avg episode reward: [(0, '23.843')] +[2023-02-25 04:36:23,093][00684] Fps is (10 sec: 3277.8, 60 sec: 3755.0, 300 sec: 4012.7). Total num frames: 18010112. Throughput: 0: 885.5. Samples: 3502000. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:36:23,103][00684] Avg episode reward: [(0, '23.364')] +[2023-02-25 04:36:23,370][19689] Updated weights for policy 0, policy_version 4398 (0.0012) +[2023-02-25 04:36:28,094][00684] Fps is (10 sec: 4505.3, 60 sec: 3891.2, 300 sec: 4012.7). Total num frames: 18038784. Throughput: 0: 896.0. Samples: 3505620. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:36:28,096][00684] Avg episode reward: [(0, '25.746')] +[2023-02-25 04:36:31,235][19689] Updated weights for policy 0, policy_version 4408 (0.0011) +[2023-02-25 04:36:33,096][00684] Fps is (10 sec: 5323.5, 60 sec: 3891.0, 300 sec: 4040.5). Total num frames: 18063360. Throughput: 0: 975.2. Samples: 3513316. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:36:33,098][00684] Avg episode reward: [(0, '27.481')] +[2023-02-25 04:36:33,105][19675] Saving new best policy, reward=27.481! +[2023-02-25 04:36:38,093][00684] Fps is (10 sec: 4096.3, 60 sec: 3754.7, 300 sec: 4040.5). Total num frames: 18079744. Throughput: 0: 1017.5. Samples: 3519196. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:36:38,101][00684] Avg episode reward: [(0, '28.238')] +[2023-02-25 04:36:38,115][19675] Saving new best policy, reward=28.238! +[2023-02-25 04:36:42,502][19689] Updated weights for policy 0, policy_version 4418 (0.0012) +[2023-02-25 04:36:43,093][00684] Fps is (10 sec: 3277.6, 60 sec: 3754.9, 300 sec: 4026.6). Total num frames: 18096128. Throughput: 0: 1029.2. Samples: 3521768. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:36:43,098][00684] Avg episode reward: [(0, '27.384')] +[2023-02-25 04:36:48,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3891.2, 300 sec: 4012.7). Total num frames: 18116608. Throughput: 0: 1030.6. Samples: 3526984. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:36:48,096][00684] Avg episode reward: [(0, '27.090')] +[2023-02-25 04:36:51,983][19689] Updated weights for policy 0, policy_version 4428 (0.0016) +[2023-02-25 04:36:53,096][00684] Fps is (10 sec: 4504.4, 60 sec: 4095.9, 300 sec: 4012.7). Total num frames: 18141184. Throughput: 0: 1017.5. Samples: 3534268. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:36:53,099][00684] Avg episode reward: [(0, '27.460')] +[2023-02-25 04:36:58,094][00684] Fps is (10 sec: 4914.9, 60 sec: 4232.5, 300 sec: 4040.5). Total num frames: 18165760. Throughput: 0: 1028.6. Samples: 3537972. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:36:58,096][00684] Avg episode reward: [(0, '25.937')] +[2023-02-25 04:37:01,744][19689] Updated weights for policy 0, policy_version 4438 (0.0012) +[2023-02-25 04:37:03,093][00684] Fps is (10 sec: 4097.1, 60 sec: 4164.3, 300 sec: 4026.6). Total num frames: 18182144. Throughput: 0: 1046.2. Samples: 3543984. 
Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:37:03,096][00684] Avg episode reward: [(0, '25.163')] +[2023-02-25 04:37:08,093][00684] Fps is (10 sec: 3277.0, 60 sec: 4027.7, 300 sec: 4012.7). Total num frames: 18198528. Throughput: 0: 1047.8. Samples: 3549152. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:37:08,100][00684] Avg episode reward: [(0, '27.138')] +[2023-02-25 04:37:12,748][19689] Updated weights for policy 0, policy_version 4448 (0.0013) +[2023-02-25 04:37:13,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.9, 300 sec: 3998.8). Total num frames: 18219008. Throughput: 0: 1025.4. Samples: 3551764. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:37:13,105][00684] Avg episode reward: [(0, '28.622')] +[2023-02-25 04:37:13,113][19675] Saving new best policy, reward=28.622! +[2023-02-25 04:37:18,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4164.3, 300 sec: 3998.8). Total num frames: 18243584. Throughput: 0: 1013.8. Samples: 3558936. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:37:18,096][00684] Avg episode reward: [(0, '28.158')] +[2023-02-25 04:37:20,867][19689] Updated weights for policy 0, policy_version 4458 (0.0016) +[2023-02-25 04:37:23,098][00684] Fps is (10 sec: 4912.9, 60 sec: 4300.5, 300 sec: 4026.5). Total num frames: 18268160. Throughput: 0: 1046.9. Samples: 3566312. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:37:23,100][00684] Avg episode reward: [(0, '27.676')] +[2023-02-25 04:37:28,094][00684] Fps is (10 sec: 4095.7, 60 sec: 4096.0, 300 sec: 4012.8). Total num frames: 18284544. Throughput: 0: 1048.1. Samples: 3568932. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:37:28,099][00684] Avg episode reward: [(0, '27.792')] +[2023-02-25 04:37:32,076][19689] Updated weights for policy 0, policy_version 4468 (0.0012) +[2023-02-25 04:37:33,093][00684] Fps is (10 sec: 3688.1, 60 sec: 4027.9, 300 sec: 4026.6). Total num frames: 18305024. Throughput: 0: 1049.1. Samples: 3574192. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:37:33,105][00684] Avg episode reward: [(0, '27.740')] +[2023-02-25 04:37:38,093][00684] Fps is (10 sec: 4096.3, 60 sec: 4096.0, 300 sec: 4012.7). Total num frames: 18325504. Throughput: 0: 1020.0. Samples: 3580164. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:37:38,096][00684] Avg episode reward: [(0, '27.715')] +[2023-02-25 04:37:41,063][19689] Updated weights for policy 0, policy_version 4478 (0.0016) +[2023-02-25 04:37:43,093][00684] Fps is (10 sec: 4505.7, 60 sec: 4232.5, 300 sec: 4012.7). Total num frames: 18350080. Throughput: 0: 1021.7. Samples: 3583948. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:37:43,103][00684] Avg episode reward: [(0, '28.041')] +[2023-02-25 04:37:48,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4232.5, 300 sec: 4026.6). Total num frames: 18370560. Throughput: 0: 1053.0. Samples: 3591368. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:37:48,099][00684] Avg episode reward: [(0, '27.712')] +[2023-02-25 04:37:48,109][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004485_18370560.pth... +[2023-02-25 04:37:48,311][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004245_17387520.pth +[2023-02-25 04:37:50,771][19689] Updated weights for policy 0, policy_version 4488 (0.0021) +[2023-02-25 04:37:53,099][00684] Fps is (10 sec: 3684.1, 60 sec: 4095.8, 300 sec: 4026.5). Total num frames: 18386944. Throughput: 0: 1052.7. 
Samples: 3596528. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:37:53,108][00684] Avg episode reward: [(0, '28.429')] +[2023-02-25 04:37:58,094][00684] Fps is (10 sec: 3685.9, 60 sec: 4027.7, 300 sec: 4012.7). Total num frames: 18407424. Throughput: 0: 1052.9. Samples: 3599144. Policy #0 lag: (min: 0.0, avg: 1.2, max: 2.0) +[2023-02-25 04:37:58,096][00684] Avg episode reward: [(0, '28.923')] +[2023-02-25 04:37:58,105][19675] Saving new best policy, reward=28.923! +[2023-02-25 04:38:02,446][19689] Updated weights for policy 0, policy_version 4498 (0.0016) +[2023-02-25 04:38:03,093][00684] Fps is (10 sec: 3688.7, 60 sec: 4027.7, 300 sec: 3984.9). Total num frames: 18423808. Throughput: 0: 1008.4. Samples: 3604316. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:38:03,096][00684] Avg episode reward: [(0, '27.620')] +[2023-02-25 04:38:08,093][00684] Fps is (10 sec: 3277.2, 60 sec: 4027.7, 300 sec: 3957.2). Total num frames: 18440192. Throughput: 0: 961.0. Samples: 3609552. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:38:08,099][00684] Avg episode reward: [(0, '27.040')] +[2023-02-25 04:38:13,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 3957.2). Total num frames: 18456576. Throughput: 0: 959.8. Samples: 3612120. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:38:13,096][00684] Avg episode reward: [(0, '26.060')] +[2023-02-25 04:38:15,504][19689] Updated weights for policy 0, policy_version 4508 (0.0012) +[2023-02-25 04:38:18,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 3971.0). Total num frames: 18472960. Throughput: 0: 935.9. Samples: 3616308. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:38:18,099][00684] Avg episode reward: [(0, '27.470')] +[2023-02-25 04:38:23,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3686.7, 300 sec: 3971.0). Total num frames: 18489344. Throughput: 0: 917.8. Samples: 3621464. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:38:23,099][00684] Avg episode reward: [(0, '28.149')] +[2023-02-25 04:38:26,696][19689] Updated weights for policy 0, policy_version 4518 (0.0017) +[2023-02-25 04:38:28,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3754.7, 300 sec: 3984.9). Total num frames: 18509824. Throughput: 0: 893.0. Samples: 3624132. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:38:28,095][00684] Avg episode reward: [(0, '27.927')] +[2023-02-25 04:38:33,095][00684] Fps is (10 sec: 4914.3, 60 sec: 3891.1, 300 sec: 4012.7). Total num frames: 18538496. Throughput: 0: 897.7. Samples: 3631768. Policy #0 lag: (min: 0.0, avg: 0.8, max: 3.0) +[2023-02-25 04:38:33,098][00684] Avg episode reward: [(0, '29.237')] +[2023-02-25 04:38:33,107][19675] Saving new best policy, reward=29.237! +[2023-02-25 04:38:34,668][19689] Updated weights for policy 0, policy_version 4528 (0.0021) +[2023-02-25 04:38:38,093][00684] Fps is (10 sec: 4915.2, 60 sec: 3891.2, 300 sec: 4026.6). Total num frames: 18558976. Throughput: 0: 937.6. Samples: 3638716. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:38:38,098][00684] Avg episode reward: [(0, '29.283')] +[2023-02-25 04:38:38,118][19675] Saving new best policy, reward=29.283! +[2023-02-25 04:38:43,093][00684] Fps is (10 sec: 3687.1, 60 sec: 3754.7, 300 sec: 4026.7). Total num frames: 18575360. Throughput: 0: 936.6. Samples: 3641288. 
Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:38:43,096][00684] Avg episode reward: [(0, '28.641')] +[2023-02-25 04:38:46,260][19689] Updated weights for policy 0, policy_version 4538 (0.0014) +[2023-02-25 04:38:48,093][00684] Fps is (10 sec: 3276.7, 60 sec: 3686.4, 300 sec: 4012.7). Total num frames: 18591744. Throughput: 0: 937.0. Samples: 3646480. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:38:48,099][00684] Avg episode reward: [(0, '28.157')] +[2023-02-25 04:38:53,093][00684] Fps is (10 sec: 4096.0, 60 sec: 3823.3, 300 sec: 3998.8). Total num frames: 18616320. Throughput: 0: 959.9. Samples: 3652748. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:38:53,102][00684] Avg episode reward: [(0, '27.748')] +[2023-02-25 04:38:55,188][19689] Updated weights for policy 0, policy_version 4548 (0.0013) +[2023-02-25 04:38:58,093][00684] Fps is (10 sec: 4915.3, 60 sec: 3891.3, 300 sec: 4012.7). Total num frames: 18640896. Throughput: 0: 988.1. Samples: 3656584. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:38:58,096][00684] Avg episode reward: [(0, '27.911')] +[2023-02-25 04:39:03,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3959.5, 300 sec: 4026.6). Total num frames: 18661376. Throughput: 0: 1054.8. Samples: 3663772. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:39:03,096][00684] Avg episode reward: [(0, '27.252')] +[2023-02-25 04:39:04,843][19689] Updated weights for policy 0, policy_version 4558 (0.0013) +[2023-02-25 04:39:08,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3959.5, 300 sec: 4026.6). Total num frames: 18677760. Throughput: 0: 1054.9. Samples: 3668936. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:39:08,099][00684] Avg episode reward: [(0, '26.842')] +[2023-02-25 04:39:13,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3959.5, 300 sec: 4012.7). Total num frames: 18694144. Throughput: 0: 1053.1. Samples: 3671520. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:39:13,096][00684] Avg episode reward: [(0, '27.223')] +[2023-02-25 04:39:15,935][19689] Updated weights for policy 0, policy_version 4568 (0.0025) +[2023-02-25 04:39:18,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4096.0, 300 sec: 3998.8). Total num frames: 18718720. Throughput: 0: 1016.8. Samples: 3677520. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:39:18,095][00684] Avg episode reward: [(0, '26.840')] +[2023-02-25 04:39:23,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4232.5, 300 sec: 4012.7). Total num frames: 18743296. Throughput: 0: 1033.4. Samples: 3685220. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:39:23,098][00684] Avg episode reward: [(0, '27.358')] +[2023-02-25 04:39:23,978][19689] Updated weights for policy 0, policy_version 4578 (0.0011) +[2023-02-25 04:39:28,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4232.5, 300 sec: 4026.6). Total num frames: 18763776. Throughput: 0: 1051.9. Samples: 3688624. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:39:28,098][00684] Avg episode reward: [(0, '27.493')] +[2023-02-25 04:39:33,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4027.9, 300 sec: 4012.7). Total num frames: 18780160. Throughput: 0: 1051.7. Samples: 3693804. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:39:33,100][00684] Avg episode reward: [(0, '27.346')] +[2023-02-25 04:39:35,730][19689] Updated weights for policy 0, policy_version 4588 (0.0020) +[2023-02-25 04:39:38,093][00684] Fps is (10 sec: 3686.3, 60 sec: 4027.7, 300 sec: 4012.7). 
Total num frames: 18800640. Throughput: 0: 1028.3. Samples: 3699020. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:39:38,096][00684] Avg episode reward: [(0, '28.278')] +[2023-02-25 04:39:43,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4164.3, 300 sec: 4012.7). Total num frames: 18825216. Throughput: 0: 1018.3. Samples: 3702408. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:39:43,096][00684] Avg episode reward: [(0, '27.272')] +[2023-02-25 04:39:44,605][19689] Updated weights for policy 0, policy_version 4598 (0.0012) +[2023-02-25 04:39:48,093][00684] Fps is (10 sec: 4915.3, 60 sec: 4300.8, 300 sec: 4012.7). Total num frames: 18849792. Throughput: 0: 1031.6. Samples: 3710196. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:39:48,096][00684] Avg episode reward: [(0, '25.786')] +[2023-02-25 04:39:48,118][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004602_18849792.pth... +[2023-02-25 04:39:48,274][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004366_17883136.pth +[2023-02-25 04:39:53,097][00684] Fps is (10 sec: 4094.4, 60 sec: 4164.0, 300 sec: 4012.6). Total num frames: 18866176. Throughput: 0: 1051.0. Samples: 3716236. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:39:53,100][00684] Avg episode reward: [(0, '24.904')] +[2023-02-25 04:39:54,506][19689] Updated weights for policy 0, policy_version 4608 (0.0012) +[2023-02-25 04:39:58,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4096.0, 300 sec: 4026.6). Total num frames: 18886656. Throughput: 0: 1053.8. Samples: 3718940. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:39:58,101][00684] Avg episode reward: [(0, '24.780')] +[2023-02-25 04:40:03,093][00684] Fps is (10 sec: 3687.8, 60 sec: 4027.7, 300 sec: 3998.8). Total num frames: 18903040. Throughput: 0: 1037.2. Samples: 3724192. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:40:03,103][00684] Avg episode reward: [(0, '24.765')] +[2023-02-25 04:40:04,938][19689] Updated weights for policy 0, policy_version 4618 (0.0012) +[2023-02-25 04:40:08,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4232.5, 300 sec: 4012.7). Total num frames: 18931712. Throughput: 0: 1023.6. Samples: 3731284. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:40:08,095][00684] Avg episode reward: [(0, '25.829')] +[2023-02-25 04:40:12,985][19689] Updated weights for policy 0, policy_version 4628 (0.0011) +[2023-02-25 04:40:13,093][00684] Fps is (10 sec: 5324.8, 60 sec: 4369.1, 300 sec: 4026.6). Total num frames: 18956288. Throughput: 0: 1033.6. Samples: 3735136. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:40:13,099][00684] Avg episode reward: [(0, '26.663')] +[2023-02-25 04:40:18,093][00684] Fps is (10 sec: 4095.9, 60 sec: 4232.5, 300 sec: 4026.7). Total num frames: 18972672. Throughput: 0: 1059.7. Samples: 3741492. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:40:18,096][00684] Avg episode reward: [(0, '26.513')] +[2023-02-25 04:40:23,097][00684] Fps is (10 sec: 3275.6, 60 sec: 4095.7, 300 sec: 4012.7). Total num frames: 18989056. Throughput: 0: 1052.4. Samples: 3746384. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:40:23,100][00684] Avg episode reward: [(0, '26.525')] +[2023-02-25 04:40:25,796][19689] Updated weights for policy 0, policy_version 4638 (0.0016) +[2023-02-25 04:40:28,097][00684] Fps is (10 sec: 2866.1, 60 sec: 3959.2, 300 sec: 3971.0). Total num frames: 19001344. Throughput: 0: 1021.7. 
Samples: 3748388. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:40:28,102][00684] Avg episode reward: [(0, '27.330')] +[2023-02-25 04:40:33,094][00684] Fps is (10 sec: 2867.9, 60 sec: 3959.4, 300 sec: 3943.3). Total num frames: 19017728. Throughput: 0: 948.2. Samples: 3752864. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:40:33,102][00684] Avg episode reward: [(0, '26.996')] +[2023-02-25 04:40:38,093][00684] Fps is (10 sec: 3278.1, 60 sec: 3891.2, 300 sec: 3943.3). Total num frames: 19034112. Throughput: 0: 933.3. Samples: 3758232. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:40:38,098][00684] Avg episode reward: [(0, '27.376')] +[2023-02-25 04:40:38,266][19689] Updated weights for policy 0, policy_version 4648 (0.0019) +[2023-02-25 04:40:43,098][00684] Fps is (10 sec: 3685.1, 60 sec: 3822.6, 300 sec: 3971.0). Total num frames: 19054592. Throughput: 0: 943.8. Samples: 3761416. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:40:43,101][00684] Avg episode reward: [(0, '26.838')] +[2023-02-25 04:40:48,096][00684] Fps is (10 sec: 4094.7, 60 sec: 3754.5, 300 sec: 3998.8). Total num frames: 19075072. Throughput: 0: 946.2. Samples: 3766776. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:40:48,098][00684] Avg episode reward: [(0, '27.504')] +[2023-02-25 04:40:49,619][19689] Updated weights for policy 0, policy_version 4658 (0.0034) +[2023-02-25 04:40:53,093][00684] Fps is (10 sec: 3688.1, 60 sec: 3754.9, 300 sec: 3998.8). Total num frames: 19091456. Throughput: 0: 899.3. Samples: 3771752. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:40:53,095][00684] Avg episode reward: [(0, '27.261')] +[2023-02-25 04:40:58,093][00684] Fps is (10 sec: 4097.3, 60 sec: 3822.9, 300 sec: 4012.7). Total num frames: 19116032. Throughput: 0: 889.1. Samples: 3775144. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:40:58,096][00684] Avg episode reward: [(0, '27.903')] +[2023-02-25 04:40:58,537][19689] Updated weights for policy 0, policy_version 4668 (0.0019) +[2023-02-25 04:41:03,093][00684] Fps is (10 sec: 4915.2, 60 sec: 3959.5, 300 sec: 4012.7). Total num frames: 19140608. Throughput: 0: 918.0. Samples: 3782800. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:41:03,096][00684] Avg episode reward: [(0, '29.258')] +[2023-02-25 04:41:07,961][19689] Updated weights for policy 0, policy_version 4678 (0.0015) +[2023-02-25 04:41:08,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 4012.7). Total num frames: 19161088. Throughput: 0: 946.9. Samples: 3788992. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:41:08,097][00684] Avg episode reward: [(0, '29.324')] +[2023-02-25 04:41:08,112][19675] Saving new best policy, reward=29.324! +[2023-02-25 04:41:13,098][00684] Fps is (10 sec: 3684.7, 60 sec: 3686.1, 300 sec: 4012.6). Total num frames: 19177472. Throughput: 0: 959.0. Samples: 3791544. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:41:13,101][00684] Avg episode reward: [(0, '31.569')] +[2023-02-25 04:41:13,103][19675] Saving new best policy, reward=31.569! +[2023-02-25 04:41:18,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3686.4, 300 sec: 4012.7). Total num frames: 19193856. Throughput: 0: 974.5. Samples: 3796716. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:41:18,096][00684] Avg episode reward: [(0, '30.023')] +[2023-02-25 04:41:19,254][19689] Updated weights for policy 0, policy_version 4688 (0.0014) +[2023-02-25 04:41:23,093][00684] Fps is (10 sec: 4098.0, 60 sec: 3823.2, 300 sec: 3998.8). Total num frames: 19218432. Throughput: 0: 1013.9. Samples: 3803856. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:41:23,096][00684] Avg episode reward: [(0, '30.666')] +[2023-02-25 04:41:27,078][19689] Updated weights for policy 0, policy_version 4698 (0.0017) +[2023-02-25 04:41:28,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4028.0, 300 sec: 3998.8). Total num frames: 19243008. Throughput: 0: 1028.6. Samples: 3807696. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:41:28,098][00684] Avg episode reward: [(0, '29.792')] +[2023-02-25 04:41:33,094][00684] Fps is (10 sec: 4505.1, 60 sec: 4096.0, 300 sec: 4012.7). Total num frames: 19263488. Throughput: 0: 1047.2. Samples: 3813896. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:41:33,101][00684] Avg episode reward: [(0, '30.087')] +[2023-02-25 04:41:38,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4096.0, 300 sec: 4012.7). Total num frames: 19279872. Throughput: 0: 1055.1. Samples: 3819232. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:41:38,098][00684] Avg episode reward: [(0, '29.390')] +[2023-02-25 04:41:38,784][19689] Updated weights for policy 0, policy_version 4708 (0.0017) +[2023-02-25 04:41:43,093][00684] Fps is (10 sec: 3686.8, 60 sec: 4096.3, 300 sec: 4012.7). Total num frames: 19300352. Throughput: 0: 1037.1. Samples: 3821812. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:41:43,098][00684] Avg episode reward: [(0, '28.269')] +[2023-02-25 04:41:47,646][19689] Updated weights for policy 0, policy_version 4718 (0.0012) +[2023-02-25 04:41:48,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4164.5, 300 sec: 4012.7). Total num frames: 19324928. Throughput: 0: 1027.0. Samples: 3829016. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:41:48,096][00684] Avg episode reward: [(0, '27.967')] +[2023-02-25 04:41:48,116][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004718_19324928.pth... +[2023-02-25 04:41:48,313][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004485_18370560.pth +[2023-02-25 04:41:53,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4300.8, 300 sec: 4012.7). Total num frames: 19349504. Throughput: 0: 1054.0. Samples: 3836420. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:41:53,096][00684] Avg episode reward: [(0, '28.666')] +[2023-02-25 04:41:57,686][19689] Updated weights for policy 0, policy_version 4728 (0.0011) +[2023-02-25 04:41:58,093][00684] Fps is (10 sec: 4095.9, 60 sec: 4164.3, 300 sec: 4012.7). Total num frames: 19365888. Throughput: 0: 1053.3. Samples: 3838936. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:41:58,097][00684] Avg episode reward: [(0, '28.066')] +[2023-02-25 04:42:03,093][00684] Fps is (10 sec: 3276.8, 60 sec: 4027.7, 300 sec: 4012.7). Total num frames: 19382272. Throughput: 0: 1054.1. Samples: 3844152. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:42:03,096][00684] Avg episode reward: [(0, '26.528')] +[2023-02-25 04:42:07,901][19689] Updated weights for policy 0, policy_version 4738 (0.0015) +[2023-02-25 04:42:08,093][00684] Fps is (10 sec: 4096.1, 60 sec: 4096.0, 300 sec: 4026.6). Total num frames: 19406848. 
Throughput: 0: 1029.6. Samples: 3850188. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0) +[2023-02-25 04:42:08,096][00684] Avg episode reward: [(0, '26.538')] +[2023-02-25 04:42:13,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4232.9, 300 sec: 4026.6). Total num frames: 19431424. Throughput: 0: 1028.6. Samples: 3853984. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:42:13,096][00684] Avg episode reward: [(0, '27.976')] +[2023-02-25 04:42:16,126][19689] Updated weights for policy 0, policy_version 4748 (0.0012) +[2023-02-25 04:42:18,093][00684] Fps is (10 sec: 4505.6, 60 sec: 4300.8, 300 sec: 4012.8). Total num frames: 19451904. Throughput: 0: 1057.4. Samples: 3861480. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:42:18,095][00684] Avg episode reward: [(0, '28.478')] +[2023-02-25 04:42:23,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4164.3, 300 sec: 4012.7). Total num frames: 19468288. Throughput: 0: 1052.3. Samples: 3866584. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0) +[2023-02-25 04:42:23,097][00684] Avg episode reward: [(0, '28.275')] +[2023-02-25 04:42:28,087][19689] Updated weights for policy 0, policy_version 4758 (0.0013) +[2023-02-25 04:42:28,093][00684] Fps is (10 sec: 3686.3, 60 sec: 4096.0, 300 sec: 4012.7). Total num frames: 19488768. Throughput: 0: 1054.0. Samples: 3869240. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:42:28,097][00684] Avg episode reward: [(0, '27.955')] +[2023-02-25 04:42:33,093][00684] Fps is (10 sec: 4096.0, 60 sec: 4096.1, 300 sec: 4012.7). Total num frames: 19509248. Throughput: 0: 1021.9. Samples: 3875000. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:42:33,096][00684] Avg episode reward: [(0, '28.915')] +[2023-02-25 04:42:36,727][19689] Updated weights for policy 0, policy_version 4768 (0.0011) +[2023-02-25 04:42:38,093][00684] Fps is (10 sec: 4505.7, 60 sec: 4232.5, 300 sec: 4012.7). Total num frames: 19533824. Throughput: 0: 1030.2. Samples: 3882780. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:42:38,096][00684] Avg episode reward: [(0, '28.731')] +[2023-02-25 04:42:43,097][00684] Fps is (10 sec: 4503.9, 60 sec: 4232.3, 300 sec: 4012.6). Total num frames: 19554304. Throughput: 0: 1059.7. Samples: 3886628. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:42:43,105][00684] Avg episode reward: [(0, '26.972')] +[2023-02-25 04:42:47,764][19689] Updated weights for policy 0, policy_version 4778 (0.0013) +[2023-02-25 04:42:48,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4096.0, 300 sec: 4012.8). Total num frames: 19570688. Throughput: 0: 1040.1. Samples: 3890956. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0) +[2023-02-25 04:42:48,096][00684] Avg episode reward: [(0, '27.164')] +[2023-02-25 04:42:53,097][00684] Fps is (10 sec: 2867.2, 60 sec: 3891.0, 300 sec: 3984.9). Total num frames: 19582976. Throughput: 0: 995.4. Samples: 3894984. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0) +[2023-02-25 04:42:53,099][00684] Avg episode reward: [(0, '27.403')] +[2023-02-25 04:42:58,093][00684] Fps is (10 sec: 2457.6, 60 sec: 3822.9, 300 sec: 3971.0). Total num frames: 19595264. Throughput: 0: 957.4. Samples: 3897068. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:42:58,098][00684] Avg episode reward: [(0, '26.949')] +[2023-02-25 04:43:02,247][19689] Updated weights for policy 0, policy_version 4788 (0.0011) +[2023-02-25 04:43:03,093][00684] Fps is (10 sec: 3278.0, 60 sec: 3891.2, 300 sec: 3984.9). Total num frames: 19615744. Throughput: 0: 887.6. Samples: 3901424. 
Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:43:03,097][00684] Avg episode reward: [(0, '28.212')] +[2023-02-25 04:43:08,094][00684] Fps is (10 sec: 4505.4, 60 sec: 3891.2, 300 sec: 4012.7). Total num frames: 19640320. Throughput: 0: 945.0. Samples: 3909108. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0) +[2023-02-25 04:43:08,101][00684] Avg episode reward: [(0, '29.073')] +[2023-02-25 04:43:10,011][19689] Updated weights for policy 0, policy_version 4798 (0.0011) +[2023-02-25 04:43:13,093][00684] Fps is (10 sec: 4505.6, 60 sec: 3822.9, 300 sec: 4026.6). Total num frames: 19660800. Throughput: 0: 972.0. Samples: 3912980. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:43:13,100][00684] Avg episode reward: [(0, '29.198')] +[2023-02-25 04:43:18,093][00684] Fps is (10 sec: 4096.2, 60 sec: 3822.9, 300 sec: 4040.5). Total num frames: 19681280. Throughput: 0: 963.2. Samples: 3918344. Policy #0 lag: (min: 0.0, avg: 1.2, max: 3.0) +[2023-02-25 04:43:18,102][00684] Avg episode reward: [(0, '29.732')] +[2023-02-25 04:43:21,696][19689] Updated weights for policy 0, policy_version 4808 (0.0012) +[2023-02-25 04:43:23,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 4026.6). Total num frames: 19697664. Throughput: 0: 903.6. Samples: 3923444. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:43:23,096][00684] Avg episode reward: [(0, '29.045')] +[2023-02-25 04:43:28,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3822.9, 300 sec: 3998.8). Total num frames: 19718144. Throughput: 0: 883.4. Samples: 3926376. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0) +[2023-02-25 04:43:28,097][00684] Avg episode reward: [(0, '28.461')] +[2023-02-25 04:43:30,474][19689] Updated weights for policy 0, policy_version 4818 (0.0019) +[2023-02-25 04:43:33,093][00684] Fps is (10 sec: 4915.2, 60 sec: 3959.5, 300 sec: 4026.6). Total num frames: 19746816. Throughput: 0: 960.4. Samples: 3934176. Policy #0 lag: (min: 0.0, avg: 1.1, max: 2.0) +[2023-02-25 04:43:33,096][00684] Avg episode reward: [(0, '26.610')] +[2023-02-25 04:43:38,093][00684] Fps is (10 sec: 4915.2, 60 sec: 3891.2, 300 sec: 4040.5). Total num frames: 19767296. Throughput: 0: 1016.0. Samples: 3940700. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0) +[2023-02-25 04:43:38,103][00684] Avg episode reward: [(0, '26.878')] +[2023-02-25 04:43:40,225][19689] Updated weights for policy 0, policy_version 4828 (0.0016) +[2023-02-25 04:43:43,093][00684] Fps is (10 sec: 3686.4, 60 sec: 3823.2, 300 sec: 4040.5). Total num frames: 19783680. Throughput: 0: 1025.9. Samples: 3943232. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:43:43,097][00684] Avg episode reward: [(0, '28.009')] +[2023-02-25 04:43:48,093][00684] Fps is (10 sec: 3276.8, 60 sec: 3822.9, 300 sec: 4012.7). Total num frames: 19800064. Throughput: 0: 1046.2. Samples: 3948504. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0) +[2023-02-25 04:43:48,102][00684] Avg episode reward: [(0, '27.775')] +[2023-02-25 04:43:48,122][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004834_19800064.pth... +[2023-02-25 04:43:48,310][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004602_18849792.pth +[2023-02-25 04:43:50,993][19689] Updated weights for policy 0, policy_version 4838 (0.0019) +[2023-02-25 04:43:53,093][00684] Fps is (10 sec: 4095.9, 60 sec: 4028.0, 300 sec: 4012.7). Total num frames: 19824640. Throughput: 0: 1021.4. Samples: 3955072. 
Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
+[2023-02-25 04:43:53,096][00684] Avg episode reward: [(0, '28.257')]
+[2023-02-25 04:43:58,093][00684] Fps is (10 sec: 4915.2, 60 sec: 4232.5, 300 sec: 4026.6). Total num frames: 19849216. Throughput: 0: 1019.0. Samples: 3958836. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0)
+[2023-02-25 04:43:58,096][00684] Avg episode reward: [(0, '29.812')]
+[2023-02-25 04:43:58,988][19689] Updated weights for policy 0, policy_version 4848 (0.0016)
+[2023-02-25 04:44:03,093][00684] Fps is (10 sec: 4505.7, 60 sec: 4232.5, 300 sec: 4040.5). Total num frames: 19869696. Throughput: 0: 1052.5. Samples: 3965708. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0)
+[2023-02-25 04:44:03,097][00684] Avg episode reward: [(0, '30.395')]
+[2023-02-25 04:44:08,093][00684] Fps is (10 sec: 3686.4, 60 sec: 4096.0, 300 sec: 4040.5). Total num frames: 19886080. Throughput: 0: 1055.6. Samples: 3970944. Policy #0 lag: (min: 0.0, avg: 1.0, max: 3.0)
+[2023-02-25 04:44:08,101][00684] Avg episode reward: [(0, '29.707')]
+[2023-02-25 04:44:11,173][19689] Updated weights for policy 0, policy_version 4858 (0.0019)
+[2023-02-25 04:44:13,093][00684] Fps is (10 sec: 3276.8, 60 sec: 4027.7, 300 sec: 4012.7). Total num frames: 19902464. Throughput: 0: 1047.1. Samples: 3973496. Policy #0 lag: (min: 0.0, avg: 1.1, max: 3.0)
+[2023-02-25 04:44:13,096][00684] Avg episode reward: [(0, '29.893')]
+[2023-02-25 04:44:18,093][00684] Fps is (10 sec: 4505.7, 60 sec: 4164.3, 300 sec: 4026.6). Total num frames: 19931136. Throughput: 0: 1020.1. Samples: 3980080. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-02-25 04:44:18,100][00684] Avg episode reward: [(0, '28.989')]
+[2023-02-25 04:44:19,420][19689] Updated weights for policy 0, policy_version 4868 (0.0013)
+[2023-02-25 04:44:23,093][00684] Fps is (10 sec: 5324.8, 60 sec: 4300.8, 300 sec: 4040.5). Total num frames: 19955712. Throughput: 0: 1045.7. Samples: 3987756. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0)
+[2023-02-25 04:44:23,099][00684] Avg episode reward: [(0, '29.504')]
+[2023-02-25 04:44:28,098][00684] Fps is (10 sec: 4094.1, 60 sec: 4232.2, 300 sec: 4040.4). Total num frames: 19972096. Throughput: 0: 1053.0. Samples: 3990624. Policy #0 lag: (min: 0.0, avg: 0.9, max: 3.0)
+[2023-02-25 04:44:28,100][00684] Avg episode reward: [(0, '28.371')]
+[2023-02-25 04:44:29,635][19689] Updated weights for policy 0, policy_version 4878 (0.0016)
+[2023-02-25 04:44:33,093][00684] Fps is (10 sec: 3276.8, 60 sec: 4027.7, 300 sec: 4026.6). Total num frames: 19988480. Throughput: 0: 1050.9. Samples: 3995796. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
+[2023-02-25 04:44:33,096][00684] Avg episode reward: [(0, '28.463')]
+[2023-02-25 04:44:36,638][19675] Stopping Batcher_0...
+[2023-02-25 04:44:36,639][19675] Loop batcher_evt_loop terminating...
+[2023-02-25 04:44:36,639][00684] Component Batcher_0 stopped!
+[2023-02-25 04:44:36,667][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth...
+[2023-02-25 04:44:36,761][19689] Weights refcount: 2 0
+[2023-02-25 04:44:36,807][19675] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004718_19324928.pth
+[2023-02-25 04:44:36,818][19675] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth...
+[2023-02-25 04:44:36,830][19689] Stopping InferenceWorker_p0-w0...
+[2023-02-25 04:44:36,831][00684] Component InferenceWorker_p0-w0 stopped!
+[2023-02-25 04:44:36,838][19689] Loop inference_proc0-0_evt_loop terminating...
+[2023-02-25 04:44:36,988][19675] Stopping LearnerWorker_p0...
+[2023-02-25 04:44:36,989][19675] Loop learner_proc0_evt_loop terminating...
+[2023-02-25 04:44:36,988][00684] Component LearnerWorker_p0 stopped!
+[2023-02-25 04:44:37,124][19704] Stopping RolloutWorker_w5...
+[2023-02-25 04:44:37,124][19704] Loop rollout_proc5_evt_loop terminating...
+[2023-02-25 04:44:37,124][00684] Component RolloutWorker_w5 stopped!
+[2023-02-25 04:44:37,154][19690] Stopping RolloutWorker_w1...
+[2023-02-25 04:44:37,154][19690] Loop rollout_proc1_evt_loop terminating...
+[2023-02-25 04:44:37,155][00684] Component RolloutWorker_w1 stopped!
+[2023-02-25 04:44:37,158][19706] Stopping RolloutWorker_w7...
+[2023-02-25 04:44:37,158][19706] Loop rollout_proc7_evt_loop terminating...
+[2023-02-25 04:44:37,161][00684] Component RolloutWorker_w7 stopped!
+[2023-02-25 04:44:37,169][19693] Stopping RolloutWorker_w3...
+[2023-02-25 04:44:37,170][19693] Loop rollout_proc3_evt_loop terminating...
+[2023-02-25 04:44:37,172][00684] Component RolloutWorker_w3 stopped!
+[2023-02-25 04:44:37,254][00684] Component RolloutWorker_w4 stopped!
+[2023-02-25 04:44:37,261][19698] Stopping RolloutWorker_w4...
+[2023-02-25 04:44:37,261][19698] Loop rollout_proc4_evt_loop terminating...
+[2023-02-25 04:44:37,282][00684] Component RolloutWorker_w0 stopped!
+[2023-02-25 04:44:37,290][19691] Stopping RolloutWorker_w0...
+[2023-02-25 04:44:37,298][19691] Loop rollout_proc0_evt_loop terminating...
+[2023-02-25 04:44:37,316][00684] Component RolloutWorker_w2 stopped!
+[2023-02-25 04:44:37,320][19696] Stopping RolloutWorker_w2...
+[2023-02-25 04:44:37,322][19696] Loop rollout_proc2_evt_loop terminating...
+[2023-02-25 04:44:37,365][00684] Component RolloutWorker_w6 stopped!
+[2023-02-25 04:44:37,368][00684] Waiting for process learner_proc0 to stop...
+[2023-02-25 04:44:37,370][19708] Stopping RolloutWorker_w6...
+[2023-02-25 04:44:37,371][19708] Loop rollout_proc6_evt_loop terminating...
+[2023-02-25 04:44:41,098][00684] Waiting for process inference_proc0-0 to join...
+[2023-02-25 04:44:41,106][00684] Waiting for process rollout_proc0 to join...
+[2023-02-25 04:44:41,108][00684] Waiting for process rollout_proc1 to join...
+[2023-02-25 04:44:41,110][00684] Waiting for process rollout_proc2 to join...
+[2023-02-25 04:44:41,115][00684] Waiting for process rollout_proc3 to join...
+[2023-02-25 04:44:41,116][00684] Waiting for process rollout_proc4 to join...
+[2023-02-25 04:44:41,117][00684] Waiting for process rollout_proc5 to join...
+[2023-02-25 04:44:41,120][00684] Waiting for process rollout_proc6 to join...
+[2023-02-25 04:44:41,122][00684] Waiting for process rollout_proc7 to join...
+[2023-02-25 04:44:41,123][00684] Batcher 0 profile tree view:
+batching: 99.0041, releasing_batches: 0.1043
+[2023-02-25 04:44:41,125][00684] InferenceWorker_p0-w0 profile tree view:
+wait_policy: 0.0242
+  wait_policy_total: 2696.6617
+update_model: 21.0148
+  weight_update: 0.0016
+one_step: 0.0042
+  handle_policy_step: 1287.0192
+    deserialize: 44.4042, stack: 7.7983, obs_to_device_normalize: 300.6142, forward: 595.8544, send_messages: 54.7742
+    prepare_outputs: 220.2918
+      to_cpu: 139.6361
+[2023-02-25 04:44:41,126][00684] Learner 0 profile tree view:
+misc: 0.0228, prepare_batch: 54.7782
+train: 317.0710
+  epoch_init: 0.0342, minibatch_init: 0.0843, losses_postprocess: 2.1447, kl_divergence: 2.3055, after_optimizer: 9.6031
+  calculate_losses: 109.0414
+    losses_init: 0.0210, forward_head: 7.4978, bptt_initial: 69.9707, tail: 4.6536, advantages_returns: 1.1959, losses: 15.0731
+    bptt: 9.2778
+      bptt_forward_core: 8.8851
+  update: 191.1126
+    clip: 5.6527
+[2023-02-25 04:44:41,127][00684] RolloutWorker_w0 profile tree view:
+wait_for_trajectories: 1.1775, enqueue_policy_requests: 580.6333, env_step: 3217.8790, overhead: 88.0695, complete_rollouts: 16.0739
+save_policy_outputs: 76.7979
+  split_output_tensors: 37.0821
+[2023-02-25 04:44:41,128][00684] RolloutWorker_w7 profile tree view:
+wait_for_trajectories: 1.2492, enqueue_policy_requests: 578.3958, env_step: 3228.4729, overhead: 86.4633, complete_rollouts: 17.2280
+save_policy_outputs: 76.4612
+  split_output_tensors: 35.8391
+[2023-02-25 04:44:41,130][00684] Loop Runner_EvtLoop terminating...
+[2023-02-25 04:44:41,131][00684] Runner profile tree view:
+main_loop: 4131.2405
+[2023-02-25 04:44:41,132][00684] Collected {0: 20004864}, FPS: 3872.7
+[2023-02-25 04:44:41,224][00684] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2023-02-25 04:44:41,226][00684] Overriding arg 'num_workers' with value 1 passed from command line
+[2023-02-25 04:44:41,227][00684] Adding new argument 'no_render'=True that is not in the saved config file!
+[2023-02-25 04:44:41,230][00684] Adding new argument 'save_video'=True that is not in the saved config file!
+[2023-02-25 04:44:41,231][00684] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2023-02-25 04:44:41,233][00684] Adding new argument 'video_name'=None that is not in the saved config file!
+[2023-02-25 04:44:41,234][00684] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
+[2023-02-25 04:44:41,235][00684] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2023-02-25 04:44:41,240][00684] Adding new argument 'push_to_hub'=False that is not in the saved config file!
+[2023-02-25 04:44:41,241][00684] Adding new argument 'hf_repository'=None that is not in the saved config file!
+[2023-02-25 04:44:41,242][00684] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2023-02-25 04:44:41,244][00684] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2023-02-25 04:44:41,245][00684] Adding new argument 'train_script'=None that is not in the saved config file!
+[2023-02-25 04:44:41,246][00684] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2023-02-25 04:44:41,247][00684] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2023-02-25 04:44:41,278][00684] RunningMeanStd input shape: (3, 72, 128)
+[2023-02-25 04:44:41,288][00684] RunningMeanStd input shape: (1,)
+[2023-02-25 04:44:41,320][00684] ConvEncoder: input_channels=3
+[2023-02-25 04:44:41,504][00684] Conv encoder output size: 512
+[2023-02-25 04:44:41,507][00684] Policy head output size: 512
+[2023-02-25 04:44:41,631][00684] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth...
+[2023-02-25 04:44:42,455][00684] Num frames 100...
+[2023-02-25 04:44:42,566][00684] Num frames 200...
+[2023-02-25 04:44:42,682][00684] Num frames 300...
+[2023-02-25 04:44:42,802][00684] Num frames 400...
+[2023-02-25 04:44:42,922][00684] Num frames 500...
+[2023-02-25 04:44:43,036][00684] Num frames 600...
+[2023-02-25 04:44:43,166][00684] Num frames 700...
+[2023-02-25 04:44:43,281][00684] Num frames 800...
+[2023-02-25 04:44:43,406][00684] Num frames 900...
+[2023-02-25 04:44:43,521][00684] Num frames 1000...
+[2023-02-25 04:44:43,642][00684] Num frames 1100...
+[2023-02-25 04:44:43,770][00684] Num frames 1200...
+[2023-02-25 04:44:43,896][00684] Num frames 1300...
+[2023-02-25 04:44:44,027][00684] Num frames 1400...
+[2023-02-25 04:44:44,148][00684] Num frames 1500...
+[2023-02-25 04:44:44,262][00684] Num frames 1600...
+[2023-02-25 04:44:44,380][00684] Num frames 1700...
+[2023-02-25 04:44:44,493][00684] Num frames 1800...
+[2023-02-25 04:44:44,609][00684] Num frames 1900...
+[2023-02-25 04:44:44,690][00684] Avg episode rewards: #0: 47.199, true rewards: #0: 19.200
+[2023-02-25 04:44:44,691][00684] Avg episode reward: 47.199, avg true_objective: 19.200
+[2023-02-25 04:44:44,788][00684] Num frames 2000...
+[2023-02-25 04:44:44,903][00684] Num frames 2100...
+[2023-02-25 04:44:45,028][00684] Num frames 2200...
+[2023-02-25 04:44:45,143][00684] Num frames 2300...
+[2023-02-25 04:44:45,262][00684] Num frames 2400...
+[2023-02-25 04:44:45,387][00684] Num frames 2500...
+[2023-02-25 04:44:45,508][00684] Num frames 2600...
+[2023-02-25 04:44:45,628][00684] Num frames 2700...
+[2023-02-25 04:44:45,747][00684] Num frames 2800...
+[2023-02-25 04:44:45,865][00684] Num frames 2900...
+[2023-02-25 04:44:45,986][00684] Num frames 3000...
+[2023-02-25 04:44:46,105][00684] Num frames 3100...
+[2023-02-25 04:44:46,226][00684] Num frames 3200...
+[2023-02-25 04:44:46,346][00684] Num frames 3300...
+[2023-02-25 04:44:46,459][00684] Num frames 3400...
+[2023-02-25 04:44:46,577][00684] Num frames 3500...
+[2023-02-25 04:44:46,689][00684] Num frames 3600...
+[2023-02-25 04:44:46,801][00684] Num frames 3700...
+[2023-02-25 04:44:46,919][00684] Num frames 3800...
+[2023-02-25 04:44:47,048][00684] Num frames 3900...
+[2023-02-25 04:44:47,168][00684] Num frames 4000...
+[2023-02-25 04:44:47,247][00684] Avg episode rewards: #0: 53.099, true rewards: #0: 20.100
+[2023-02-25 04:44:47,248][00684] Avg episode reward: 53.099, avg true_objective: 20.100
+[2023-02-25 04:44:47,347][00684] Num frames 4100...
+[2023-02-25 04:44:47,483][00684] Num frames 4200...
+[2023-02-25 04:44:47,650][00684] Num frames 4300...
+[2023-02-25 04:44:47,808][00684] Num frames 4400...
+[2023-02-25 04:44:47,981][00684] Num frames 4500...
+[2023-02-25 04:44:48,143][00684] Num frames 4600...
+[2023-02-25 04:44:48,310][00684] Num frames 4700...
+[2023-02-25 04:44:48,468][00684] Num frames 4800...
+[2023-02-25 04:44:48,634][00684] Num frames 4900...
+[2023-02-25 04:44:48,838][00684] Avg episode rewards: #0: 43.963, true rewards: #0: 16.630
+[2023-02-25 04:44:48,844][00684] Avg episode reward: 43.963, avg true_objective: 16.630
+[2023-02-25 04:44:48,864][00684] Num frames 5000...
+[2023-02-25 04:44:49,026][00684] Num frames 5100...
+[2023-02-25 04:44:49,184][00684] Num frames 5200...
+[2023-02-25 04:44:49,347][00684] Num frames 5300...
+[2023-02-25 04:44:49,516][00684] Num frames 5400...
+[2023-02-25 04:44:49,677][00684] Num frames 5500...
+[2023-02-25 04:44:49,845][00684] Num frames 5600...
+[2023-02-25 04:44:49,952][00684] Avg episode rewards: #0: 36.572, true rewards: #0: 14.073
+[2023-02-25 04:44:49,955][00684] Avg episode reward: 36.572, avg true_objective: 14.073
+[2023-02-25 04:44:50,084][00684] Num frames 5700...
+[2023-02-25 04:44:50,245][00684] Num frames 5800...
+[2023-02-25 04:44:50,415][00684] Num frames 5900...
+[2023-02-25 04:44:50,579][00684] Num frames 6000...
+[2023-02-25 04:44:50,742][00684] Num frames 6100...
+[2023-02-25 04:44:50,904][00684] Num frames 6200...
+[2023-02-25 04:44:51,068][00684] Num frames 6300...
+[2023-02-25 04:44:51,186][00684] Num frames 6400...
+[2023-02-25 04:44:51,300][00684] Num frames 6500...
+[2023-02-25 04:44:51,420][00684] Num frames 6600...
+[2023-02-25 04:44:51,532][00684] Num frames 6700...
+[2023-02-25 04:44:51,651][00684] Num frames 6800...
+[2023-02-25 04:44:51,769][00684] Num frames 6900...
+[2023-02-25 04:44:51,893][00684] Num frames 7000...
+[2023-02-25 04:44:52,019][00684] Num frames 7100...
+[2023-02-25 04:44:52,149][00684] Num frames 7200...
+[2023-02-25 04:44:52,274][00684] Num frames 7300...
+[2023-02-25 04:44:52,390][00684] Num frames 7400...
+[2023-02-25 04:44:52,516][00684] Num frames 7500...
+[2023-02-25 04:44:52,637][00684] Num frames 7600...
+[2023-02-25 04:44:52,737][00684] Avg episode rewards: #0: 39.675, true rewards: #0: 15.276
+[2023-02-25 04:44:52,740][00684] Avg episode reward: 39.675, avg true_objective: 15.276
+[2023-02-25 04:44:52,810][00684] Num frames 7700...
+[2023-02-25 04:44:52,923][00684] Num frames 7800...
+[2023-02-25 04:44:53,041][00684] Num frames 7900...
+[2023-02-25 04:44:53,164][00684] Num frames 8000...
+[2023-02-25 04:44:53,279][00684] Num frames 8100...
+[2023-02-25 04:44:53,394][00684] Num frames 8200...
+[2023-02-25 04:44:53,510][00684] Num frames 8300...
+[2023-02-25 04:44:53,633][00684] Num frames 8400...
+[2023-02-25 04:44:53,699][00684] Avg episode rewards: #0: 35.509, true rewards: #0: 14.010
+[2023-02-25 04:44:53,701][00684] Avg episode reward: 35.509, avg true_objective: 14.010
+[2023-02-25 04:44:53,822][00684] Num frames 8500...
+[2023-02-25 04:44:53,937][00684] Num frames 8600...
+[2023-02-25 04:44:54,058][00684] Num frames 8700...
+[2023-02-25 04:44:54,205][00684] Num frames 8800...
+[2023-02-25 04:44:54,323][00684] Num frames 8900...
+[2023-02-25 04:44:54,441][00684] Num frames 9000...
+[2023-02-25 04:44:54,562][00684] Num frames 9100...
+[2023-02-25 04:44:54,681][00684] Num frames 9200...
+[2023-02-25 04:44:54,793][00684] Num frames 9300...
+[2023-02-25 04:44:54,913][00684] Num frames 9400...
+[2023-02-25 04:44:55,079][00684] Avg episode rewards: #0: 33.848, true rewards: #0: 13.563
+[2023-02-25 04:44:55,080][00684] Avg episode reward: 33.848, avg true_objective: 13.563
+[2023-02-25 04:44:55,091][00684] Num frames 9500...
+[2023-02-25 04:44:55,221][00684] Num frames 9600...
+[2023-02-25 04:44:55,337][00684] Num frames 9700...
+[2023-02-25 04:44:55,452][00684] Num frames 9800...
+[2023-02-25 04:44:55,565][00684] Num frames 9900...
+[2023-02-25 04:44:55,686][00684] Num frames 10000...
+[2023-02-25 04:44:55,814][00684] Num frames 10100...
+[2023-02-25 04:44:55,938][00684] Num frames 10200...
+[2023-02-25 04:44:56,060][00684] Num frames 10300...
+[2023-02-25 04:44:56,189][00684] Num frames 10400...
+[2023-02-25 04:44:56,311][00684] Num frames 10500...
+[2023-02-25 04:44:56,431][00684] Num frames 10600...
+[2023-02-25 04:44:56,554][00684] Num frames 10700...
+[2023-02-25 04:44:56,679][00684] Num frames 10800...
+[2023-02-25 04:44:56,803][00684] Num frames 10900...
+[2023-02-25 04:44:56,924][00684] Num frames 11000...
+[2023-02-25 04:44:57,050][00684] Num frames 11100...
+[2023-02-25 04:44:57,171][00684] Num frames 11200...
+[2023-02-25 04:44:57,298][00684] Num frames 11300...
+[2023-02-25 04:44:57,413][00684] Num frames 11400...
+[2023-02-25 04:44:57,534][00684] Num frames 11500...
+[2023-02-25 04:44:57,702][00684] Avg episode rewards: #0: 36.617, true rewards: #0: 14.493
+[2023-02-25 04:44:57,704][00684] Avg episode reward: 36.617, avg true_objective: 14.493
+[2023-02-25 04:44:57,716][00684] Num frames 11600...
+[2023-02-25 04:44:57,831][00684] Num frames 11700...
+[2023-02-25 04:44:57,954][00684] Num frames 11800...
+[2023-02-25 04:44:58,077][00684] Num frames 11900...
+[2023-02-25 04:44:58,199][00684] Num frames 12000...
+[2023-02-25 04:44:58,334][00684] Num frames 12100...
+[2023-02-25 04:44:58,449][00684] Num frames 12200...
+[2023-02-25 04:44:58,571][00684] Num frames 12300...
+[2023-02-25 04:44:58,695][00684] Num frames 12400...
+[2023-02-25 04:44:58,817][00684] Num frames 12500...
+[2023-02-25 04:44:58,937][00684] Num frames 12600...
+[2023-02-25 04:44:59,056][00684] Num frames 12700...
+[2023-02-25 04:44:59,185][00684] Num frames 12800...
+[2023-02-25 04:44:59,315][00684] Num frames 12900...
+[2023-02-25 04:44:59,435][00684] Num frames 13000...
+[2023-02-25 04:44:59,551][00684] Num frames 13100...
+[2023-02-25 04:44:59,666][00684] Num frames 13200...
+[2023-02-25 04:44:59,787][00684] Num frames 13300...
+[2023-02-25 04:44:59,870][00684] Avg episode rewards: #0: 37.358, true rewards: #0: 14.803
+[2023-02-25 04:44:59,872][00684] Avg episode reward: 37.358, avg true_objective: 14.803
+[2023-02-25 04:44:59,963][00684] Num frames 13400...
+[2023-02-25 04:45:00,080][00684] Num frames 13500...
+[2023-02-25 04:45:00,199][00684] Num frames 13600...
+[2023-02-25 04:45:00,324][00684] Num frames 13700...
+[2023-02-25 04:45:00,447][00684] Num frames 13800...
+[2023-02-25 04:45:00,568][00684] Num frames 13900...
+[2023-02-25 04:45:00,689][00684] Num frames 14000...
+[2023-02-25 04:45:00,808][00684] Num frames 14100...
+[2023-02-25 04:45:00,931][00684] Num frames 14200...
+[2023-02-25 04:45:01,070][00684] Num frames 14300...
+[2023-02-25 04:45:01,252][00684] Num frames 14400...
+[2023-02-25 04:45:01,361][00684] Avg episode rewards: #0: 36.627, true rewards: #0: 14.427
+[2023-02-25 04:45:01,363][00684] Avg episode reward: 36.627, avg true_objective: 14.427
+[2023-02-25 04:46:28,447][00684] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
+[2023-02-25 04:46:29,060][00684] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2023-02-25 04:46:29,062][00684] Overriding arg 'num_workers' with value 1 passed from command line
+[2023-02-25 04:46:29,064][00684] Adding new argument 'no_render'=True that is not in the saved config file!
+[2023-02-25 04:46:29,067][00684] Adding new argument 'save_video'=True that is not in the saved config file!
+[2023-02-25 04:46:29,069][00684] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2023-02-25 04:46:29,070][00684] Adding new argument 'video_name'=None that is not in the saved config file!
+[2023-02-25 04:46:29,075][00684] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2023-02-25 04:46:29,077][00684] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2023-02-25 04:46:29,078][00684] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2023-02-25 04:46:29,079][00684] Adding new argument 'hf_repository'='menoua/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2023-02-25 04:46:29,080][00684] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2023-02-25 04:46:29,082][00684] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2023-02-25 04:46:29,085][00684] Adding new argument 'train_script'=None that is not in the saved config file!
+[2023-02-25 04:46:29,086][00684] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2023-02-25 04:46:29,087][00684] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2023-02-25 04:46:29,113][00684] RunningMeanStd input shape: (3, 72, 128)
+[2023-02-25 04:46:29,115][00684] RunningMeanStd input shape: (1,)
+[2023-02-25 04:46:29,134][00684] ConvEncoder: input_channels=3
+[2023-02-25 04:46:29,192][00684] Conv encoder output size: 512
+[2023-02-25 04:46:29,194][00684] Policy head output size: 512
+[2023-02-25 04:46:29,224][00684] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000004884_20004864.pth...
+[2023-02-25 04:46:30,169][00684] Num frames 100...
+[2023-02-25 04:46:30,329][00684] Num frames 200...
+[2023-02-25 04:46:30,487][00684] Num frames 300...
+[2023-02-25 04:46:30,643][00684] Num frames 400...
+[2023-02-25 04:46:30,819][00684] Num frames 500...
+[2023-02-25 04:46:30,977][00684] Num frames 600...
+[2023-02-25 04:46:31,141][00684] Num frames 700...
+[2023-02-25 04:46:31,306][00684] Num frames 800...
+[2023-02-25 04:46:31,469][00684] Num frames 900...
+[2023-02-25 04:46:31,626][00684] Num frames 1000...
+[2023-02-25 04:46:31,729][00684] Avg episode rewards: #0: 25.260, true rewards: #0: 10.260
+[2023-02-25 04:46:31,730][00684] Avg episode reward: 25.260, avg true_objective: 10.260
+[2023-02-25 04:46:31,850][00684] Num frames 1100...
+[2023-02-25 04:46:32,014][00684] Num frames 1200...
+[2023-02-25 04:46:32,185][00684] Num frames 1300...
+[2023-02-25 04:46:32,355][00684] Num frames 1400...
+[2023-02-25 04:46:32,519][00684] Num frames 1500...
+[2023-02-25 04:46:32,686][00684] Num frames 1600...
+[2023-02-25 04:46:32,849][00684] Num frames 1700...
+[2023-02-25 04:46:33,021][00684] Num frames 1800...
+[2023-02-25 04:46:33,149][00684] Num frames 1900...
+[2023-02-25 04:46:33,271][00684] Num frames 2000...
+[2023-02-25 04:46:33,389][00684] Num frames 2100...
+[2023-02-25 04:46:33,503][00684] Num frames 2200...
+[2023-02-25 04:46:33,623][00684] Num frames 2300...
+[2023-02-25 04:46:33,744][00684] Num frames 2400...
+[2023-02-25 04:46:33,867][00684] Num frames 2500...
+[2023-02-25 04:46:34,040][00684] Avg episode rewards: #0: 31.980, true rewards: #0: 12.980
+[2023-02-25 04:46:34,043][00684] Avg episode reward: 31.980, avg true_objective: 12.980
+[2023-02-25 04:46:34,051][00684] Num frames 2600...
+[2023-02-25 04:46:34,176][00684] Num frames 2700...
+[2023-02-25 04:46:34,291][00684] Num frames 2800...
+[2023-02-25 04:46:34,414][00684] Num frames 2900...
+[2023-02-25 04:46:34,529][00684] Num frames 3000...
+[2023-02-25 04:46:34,646][00684] Num frames 3100...
+[2023-02-25 04:46:34,759][00684] Num frames 3200...
+[2023-02-25 04:46:34,871][00684] Num frames 3300...
+[2023-02-25 04:46:34,990][00684] Num frames 3400...
+[2023-02-25 04:46:35,103][00684] Num frames 3500...
+[2023-02-25 04:46:35,226][00684] Num frames 3600...
+[2023-02-25 04:46:35,344][00684] Num frames 3700...
+[2023-02-25 04:46:35,465][00684] Num frames 3800...
+[2023-02-25 04:46:35,584][00684] Num frames 3900...
+[2023-02-25 04:46:35,705][00684] Num frames 4000...
+[2023-02-25 04:46:35,826][00684] Num frames 4100...
+[2023-02-25 04:46:35,953][00684] Num frames 4200...
+[2023-02-25 04:46:36,069][00684] Num frames 4300...
+[2023-02-25 04:46:36,197][00684] Num frames 4400...
+[2023-02-25 04:46:36,316][00684] Num frames 4500...
+[2023-02-25 04:46:36,434][00684] Num frames 4600...
+[2023-02-25 04:46:36,504][00684] Avg episode rewards: #0: 39.706, true rewards: #0: 15.373
+[2023-02-25 04:46:36,505][00684] Avg episode reward: 39.706, avg true_objective: 15.373
+[2023-02-25 04:46:36,615][00684] Num frames 4700...
+[2023-02-25 04:46:36,727][00684] Num frames 4800...
+[2023-02-25 04:46:36,848][00684] Num frames 4900...
+[2023-02-25 04:46:36,977][00684] Num frames 5000...
+[2023-02-25 04:46:37,095][00684] Num frames 5100...
+[2023-02-25 04:46:37,214][00684] Num frames 5200...
+[2023-02-25 04:46:37,328][00684] Num frames 5300...
+[2023-02-25 04:46:37,444][00684] Num frames 5400...
+[2023-02-25 04:46:37,565][00684] Num frames 5500...
+[2023-02-25 04:46:37,687][00684] Num frames 5600...
+[2023-02-25 04:46:37,805][00684] Num frames 5700...
+[2023-02-25 04:46:37,918][00684] Num frames 5800...
+[2023-02-25 04:46:38,039][00684] Num frames 5900...
+[2023-02-25 04:46:38,162][00684] Num frames 6000...
+[2023-02-25 04:46:38,283][00684] Num frames 6100...
+[2023-02-25 04:46:38,397][00684] Num frames 6200...
+[2023-02-25 04:46:38,510][00684] Num frames 6300...
+[2023-02-25 04:46:38,625][00684] Num frames 6400...
+[2023-02-25 04:46:38,740][00684] Num frames 6500...
+[2023-02-25 04:46:38,855][00684] Num frames 6600...
+[2023-02-25 04:46:38,968][00684] Num frames 6700...
+[2023-02-25 04:46:39,038][00684] Avg episode rewards: #0: 46.029, true rewards: #0: 16.780
+[2023-02-25 04:46:39,039][00684] Avg episode reward: 46.029, avg true_objective: 16.780
+[2023-02-25 04:46:39,139][00684] Num frames 6800...
+[2023-02-25 04:46:39,258][00684] Num frames 6900...
+[2023-02-25 04:46:39,371][00684] Num frames 7000...
+[2023-02-25 04:46:39,481][00684] Num frames 7100...
+[2023-02-25 04:46:39,592][00684] Num frames 7200...
+[2023-02-25 04:46:39,708][00684] Num frames 7300...
+[2023-02-25 04:46:39,827][00684] Num frames 7400...
+[2023-02-25 04:46:39,946][00684] Num frames 7500...
+[2023-02-25 04:46:40,088][00684] Avg episode rewards: #0: 40.352, true rewards: #0: 15.152
+[2023-02-25 04:46:40,090][00684] Avg episode reward: 40.352, avg true_objective: 15.152
+[2023-02-25 04:46:40,123][00684] Num frames 7600...
+[2023-02-25 04:46:40,250][00684] Num frames 7700...
+[2023-02-25 04:46:40,368][00684] Num frames 7800...
+[2023-02-25 04:46:40,485][00684] Num frames 7900...
+[2023-02-25 04:46:40,597][00684] Num frames 8000...
+[2023-02-25 04:46:40,719][00684] Num frames 8100...
+[2023-02-25 04:46:40,838][00684] Num frames 8200...
+[2023-02-25 04:46:40,955][00684] Num frames 8300...
+[2023-02-25 04:46:41,075][00684] Num frames 8400...
+[2023-02-25 04:46:41,192][00684] Num frames 8500...
+[2023-02-25 04:46:41,317][00684] Num frames 8600...
+[2023-02-25 04:46:41,431][00684] Num frames 8700...
+[2023-02-25 04:46:41,544][00684] Num frames 8800...
+[2023-02-25 04:46:41,657][00684] Num frames 8900...
+[2023-02-25 04:46:41,754][00684] Avg episode rewards: #0: 39.050, true rewards: #0: 14.883
+[2023-02-25 04:46:41,755][00684] Avg episode reward: 39.050, avg true_objective: 14.883
+[2023-02-25 04:46:41,841][00684] Num frames 9000...
+[2023-02-25 04:46:41,959][00684] Num frames 9100...
+[2023-02-25 04:46:42,074][00684] Num frames 9200...
+[2023-02-25 04:46:42,190][00684] Num frames 9300...
+[2023-02-25 04:46:42,316][00684] Num frames 9400...
+[2023-02-25 04:46:42,384][00684] Avg episode rewards: #0: 34.728, true rewards: #0: 13.443
+[2023-02-25 04:46:42,386][00684] Avg episode reward: 34.728, avg true_objective: 13.443
+[2023-02-25 04:46:42,494][00684] Num frames 9500...
+[2023-02-25 04:46:42,606][00684] Num frames 9600...
+[2023-02-25 04:46:42,721][00684] Num frames 9700...
+[2023-02-25 04:46:42,840][00684] Num frames 9800...
+[2023-02-25 04:46:42,960][00684] Num frames 9900...
+[2023-02-25 04:46:43,106][00684] Num frames 10000...
+[2023-02-25 04:46:43,284][00684] Num frames 10100...
+[2023-02-25 04:46:43,450][00684] Num frames 10200...
+[2023-02-25 04:46:43,611][00684] Num frames 10300...
+[2023-02-25 04:46:43,766][00684] Num frames 10400...
+[2023-02-25 04:46:43,922][00684] Num frames 10500...
+[2023-02-25 04:46:44,075][00684] Num frames 10600...
+[2023-02-25 04:46:44,239][00684] Num frames 10700...
+[2023-02-25 04:46:44,403][00684] Num frames 10800...
+[2023-02-25 04:46:44,569][00684] Num frames 10900...
+[2023-02-25 04:46:44,766][00684] Avg episode rewards: #0: 34.855, true rewards: #0: 13.730
+[2023-02-25 04:46:44,769][00684] Avg episode reward: 34.855, avg true_objective: 13.730
+[2023-02-25 04:46:44,798][00684] Num frames 11000...
+[2023-02-25 04:46:44,966][00684] Num frames 11100...
+[2023-02-25 04:46:45,131][00684] Num frames 11200...
+[2023-02-25 04:46:45,295][00684] Num frames 11300...
+[2023-02-25 04:46:45,460][00684] Num frames 11400...
+[2023-02-25 04:46:45,670][00684] Avg episode rewards: #0: 31.773, true rewards: #0: 12.773
+[2023-02-25 04:46:45,672][00684] Avg episode reward: 31.773, avg true_objective: 12.773
+[2023-02-25 04:46:45,682][00684] Num frames 11500...
+[2023-02-25 04:46:45,847][00684] Num frames 11600...
+[2023-02-25 04:46:46,009][00684] Num frames 11700...
+[2023-02-25 04:46:46,126][00684] Num frames 11800...
+[2023-02-25 04:46:46,248][00684] Num frames 11900...
+[2023-02-25 04:46:46,368][00684] Num frames 12000...
+[2023-02-25 04:46:46,510][00684] Avg episode rewards: #0: 29.869, true rewards: #0: 12.069
+[2023-02-25 04:46:46,512][00684] Avg episode reward: 29.869, avg true_objective: 12.069
+[2023-02-25 04:47:57,178][00684] Replay video saved to /content/train_dir/default_experiment/replay.mp4!