diff --git "a/wandb/run-20220203_135844-2tzexn1o/files/output.log" "b/wandb/run-20220203_135844-2tzexn1o/files/output.log" --- "a/wandb/run-20220203_135844-2tzexn1o/files/output.log" +++ "b/wandb/run-20220203_135844-2tzexn1o/files/output.log" @@ -19025,3 +19025,1358 @@ Deleting older checkpoint [checkpoint-8000] due to args.save_total_limit {'eval_loss': inf, 'eval_wer': 0.2217630412043662, 'eval_runtime': 708.1909, 'eval_samples_per_second': 22.622, 'eval_steps_per_second': 1.415, 'epoch': 4.01} Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8657, 'learning_rate': 1.6262953367875646e-05, 'epoch': 4.04} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8631, 'learning_rate': 1.577720207253886e-05, 'epoch': 4.07} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8658, 'learning_rate': 1.529630829015544e-05, 'epoch': 4.1} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.855, 'learning_rate': 1.4810556994818651e-05, 'epoch': 4.13} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8502, 'learning_rate': 1.4324805699481864e-05, 'epoch': 4.16} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8625, 'learning_rate': 1.3839054404145076e-05, 'epoch': 4.19} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8598, 'learning_rate': 1.335330310880829e-05, 'epoch': 4.21} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8574, 'learning_rate': 1.2867551813471502e-05, 'epoch': 4.24} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8575, 'learning_rate': 1.2381800518134713e-05, 'epoch': 4.27} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8567, 'learning_rate': 1.1896049222797926e-05, 'epoch': 4.3} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'eval_loss': inf, 'eval_wer': 0.21933736756656164, 'eval_runtime': 705.2463, 'eval_samples_per_second': 22.717, 'eval_steps_per_second': 1.421, 'epoch': 4.3} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s]