diff --git "a/wandb/run-20220203_135844-2tzexn1o/files/output.log" "b/wandb/run-20220203_135844-2tzexn1o/files/output.log" --- "a/wandb/run-20220203_135844-2tzexn1o/files/output.log" +++ "b/wandb/run-20220203_135844-2tzexn1o/files/output.log" @@ -20380,3 +20380,1355 @@ Deleting older checkpoint [checkpoint-8000] due to args.save_total_limit {'eval_loss': inf, 'eval_wer': 0.21933736756656164, 'eval_runtime': 705.2463, 'eval_samples_per_second': 22.717, 'eval_steps_per_second': 1.421, 'epoch': 4.3} Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8474, 'learning_rate': 1.1410297927461138e-05, 'epoch': 4.33} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8519, 'learning_rate': 1.0924546632124352e-05, 'epoch': 4.36} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8487, 'learning_rate': 1.0438795336787564e-05, 'epoch': 4.39} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8479, 'learning_rate': 9.953044041450777e-06, 'epoch': 4.42} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8463, 'learning_rate': 9.467292746113987e-06, 'epoch': 4.44} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8459, 'learning_rate': 8.981541450777202e-06, 'epoch': 4.47} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.853, 'learning_rate': 8.500647668393781e-06, 'epoch': 4.5} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8456, 'learning_rate': 8.014896373056994e-06, 'epoch': 4.53} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8501, 'learning_rate': 7.529145077720206e-06, 'epoch': 4.56} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8488, 'learning_rate': 7.0433937823834195e-06, 'epoch': 4.59} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'eval_loss': inf, 'eval_wer': 0.2187084892160197, 'eval_runtime': 702.6461, 'eval_samples_per_second': 22.801, 'eval_steps_per_second': 1.426, 'epoch': 4.59} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s]