diff --git "a/wandb/run-20220203_135844-2tzexn1o/files/output.log" "b/wandb/run-20220203_135844-2tzexn1o/files/output.log" --- "a/wandb/run-20220203_135844-2tzexn1o/files/output.log" +++ "b/wandb/run-20220203_135844-2tzexn1o/files/output.log" @@ -17666,3 +17666,1362 @@ Deleting older checkpoint [checkpoint-8000] due to args.save_total_limit {'eval_loss': inf, 'eval_wer': 0.22629353217867845, 'eval_runtime': 704.6479, 'eval_samples_per_second': 22.736, 'eval_steps_per_second': 1.422, 'epoch': 3.73} Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8725, 'learning_rate': 2.1110751295336784e-05, 'epoch': 3.76} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8678, 'learning_rate': 2.0625e-05, 'epoch': 3.78} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8618, 'learning_rate': 2.0139248704663212e-05, 'epoch': 3.81} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8646, 'learning_rate': 1.9658354922279792e-05, 'epoch': 3.84} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8746, 'learning_rate': 1.9172603626943005e-05, 'epoch': 3.87} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.863, 'learning_rate': 1.8686852331606217e-05, 'epoch': 3.9} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.862, 'learning_rate': 1.8205958549222797e-05, 'epoch': 3.93} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8678, 'learning_rate': 1.772020725388601e-05, 'epoch': 3.96} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8694, 'learning_rate': 1.723445595854922e-05, 'epoch': 3.99} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8787, 'learning_rate': 1.6748704663212434e-05, 'epoch': 4.01} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16�� | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'eval_loss': inf, 'eval_wer': 0.2217630412043662, 'eval_runtime': 708.1909, 'eval_samples_per_second': 22.622, 'eval_steps_per_second': 1.415, 'epoch': 4.01} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s]