diff --git "a/wandb/run-20220203_135844-2tzexn1o/files/output.log" "b/wandb/run-20220203_135844-2tzexn1o/files/output.log" --- "a/wandb/run-20220203_135844-2tzexn1o/files/output.log" +++ "b/wandb/run-20220203_135844-2tzexn1o/files/output.log" @@ -21732,3 +21732,1353 @@ Deleting older checkpoint [checkpoint-8000] due to args.save_total_limit {'eval_loss': inf, 'eval_wer': 0.2187084892160197, 'eval_runtime': 702.6461, 'eval_samples_per_second': 22.801, 'eval_steps_per_second': 1.426, 'epoch': 4.59} Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8491, 'learning_rate': 6.557642487046632e-06, 'epoch': 4.62} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8415, 'learning_rate': 6.0718911917098434e-06, 'epoch': 4.64} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8467, 'learning_rate': 5.586139896373057e-06, 'epoch': 4.67} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8356, 'learning_rate': 5.100388601036269e-06, 'epoch': 4.7} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8444, 'learning_rate': 4.6146373056994814e-06, 'epoch': 4.73} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8485, 'learning_rate': 4.128886010362694e-06, 'epoch': 4.76} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8424, 'learning_rate': 3.6431347150259062e-06, 'epoch': 4.79} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8435, 'learning_rate': 3.1573834196891186e-06, 'epoch': 4.82} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8444, 'learning_rate': 2.6716321243523314e-06, 'epoch': 4.85} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'loss': 0.8359, 'learning_rate': 2.185880829015544e-06, 'epoch': 4.87} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] +{'eval_loss': inf, 'eval_wer': 0.2171683789697946, 'eval_runtime': 698.574, 'eval_samples_per_second': 22.934, 'eval_steps_per_second': 1.434, 'epoch': 4.87} + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s] + Batch size = 16▌ | 81/1002 [00:50<10:06, 1.52it/s]