{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.6775106082036775, "eval_steps": 100, "global_step": 2600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14144271570014144, "eval_loss": 4.10262393951416, "eval_runtime": 154.9611, "eval_samples_per_second": 36.499, "eval_steps_per_second": 4.562, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.2828854314002829, "eval_loss": 3.3531899452209473, "eval_runtime": 152.1716, "eval_samples_per_second": 37.169, "eval_steps_per_second": 4.646, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.4243281471004243, "eval_loss": 3.2421672344207764, "eval_runtime": 153.7959, "eval_samples_per_second": 36.776, "eval_steps_per_second": 4.597, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.5657708628005658, "eval_loss": 3.0640172958374023, "eval_runtime": 153.0843, "eval_samples_per_second": 36.947, "eval_steps_per_second": 4.618, "eval_wer": 1.0, "step": 400 }, { "epoch": 0.7072135785007072, "grad_norm": 5.922334671020508, "learning_rate": 0.0002964, "loss": 3.608, "step": 500 }, { "epoch": 0.7072135785007072, "eval_loss": 3.2418580055236816, "eval_runtime": 154.0057, "eval_samples_per_second": 36.726, "eval_steps_per_second": 4.591, "eval_wer": 1.0, "step": 500 }, { "epoch": 0.8486562942008486, "eval_loss": 3.225853204727173, "eval_runtime": 154.2356, "eval_samples_per_second": 36.671, "eval_steps_per_second": 4.584, "eval_wer": 1.0, "step": 600 }, { "epoch": 0.9900990099009901, "eval_loss": 3.090954303741455, "eval_runtime": 152.9981, "eval_samples_per_second": 36.968, "eval_steps_per_second": 4.621, "eval_wer": 1.0, "step": 700 }, { "epoch": 1.1315417256011315, "eval_loss": 2.974919557571411, "eval_runtime": 154.1252, "eval_samples_per_second": 36.697, "eval_steps_per_second": 4.587, "eval_wer": 1.0, "step": 800 }, { "epoch": 1.272984441301273, "eval_loss": 2.970212697982788, "eval_runtime": 154.8782, "eval_samples_per_second": 36.519, "eval_steps_per_second": 4.565, "eval_wer": 1.0, "step": 900 }, { "epoch": 1.4144271570014144, "grad_norm": 1.7771501541137695, "learning_rate": 0.0002294285714285714, "loss": 3.0077, "step": 1000 }, { "epoch": 1.4144271570014144, "eval_loss": 2.977414846420288, "eval_runtime": 154.2248, "eval_samples_per_second": 36.674, "eval_steps_per_second": 4.584, "eval_wer": 1.0, "step": 1000 }, { "epoch": 1.5558698727015559, "eval_loss": 2.9689409732818604, "eval_runtime": 154.7038, "eval_samples_per_second": 36.56, "eval_steps_per_second": 4.57, "eval_wer": 1.0, "step": 1100 }, { "epoch": 1.6973125884016973, "eval_loss": 2.96563458442688, "eval_runtime": 155.7804, "eval_samples_per_second": 36.308, "eval_steps_per_second": 4.538, "eval_wer": 1.0, "step": 1200 }, { "epoch": 1.8387553041018387, "eval_loss": 2.998167037963867, "eval_runtime": 154.8051, "eval_samples_per_second": 36.536, "eval_steps_per_second": 4.567, "eval_wer": 1.0, "step": 1300 }, { "epoch": 1.9801980198019802, "eval_loss": 2.9851789474487305, "eval_runtime": 157.281, "eval_samples_per_second": 35.961, "eval_steps_per_second": 4.495, "eval_wer": 1.0, "step": 1400 }, { "epoch": 2.1216407355021216, "grad_norm": 1.3770530223846436, "learning_rate": 0.0001584285714285714, "loss": 2.9933, "step": 1500 }, { "epoch": 2.1216407355021216, "eval_loss": 2.983766555786133, "eval_runtime": 154.5498, "eval_samples_per_second": 36.597, "eval_steps_per_second": 4.575, "eval_wer": 1.0, "step": 1500 }, { "epoch": 2.263083451202263, "eval_loss": 2.9718267917633057, "eval_runtime": 154.4047, "eval_samples_per_second": 36.631, "eval_steps_per_second": 4.579, "eval_wer": 1.0, "step": 1600 }, { "epoch": 2.4045261669024045, "eval_loss": 2.9711246490478516, "eval_runtime": 156.2998, "eval_samples_per_second": 36.187, "eval_steps_per_second": 4.523, "eval_wer": 1.0, "step": 1700 }, { "epoch": 2.545968882602546, "eval_loss": 2.9617552757263184, "eval_runtime": 156.1331, "eval_samples_per_second": 36.226, "eval_steps_per_second": 4.528, "eval_wer": 1.0, "step": 1800 }, { "epoch": 2.6874115983026874, "eval_loss": 2.9542484283447266, "eval_runtime": 155.1742, "eval_samples_per_second": 36.449, "eval_steps_per_second": 4.556, "eval_wer": 1.0, "step": 1900 }, { "epoch": 2.828854314002829, "grad_norm": 2.6072192192077637, "learning_rate": 8.742857142857142e-05, "loss": 3.0924, "step": 2000 }, { "epoch": 2.828854314002829, "eval_loss": 2.9699807167053223, "eval_runtime": 156.7707, "eval_samples_per_second": 36.078, "eval_steps_per_second": 4.51, "eval_wer": 1.0, "step": 2000 }, { "epoch": 2.9702970297029703, "eval_loss": 2.9556074142456055, "eval_runtime": 156.4942, "eval_samples_per_second": 36.142, "eval_steps_per_second": 4.518, "eval_wer": 1.0, "step": 2100 }, { "epoch": 3.1117397454031117, "eval_loss": 2.947584629058838, "eval_runtime": 156.7459, "eval_samples_per_second": 36.084, "eval_steps_per_second": 4.51, "eval_wer": 1.0, "step": 2200 }, { "epoch": 3.253182461103253, "eval_loss": 2.9511587619781494, "eval_runtime": 156.6674, "eval_samples_per_second": 36.102, "eval_steps_per_second": 4.513, "eval_wer": 1.0, "step": 2300 }, { "epoch": 3.3946251768033946, "eval_loss": 2.949030637741089, "eval_runtime": 156.7295, "eval_samples_per_second": 36.088, "eval_steps_per_second": 4.511, "eval_wer": 1.0, "step": 2400 }, { "epoch": 3.536067892503536, "grad_norm": 5.350417613983154, "learning_rate": 1.6285714285714283e-05, "loss": 2.9765, "step": 2500 }, { "epoch": 3.536067892503536, "eval_loss": 2.9975638389587402, "eval_runtime": 157.876, "eval_samples_per_second": 35.826, "eval_steps_per_second": 4.478, "eval_wer": 1.0, "step": 2500 }, { "epoch": 3.6775106082036775, "eval_loss": 2.9536802768707275, "eval_runtime": 157.8256, "eval_samples_per_second": 35.837, "eval_steps_per_second": 4.48, "eval_wer": 1.0, "step": 2600 }, { "epoch": 3.6775106082036775, "step": 2600, "total_flos": 9.931497341662648e+18, "train_loss": 3.129935314471905, "train_runtime": 6863.5717, "train_samples_per_second": 12.122, "train_steps_per_second": 0.379 } ], "logging_steps": 500, "max_steps": 2600, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.931497341662648e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }