{ "best_metric": null, "best_model_checkpoint": null, "epoch": 99.99521531100478, "global_step": 5200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.92, "learning_rate": 3.2333333333333334e-06, "loss": 13.5304, "step": 100 }, { "epoch": 3.84, "learning_rate": 6.566666666666667e-06, "loss": 3.5412, "step": 200 }, { "epoch": 5.77, "learning_rate": 9.9e-06, "loss": 3.1425, "step": 300 }, { "epoch": 7.69, "learning_rate": 9.80204081632653e-06, "loss": 2.0204, "step": 400 }, { "epoch": 9.61, "learning_rate": 9.59795918367347e-06, "loss": 1.5794, "step": 500 }, { "epoch": 9.61, "eval_loss": 0.9859886169433594, "eval_runtime": 65.6063, "eval_samples_per_second": 16.95, "eval_steps_per_second": 4.237, "eval_wer": 0.9582062780269058, "step": 500 }, { "epoch": 11.54, "learning_rate": 9.39387755102041e-06, "loss": 1.4015, "step": 600 }, { "epoch": 13.46, "learning_rate": 9.189795918367347e-06, "loss": 1.2803, "step": 700 }, { "epoch": 15.38, "learning_rate": 8.985714285714287e-06, "loss": 1.1969, "step": 800 }, { "epoch": 17.31, "learning_rate": 8.781632653061225e-06, "loss": 1.1458, "step": 900 }, { "epoch": 19.23, "learning_rate": 8.577551020408163e-06, "loss": 1.0985, "step": 1000 }, { "epoch": 19.23, "eval_loss": 0.5948446989059448, "eval_runtime": 63.7089, "eval_samples_per_second": 17.454, "eval_steps_per_second": 4.364, "eval_wer": 0.7533632286995515, "step": 1000 }, { "epoch": 21.15, "learning_rate": 8.373469387755104e-06, "loss": 1.0639, "step": 1100 }, { "epoch": 23.08, "learning_rate": 8.169387755102042e-06, "loss": 1.0214, "step": 1200 }, { "epoch": 25.0, "learning_rate": 7.96530612244898e-06, "loss": 1.0047, "step": 1300 }, { "epoch": 26.92, "learning_rate": 7.76122448979592e-06, "loss": 0.971, "step": 1400 }, { "epoch": 28.84, "learning_rate": 7.557142857142857e-06, "loss": 0.9551, "step": 1500 }, { "epoch": 28.84, "eval_loss": 0.5892378091812134, "eval_runtime": 64.7699, "eval_samples_per_second": 17.168, "eval_steps_per_second": 4.292, "eval_wer": 0.7391928251121076, "step": 1500 }, { "epoch": 30.77, "learning_rate": 7.353061224489797e-06, "loss": 0.9387, "step": 1600 }, { "epoch": 32.69, "learning_rate": 7.148979591836735e-06, "loss": 0.9272, "step": 1700 }, { "epoch": 34.61, "learning_rate": 6.944897959183674e-06, "loss": 0.9192, "step": 1800 }, { "epoch": 36.54, "learning_rate": 6.740816326530613e-06, "loss": 0.892, "step": 1900 }, { "epoch": 38.46, "learning_rate": 6.5367346938775515e-06, "loss": 0.8699, "step": 2000 }, { "epoch": 38.46, "eval_loss": 0.5926958322525024, "eval_runtime": 64.9144, "eval_samples_per_second": 17.13, "eval_steps_per_second": 4.283, "eval_wer": 0.72, "step": 2000 }, { "epoch": 40.38, "learning_rate": 6.332653061224491e-06, "loss": 0.8779, "step": 2100 }, { "epoch": 42.31, "learning_rate": 6.128571428571429e-06, "loss": 0.8511, "step": 2200 }, { "epoch": 44.23, "learning_rate": 5.924489795918368e-06, "loss": 0.8532, "step": 2300 }, { "epoch": 46.15, "learning_rate": 5.720408163265306e-06, "loss": 0.8442, "step": 2400 }, { "epoch": 48.08, "learning_rate": 5.516326530612245e-06, "loss": 0.8372, "step": 2500 }, { "epoch": 48.08, "eval_loss": 0.5938696265220642, "eval_runtime": 64.5641, "eval_samples_per_second": 17.223, "eval_steps_per_second": 4.306, "eval_wer": 0.73847533632287, "step": 2500 }, { "epoch": 50.0, "learning_rate": 5.312244897959185e-06, "loss": 0.8112, "step": 2600 }, { "epoch": 51.92, "learning_rate": 5.108163265306123e-06, "loss": 0.8165, "step": 2700 }, { "epoch": 53.84, "learning_rate": 4.904081632653061e-06, "loss": 0.8045, "step": 2800 }, { "epoch": 55.77, "learning_rate": 4.7e-06, "loss": 0.7974, "step": 2900 }, { "epoch": 57.69, "learning_rate": 4.4959183673469394e-06, "loss": 0.7794, "step": 3000 }, { "epoch": 57.69, "eval_loss": 0.5800846815109253, "eval_runtime": 63.4747, "eval_samples_per_second": 17.519, "eval_steps_per_second": 4.38, "eval_wer": 0.7388340807174888, "step": 3000 }, { "epoch": 59.61, "learning_rate": 4.291836734693878e-06, "loss": 0.7752, "step": 3100 }, { "epoch": 61.54, "learning_rate": 4.087755102040817e-06, "loss": 0.7765, "step": 3200 }, { "epoch": 63.46, "learning_rate": 3.8836734693877556e-06, "loss": 0.7584, "step": 3300 }, { "epoch": 65.38, "learning_rate": 3.6795918367346943e-06, "loss": 0.7589, "step": 3400 }, { "epoch": 67.31, "learning_rate": 3.475510204081633e-06, "loss": 0.7565, "step": 3500 }, { "epoch": 67.31, "eval_loss": 0.582656979560852, "eval_runtime": 71.1025, "eval_samples_per_second": 15.639, "eval_steps_per_second": 3.91, "eval_wer": 0.7183856502242153, "step": 3500 }, { "epoch": 69.23, "learning_rate": 3.2714285714285717e-06, "loss": 0.7595, "step": 3600 }, { "epoch": 71.15, "learning_rate": 3.0673469387755104e-06, "loss": 0.747, "step": 3700 }, { "epoch": 73.08, "learning_rate": 2.863265306122449e-06, "loss": 0.7373, "step": 3800 }, { "epoch": 75.0, "learning_rate": 2.6591836734693882e-06, "loss": 0.7268, "step": 3900 }, { "epoch": 76.92, "learning_rate": 2.455102040816327e-06, "loss": 0.7303, "step": 4000 }, { "epoch": 76.92, "eval_loss": 0.580017626285553, "eval_runtime": 63.495, "eval_samples_per_second": 17.513, "eval_steps_per_second": 4.378, "eval_wer": 0.7296860986547086, "step": 4000 }, { "epoch": 78.84, "learning_rate": 2.2510204081632656e-06, "loss": 0.7239, "step": 4100 }, { "epoch": 80.77, "learning_rate": 2.0469387755102044e-06, "loss": 0.7201, "step": 4200 }, { "epoch": 82.69, "learning_rate": 1.8428571428571428e-06, "loss": 0.7185, "step": 4300 }, { "epoch": 84.61, "learning_rate": 1.6387755102040818e-06, "loss": 0.7149, "step": 4400 }, { "epoch": 86.54, "learning_rate": 1.4346938775510205e-06, "loss": 0.7128, "step": 4500 }, { "epoch": 86.54, "eval_loss": 0.5977014899253845, "eval_runtime": 64.1516, "eval_samples_per_second": 17.334, "eval_steps_per_second": 4.333, "eval_wer": 0.7149775784753363, "step": 4500 }, { "epoch": 88.46, "learning_rate": 1.2306122448979594e-06, "loss": 0.705, "step": 4600 }, { "epoch": 90.38, "learning_rate": 1.026530612244898e-06, "loss": 0.7129, "step": 4700 }, { "epoch": 92.31, "learning_rate": 8.224489795918368e-07, "loss": 0.7076, "step": 4800 }, { "epoch": 94.23, "learning_rate": 6.183673469387755e-07, "loss": 0.7126, "step": 4900 }, { "epoch": 96.15, "learning_rate": 4.142857142857143e-07, "loss": 0.6972, "step": 5000 }, { "epoch": 96.15, "eval_loss": 0.5900700092315674, "eval_runtime": 63.2743, "eval_samples_per_second": 17.574, "eval_steps_per_second": 4.394, "eval_wer": 0.7176681614349776, "step": 5000 }, { "epoch": 98.08, "learning_rate": 2.1020408163265306e-07, "loss": 0.7001, "step": 5100 }, { "epoch": 100.0, "learning_rate": 6.1224489795918365e-09, "loss": 0.6965, "step": 5200 }, { "epoch": 100.0, "step": 5200, "total_flos": 1.4653197993726655e+20, "train_loss": 1.2273276989276594, "train_runtime": 32753.7935, "train_samples_per_second": 10.185, "train_steps_per_second": 0.159 } ], "max_steps": 5200, "num_train_epochs": 100, "total_flos": 1.4653197993726655e+20, "trial_name": null, "trial_params": null }