{ "best_metric": null, "best_model_checkpoint": null, "epoch": 500.0, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.33, "learning_rate": 5.82e-05, "loss": 11.9523, "step": 100 }, { "epoch": 16.67, "learning_rate": 0.0001182, "loss": 3.7399, "step": 200 }, { "epoch": 25.0, "learning_rate": 0.00017819999999999997, "loss": 3.074, "step": 300 }, { "epoch": 33.33, "learning_rate": 0.0002382, "loss": 2.8995, "step": 400 }, { "epoch": 41.67, "learning_rate": 0.0002982, "loss": 2.2627, "step": 500 }, { "epoch": 41.67, "eval_loss": 1.4175914525985718, "eval_runtime": 5.6701, "eval_samples_per_second": 25.044, "eval_steps_per_second": 0.882, "eval_wer": 0.8282476024411508, "step": 500 }, { "epoch": 50.0, "learning_rate": 0.0002947090909090909, "loss": 1.5352, "step": 600 }, { "epoch": 58.33, "learning_rate": 0.00028925454545454543, "loss": 1.2177, "step": 700 }, { "epoch": 66.67, "learning_rate": 0.00028379999999999996, "loss": 0.9821, "step": 800 }, { "epoch": 75.0, "learning_rate": 0.0002783454545454545, "loss": 0.8097, "step": 900 }, { "epoch": 83.33, "learning_rate": 0.0002728909090909091, "loss": 0.6692, "step": 1000 }, { "epoch": 83.33, "eval_loss": 1.3905961513519287, "eval_runtime": 5.5949, "eval_samples_per_second": 25.38, "eval_steps_per_second": 0.894, "eval_wer": 0.7375762859633828, "step": 1000 }, { "epoch": 91.67, "learning_rate": 0.0002674363636363636, "loss": 0.5587, "step": 1100 }, { "epoch": 100.0, "learning_rate": 0.0002619818181818182, "loss": 0.4692, "step": 1200 }, { "epoch": 108.33, "learning_rate": 0.0002565272727272727, "loss": 0.3949, "step": 1300 }, { "epoch": 116.67, "learning_rate": 0.00025107272727272725, "loss": 0.3484, "step": 1400 }, { "epoch": 125.0, "learning_rate": 0.0002456181818181818, "loss": 0.2874, "step": 1500 }, { "epoch": 125.0, "eval_loss": 1.791098952293396, "eval_runtime": 5.4161, "eval_samples_per_second": 26.218, "eval_steps_per_second": 0.923, "eval_wer": 0.7733217088055798, "step": 1500 }, { "epoch": 133.33, "learning_rate": 0.00024016363636363634, "loss": 0.2444, "step": 1600 }, { "epoch": 141.67, "learning_rate": 0.0002347090909090909, "loss": 0.2249, "step": 1700 }, { "epoch": 150.0, "learning_rate": 0.00022925454545454543, "loss": 0.1996, "step": 1800 }, { "epoch": 158.33, "learning_rate": 0.0002238, "loss": 0.1676, "step": 1900 }, { "epoch": 166.67, "learning_rate": 0.00021834545454545452, "loss": 0.1504, "step": 2000 }, { "epoch": 166.67, "eval_loss": 1.761537790298462, "eval_runtime": 5.7426, "eval_samples_per_second": 24.728, "eval_steps_per_second": 0.871, "eval_wer": 0.8169136878814298, "step": 2000 }, { "epoch": 175.0, "learning_rate": 0.00021289090909090905, "loss": 0.1356, "step": 2100 }, { "epoch": 183.33, "learning_rate": 0.0002074363636363636, "loss": 0.1207, "step": 2200 }, { "epoch": 191.67, "learning_rate": 0.00020198181818181814, "loss": 0.119, "step": 2300 }, { "epoch": 200.0, "learning_rate": 0.0001965272727272727, "loss": 0.1085, "step": 2400 }, { "epoch": 208.33, "learning_rate": 0.00019107272727272723, "loss": 0.102, "step": 2500 }, { "epoch": 208.33, "eval_loss": 1.9240303039550781, "eval_runtime": 5.3949, "eval_samples_per_second": 26.321, "eval_steps_per_second": 0.927, "eval_wer": 0.8456843940714909, "step": 2500 }, { "epoch": 216.67, "learning_rate": 0.0001856181818181818, "loss": 0.0899, "step": 2600 }, { "epoch": 225.0, "learning_rate": 0.00018016363636363632, "loss": 0.0838, "step": 2700 }, { "epoch": 233.33, "learning_rate": 0.0001747090909090909, "loss": 0.0825, "step": 2800 }, { "epoch": 241.67, "learning_rate": 0.00016925454545454547, "loss": 0.0699, "step": 2900 }, { "epoch": 250.0, "learning_rate": 0.0001638, "loss": 0.071, "step": 3000 }, { "epoch": 250.0, "eval_loss": 1.9720656871795654, "eval_runtime": 5.5146, "eval_samples_per_second": 25.75, "eval_steps_per_second": 0.907, "eval_wer": 0.7741935483870968, "step": 3000 }, { "epoch": 258.33, "learning_rate": 0.00015834545454545456, "loss": 0.0616, "step": 3100 }, { "epoch": 266.67, "learning_rate": 0.00015289090909090909, "loss": 0.0564, "step": 3200 }, { "epoch": 275.0, "learning_rate": 0.00014743636363636362, "loss": 0.0538, "step": 3300 }, { "epoch": 283.33, "learning_rate": 0.00014198181818181818, "loss": 0.047, "step": 3400 }, { "epoch": 291.67, "learning_rate": 0.0001365272727272727, "loss": 0.046, "step": 3500 }, { "epoch": 291.67, "eval_loss": 2.0821120738983154, "eval_runtime": 5.458, "eval_samples_per_second": 26.017, "eval_steps_per_second": 0.916, "eval_wer": 0.8326068003487358, "step": 3500 }, { "epoch": 300.0, "learning_rate": 0.00013107272727272727, "loss": 0.0408, "step": 3600 }, { "epoch": 308.33, "learning_rate": 0.0001256181818181818, "loss": 0.0401, "step": 3700 }, { "epoch": 316.67, "learning_rate": 0.00012016363636363635, "loss": 0.037, "step": 3800 }, { "epoch": 325.0, "learning_rate": 0.0001147090909090909, "loss": 0.0362, "step": 3900 }, { "epoch": 333.33, "learning_rate": 0.00010925454545454544, "loss": 0.0357, "step": 4000 }, { "epoch": 333.33, "eval_loss": 2.052865505218506, "eval_runtime": 5.4898, "eval_samples_per_second": 25.866, "eval_steps_per_second": 0.911, "eval_wer": 0.8308631211857018, "step": 4000 }, { "epoch": 341.67, "learning_rate": 0.00010379999999999999, "loss": 0.0336, "step": 4100 }, { "epoch": 350.0, "learning_rate": 9.834545454545455e-05, "loss": 0.0283, "step": 4200 }, { "epoch": 358.33, "learning_rate": 9.289090909090909e-05, "loss": 0.0274, "step": 4300 }, { "epoch": 366.67, "learning_rate": 8.743636363636364e-05, "loss": 0.0256, "step": 4400 }, { "epoch": 375.0, "learning_rate": 8.198181818181818e-05, "loss": 0.0241, "step": 4500 }, { "epoch": 375.0, "eval_loss": 2.0744874477386475, "eval_runtime": 5.4725, "eval_samples_per_second": 25.948, "eval_steps_per_second": 0.914, "eval_wer": 0.8029642545771578, "step": 4500 }, { "epoch": 383.33, "learning_rate": 7.652727272727273e-05, "loss": 0.0266, "step": 4600 }, { "epoch": 391.67, "learning_rate": 7.107272727272727e-05, "loss": 0.0243, "step": 4700 }, { "epoch": 400.0, "learning_rate": 6.561818181818182e-05, "loss": 0.0191, "step": 4800 }, { "epoch": 408.33, "learning_rate": 6.016363636363636e-05, "loss": 0.0193, "step": 4900 }, { "epoch": 416.67, "learning_rate": 5.4709090909090905e-05, "loss": 0.0165, "step": 5000 }, { "epoch": 416.67, "eval_loss": 2.2303550243377686, "eval_runtime": 5.5806, "eval_samples_per_second": 25.445, "eval_steps_per_second": 0.896, "eval_wer": 0.8238884045335658, "step": 5000 }, { "epoch": 425.0, "learning_rate": 4.925454545454545e-05, "loss": 0.0195, "step": 5100 }, { "epoch": 433.33, "learning_rate": 4.3799999999999994e-05, "loss": 0.0159, "step": 5200 }, { "epoch": 441.67, "learning_rate": 3.84e-05, "loss": 0.0146, "step": 5300 }, { "epoch": 450.0, "learning_rate": 3.294545454545454e-05, "loss": 0.0145, "step": 5400 }, { "epoch": 458.33, "learning_rate": 2.749090909090909e-05, "loss": 0.0114, "step": 5500 }, { "epoch": 458.33, "eval_loss": 2.157787561416626, "eval_runtime": 5.9361, "eval_samples_per_second": 23.921, "eval_steps_per_second": 0.842, "eval_wer": 0.8430688753269399, "step": 5500 }, { "epoch": 466.67, "learning_rate": 2.2036363636363632e-05, "loss": 0.0132, "step": 5600 }, { "epoch": 475.0, "learning_rate": 1.658181818181818e-05, "loss": 0.0142, "step": 5700 }, { "epoch": 483.33, "learning_rate": 1.1127272727272727e-05, "loss": 0.0132, "step": 5800 }, { "epoch": 491.67, "learning_rate": 5.672727272727272e-06, "loss": 0.0128, "step": 5900 }, { "epoch": 500.0, "learning_rate": 2.1818181818181815e-07, "loss": 0.012, "step": 6000 }, { "epoch": 500.0, "eval_loss": 2.165587902069092, "eval_runtime": 5.7782, "eval_samples_per_second": 24.575, "eval_steps_per_second": 0.865, "eval_wer": 0.8352223190932868, "step": 6000 }, { "epoch": 500.0, "step": 6000, "total_flos": 3.5552286921447256e+19, "train_loss": 0.5668576599160831, "train_runtime": 11637.7564, "train_samples_per_second": 16.24, "train_steps_per_second": 0.516 } ], "max_steps": 6000, "num_train_epochs": 500, "total_flos": 3.5552286921447256e+19, "trial_name": null, "trial_params": null }