{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 500.0,
  "global_step": 6000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 8.33,
      "learning_rate": 5.82e-05,
      "loss": 11.9523,
      "step": 100
    },
    {
      "epoch": 16.67,
      "learning_rate": 0.0001182,
      "loss": 3.7399,
      "step": 200
    },
    {
      "epoch": 25.0,
      "learning_rate": 0.00017819999999999997,
      "loss": 3.074,
      "step": 300
    },
    {
      "epoch": 33.33,
      "learning_rate": 0.0002382,
      "loss": 2.8995,
      "step": 400
    },
    {
      "epoch": 41.67,
      "learning_rate": 0.0002982,
      "loss": 2.2627,
      "step": 500
    },
    {
      "epoch": 41.67,
      "eval_loss": 1.4175914525985718,
      "eval_runtime": 5.6701,
      "eval_samples_per_second": 25.044,
      "eval_steps_per_second": 0.882,
      "eval_wer": 0.8282476024411508,
      "step": 500
    },
    {
      "epoch": 50.0,
      "learning_rate": 0.0002947090909090909,
      "loss": 1.5352,
      "step": 600
    },
    {
      "epoch": 58.33,
      "learning_rate": 0.00028925454545454543,
      "loss": 1.2177,
      "step": 700
    },
    {
      "epoch": 66.67,
      "learning_rate": 0.00028379999999999996,
      "loss": 0.9821,
      "step": 800
    },
    {
      "epoch": 75.0,
      "learning_rate": 0.0002783454545454545,
      "loss": 0.8097,
      "step": 900
    },
    {
      "epoch": 83.33,
      "learning_rate": 0.0002728909090909091,
      "loss": 0.6692,
      "step": 1000
    },
    {
      "epoch": 83.33,
      "eval_loss": 1.3905961513519287,
      "eval_runtime": 5.5949,
      "eval_samples_per_second": 25.38,
      "eval_steps_per_second": 0.894,
      "eval_wer": 0.7375762859633828,
      "step": 1000
    },
    {
      "epoch": 91.67,
      "learning_rate": 0.0002674363636363636,
      "loss": 0.5587,
      "step": 1100
    },
    {
      "epoch": 100.0,
      "learning_rate": 0.0002619818181818182,
      "loss": 0.4692,
      "step": 1200
    },
    {
      "epoch": 108.33,
      "learning_rate": 0.0002565272727272727,
      "loss": 0.3949,
      "step": 1300
    },
    {
      "epoch": 116.67,
      "learning_rate": 0.00025107272727272725,
      "loss": 0.3484,
      "step": 1400
    },
    {
      "epoch": 125.0,
      "learning_rate": 0.0002456181818181818,
      "loss": 0.2874,
      "step": 1500
    },
    {
      "epoch": 125.0,
      "eval_loss": 1.791098952293396,
      "eval_runtime": 5.4161,
      "eval_samples_per_second": 26.218,
      "eval_steps_per_second": 0.923,
      "eval_wer": 0.7733217088055798,
      "step": 1500
    },
    {
      "epoch": 133.33,
      "learning_rate": 0.00024016363636363634,
      "loss": 0.2444,
      "step": 1600
    },
    {
      "epoch": 141.67,
      "learning_rate": 0.0002347090909090909,
      "loss": 0.2249,
      "step": 1700
    },
    {
      "epoch": 150.0,
      "learning_rate": 0.00022925454545454543,
      "loss": 0.1996,
      "step": 1800
    },
    {
      "epoch": 158.33,
      "learning_rate": 0.0002238,
      "loss": 0.1676,
      "step": 1900
    },
    {
      "epoch": 166.67,
      "learning_rate": 0.00021834545454545452,
      "loss": 0.1504,
      "step": 2000
    },
    {
      "epoch": 166.67,
      "eval_loss": 1.761537790298462,
      "eval_runtime": 5.7426,
      "eval_samples_per_second": 24.728,
      "eval_steps_per_second": 0.871,
      "eval_wer": 0.8169136878814298,
      "step": 2000
    },
    {
      "epoch": 175.0,
      "learning_rate": 0.00021289090909090905,
      "loss": 0.1356,
      "step": 2100
    },
    {
      "epoch": 183.33,
      "learning_rate": 0.0002074363636363636,
      "loss": 0.1207,
      "step": 2200
    },
    {
      "epoch": 191.67,
      "learning_rate": 0.00020198181818181814,
      "loss": 0.119,
      "step": 2300
    },
    {
      "epoch": 200.0,
      "learning_rate": 0.0001965272727272727,
      "loss": 0.1085,
      "step": 2400
    },
    {
      "epoch": 208.33,
      "learning_rate": 0.00019107272727272723,
      "loss": 0.102,
      "step": 2500
    },
    {
      "epoch": 208.33,
      "eval_loss": 1.9240303039550781,
      "eval_runtime": 5.3949,
      "eval_samples_per_second": 26.321,
      "eval_steps_per_second": 0.927,
      "eval_wer": 0.8456843940714909,
      "step": 2500
    },
    {
      "epoch": 216.67,
      "learning_rate": 0.0001856181818181818,
      "loss": 0.0899,
      "step": 2600
    },
    {
      "epoch": 225.0,
      "learning_rate": 0.00018016363636363632,
      "loss": 0.0838,
      "step": 2700
    },
    {
      "epoch": 233.33,
      "learning_rate": 0.0001747090909090909,
      "loss": 0.0825,
      "step": 2800
    },
    {
      "epoch": 241.67,
      "learning_rate": 0.00016925454545454547,
      "loss": 0.0699,
      "step": 2900
    },
    {
      "epoch": 250.0,
      "learning_rate": 0.0001638,
      "loss": 0.071,
      "step": 3000
    },
    {
      "epoch": 250.0,
      "eval_loss": 1.9720656871795654,
      "eval_runtime": 5.5146,
      "eval_samples_per_second": 25.75,
      "eval_steps_per_second": 0.907,
      "eval_wer": 0.7741935483870968,
      "step": 3000
    },
    {
      "epoch": 258.33,
      "learning_rate": 0.00015834545454545456,
      "loss": 0.0616,
      "step": 3100
    },
    {
      "epoch": 266.67,
      "learning_rate": 0.00015289090909090909,
      "loss": 0.0564,
      "step": 3200
    },
    {
      "epoch": 275.0,
      "learning_rate": 0.00014743636363636362,
      "loss": 0.0538,
      "step": 3300
    },
    {
      "epoch": 283.33,
      "learning_rate": 0.00014198181818181818,
      "loss": 0.047,
      "step": 3400
    },
    {
      "epoch": 291.67,
      "learning_rate": 0.0001365272727272727,
      "loss": 0.046,
      "step": 3500
    },
    {
      "epoch": 291.67,
      "eval_loss": 2.0821120738983154,
      "eval_runtime": 5.458,
      "eval_samples_per_second": 26.017,
      "eval_steps_per_second": 0.916,
      "eval_wer": 0.8326068003487358,
      "step": 3500
    },
    {
      "epoch": 300.0,
      "learning_rate": 0.00013107272727272727,
      "loss": 0.0408,
      "step": 3600
    },
    {
      "epoch": 308.33,
      "learning_rate": 0.0001256181818181818,
      "loss": 0.0401,
      "step": 3700
    },
    {
      "epoch": 316.67,
      "learning_rate": 0.00012016363636363635,
      "loss": 0.037,
      "step": 3800
    },
    {
      "epoch": 325.0,
      "learning_rate": 0.0001147090909090909,
      "loss": 0.0362,
      "step": 3900
    },
    {
      "epoch": 333.33,
      "learning_rate": 0.00010925454545454544,
      "loss": 0.0357,
      "step": 4000
    },
    {
      "epoch": 333.33,
      "eval_loss": 2.052865505218506,
      "eval_runtime": 5.4898,
      "eval_samples_per_second": 25.866,
      "eval_steps_per_second": 0.911,
      "eval_wer": 0.8308631211857018,
      "step": 4000
    },
    {
      "epoch": 341.67,
      "learning_rate": 0.00010379999999999999,
      "loss": 0.0336,
      "step": 4100
    },
    {
      "epoch": 350.0,
      "learning_rate": 9.834545454545455e-05,
      "loss": 0.0283,
      "step": 4200
    },
    {
      "epoch": 358.33,
      "learning_rate": 9.289090909090909e-05,
      "loss": 0.0274,
      "step": 4300
    },
    {
      "epoch": 366.67,
      "learning_rate": 8.743636363636364e-05,
      "loss": 0.0256,
      "step": 4400
    },
    {
      "epoch": 375.0,
      "learning_rate": 8.198181818181818e-05,
      "loss": 0.0241,
      "step": 4500
    },
    {
      "epoch": 375.0,
      "eval_loss": 2.0744874477386475,
      "eval_runtime": 5.4725,
      "eval_samples_per_second": 25.948,
      "eval_steps_per_second": 0.914,
      "eval_wer": 0.8029642545771578,
      "step": 4500
    },
    {
      "epoch": 383.33,
      "learning_rate": 7.652727272727273e-05,
      "loss": 0.0266,
      "step": 4600
    },
    {
      "epoch": 391.67,
      "learning_rate": 7.107272727272727e-05,
      "loss": 0.0243,
      "step": 4700
    },
    {
      "epoch": 400.0,
      "learning_rate": 6.561818181818182e-05,
      "loss": 0.0191,
      "step": 4800
    },
    {
      "epoch": 408.33,
      "learning_rate": 6.016363636363636e-05,
      "loss": 0.0193,
      "step": 4900
    },
    {
      "epoch": 416.67,
      "learning_rate": 5.4709090909090905e-05,
      "loss": 0.0165,
      "step": 5000
    },
    {
      "epoch": 416.67,
      "eval_loss": 2.2303550243377686,
      "eval_runtime": 5.5806,
      "eval_samples_per_second": 25.445,
      "eval_steps_per_second": 0.896,
      "eval_wer": 0.8238884045335658,
      "step": 5000
    },
    {
      "epoch": 425.0,
      "learning_rate": 4.925454545454545e-05,
      "loss": 0.0195,
      "step": 5100
    },
    {
      "epoch": 433.33,
      "learning_rate": 4.3799999999999994e-05,
      "loss": 0.0159,
      "step": 5200
    },
    {
      "epoch": 441.67,
      "learning_rate": 3.84e-05,
      "loss": 0.0146,
      "step": 5300
    },
    {
      "epoch": 450.0,
      "learning_rate": 3.294545454545454e-05,
      "loss": 0.0145,
      "step": 5400
    },
    {
      "epoch": 458.33,
      "learning_rate": 2.749090909090909e-05,
      "loss": 0.0114,
      "step": 5500
    },
    {
      "epoch": 458.33,
      "eval_loss": 2.157787561416626,
      "eval_runtime": 5.9361,
      "eval_samples_per_second": 23.921,
      "eval_steps_per_second": 0.842,
      "eval_wer": 0.8430688753269399,
      "step": 5500
    },
    {
      "epoch": 466.67,
      "learning_rate": 2.2036363636363632e-05,
      "loss": 0.0132,
      "step": 5600
    },
    {
      "epoch": 475.0,
      "learning_rate": 1.658181818181818e-05,
      "loss": 0.0142,
      "step": 5700
    },
    {
      "epoch": 483.33,
      "learning_rate": 1.1127272727272727e-05,
      "loss": 0.0132,
      "step": 5800
    },
    {
      "epoch": 491.67,
      "learning_rate": 5.672727272727272e-06,
      "loss": 0.0128,
      "step": 5900
    },
    {
      "epoch": 500.0,
      "learning_rate": 2.1818181818181815e-07,
      "loss": 0.012,
      "step": 6000
    },
    {
      "epoch": 500.0,
      "eval_loss": 2.165587902069092,
      "eval_runtime": 5.7782,
      "eval_samples_per_second": 24.575,
      "eval_steps_per_second": 0.865,
      "eval_wer": 0.8352223190932868,
      "step": 6000
    },
    {
      "epoch": 500.0,
      "step": 6000,
      "total_flos": 3.5552286921447256e+19,
      "train_loss": 0.5668576599160831,
      "train_runtime": 11637.7564,
      "train_samples_per_second": 16.24,
      "train_steps_per_second": 0.516
    }
  ],
  "max_steps": 6000,
  "num_train_epochs": 500,
  "total_flos": 3.5552286921447256e+19,
  "trial_name": null,
  "trial_params": null
}