{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 499.97196261682245,
  "global_step": 13000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 3.82,
      "learning_rate": 3.7125e-06,
      "loss": 25.9068,
      "step": 100
    },
    {
      "epoch": 7.67,
      "learning_rate": 7.4625e-06,
      "loss": 12.5481,
      "step": 200
    },
    {
      "epoch": 11.52,
      "learning_rate": 1.1212499999999998e-05,
      "loss": 7.2704,
      "step": 300
    },
    {
      "epoch": 15.37,
      "learning_rate": 1.49625e-05,
      "loss": 5.7673,
      "step": 400
    },
    {
      "epoch": 19.22,
      "learning_rate": 1.8712499999999997e-05,
      "loss": 4.6694,
      "step": 500
    },
    {
      "epoch": 19.22,
      "eval_loss": 4.045496463775635,
      "eval_runtime": 13.5539,
      "eval_samples_per_second": 26.561,
      "eval_steps_per_second": 3.32,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 23.07,
      "learning_rate": 2.2462499999999997e-05,
      "loss": 3.9455,
      "step": 600
    },
    {
      "epoch": 26.9,
      "learning_rate": 2.6212499999999997e-05,
      "loss": 3.583,
      "step": 700
    },
    {
      "epoch": 30.75,
      "learning_rate": 2.99625e-05,
      "loss": 3.5138,
      "step": 800
    },
    {
      "epoch": 34.6,
      "learning_rate": 3.37125e-05,
      "loss": 3.4479,
      "step": 900
    },
    {
      "epoch": 38.45,
      "learning_rate": 3.7462499999999996e-05,
      "loss": 3.3907,
      "step": 1000
    },
    {
      "epoch": 38.45,
      "eval_loss": 3.283632516860962,
      "eval_runtime": 13.4556,
      "eval_samples_per_second": 26.755,
      "eval_steps_per_second": 3.344,
      "eval_wer": 1.0,
      "step": 1000
    },
    {
      "epoch": 42.3,
      "learning_rate": 4.12125e-05,
      "loss": 3.3556,
      "step": 1100
    },
    {
      "epoch": 46.15,
      "learning_rate": 4.4962499999999995e-05,
      "loss": 3.3117,
      "step": 1200
    },
    {
      "epoch": 49.97,
      "learning_rate": 4.871249999999999e-05,
      "loss": 3.0309,
      "step": 1300
    },
    {
      "epoch": 53.82,
      "learning_rate": 5.2462499999999994e-05,
      "loss": 2.5273,
      "step": 1400
    },
    {
      "epoch": 57.67,
      "learning_rate": 5.62125e-05,
      "loss": 2.0866,
      "step": 1500
    },
    {
      "epoch": 57.67,
      "eval_loss": 1.2787948846817017,
      "eval_runtime": 13.4904,
      "eval_samples_per_second": 26.686,
      "eval_steps_per_second": 3.336,
      "eval_wer": 0.7714621716973736,
      "step": 1500
    },
    {
      "epoch": 61.52,
      "learning_rate": 5.9962499999999994e-05,
      "loss": 1.8478,
      "step": 1600
    },
    {
      "epoch": 65.37,
      "learning_rate": 6.37125e-05,
      "loss": 1.7048,
      "step": 1700
    },
    {
      "epoch": 69.22,
      "learning_rate": 6.7425e-05,
      "loss": 1.6017,
      "step": 1800
    },
    {
      "epoch": 73.07,
      "learning_rate": 7.11375e-05,
      "loss": 1.4931,
      "step": 1900
    },
    {
      "epoch": 76.9,
      "learning_rate": 7.48875e-05,
      "loss": 1.4106,
      "step": 2000
    },
    {
      "epoch": 76.9,
      "eval_loss": 0.7866009473800659,
      "eval_runtime": 13.5793,
      "eval_samples_per_second": 26.511,
      "eval_steps_per_second": 3.314,
      "eval_wer": 0.689141513132105,
      "step": 2000
    },
    {
      "epoch": 80.75,
      "learning_rate": 7.433863636363635e-05,
      "loss": 1.3808,
      "step": 2100
    },
    {
      "epoch": 84.6,
      "learning_rate": 7.365681818181818e-05,
      "loss": 1.3114,
      "step": 2200
    },
    {
      "epoch": 88.45,
      "learning_rate": 7.2975e-05,
      "loss": 1.2661,
      "step": 2300
    },
    {
      "epoch": 92.3,
      "learning_rate": 7.229318181818181e-05,
      "loss": 1.2103,
      "step": 2400
    },
    {
      "epoch": 96.15,
      "learning_rate": 7.161136363636363e-05,
      "loss": 1.1711,
      "step": 2500
    },
    {
      "epoch": 96.15,
      "eval_loss": 0.655582070350647,
      "eval_runtime": 13.3729,
      "eval_samples_per_second": 26.92,
      "eval_steps_per_second": 3.365,
      "eval_wer": 0.6272050176401411,
      "step": 2500
    },
    {
      "epoch": 99.97,
      "learning_rate": 7.092954545454545e-05,
      "loss": 1.1192,
      "step": 2600
    },
    {
      "epoch": 103.82,
      "learning_rate": 7.024772727272726e-05,
      "loss": 1.0931,
      "step": 2700
    },
    {
      "epoch": 107.67,
      "learning_rate": 6.956590909090908e-05,
      "loss": 1.0688,
      "step": 2800
    },
    {
      "epoch": 111.52,
      "learning_rate": 6.88840909090909e-05,
      "loss": 1.0559,
      "step": 2900
    },
    {
      "epoch": 115.37,
      "learning_rate": 6.82090909090909e-05,
      "loss": 1.038,
      "step": 3000
    },
    {
      "epoch": 115.37,
      "eval_loss": 0.6194556951522827,
      "eval_runtime": 15.6261,
      "eval_samples_per_second": 23.038,
      "eval_steps_per_second": 2.88,
      "eval_wer": 0.5680125441003528,
      "step": 3000
    },
    {
      "epoch": 119.22,
      "learning_rate": 6.752727272727272e-05,
      "loss": 1.0038,
      "step": 3100
    },
    {
      "epoch": 123.07,
      "learning_rate": 6.684545454545454e-05,
      "loss": 0.9754,
      "step": 3200
    },
    {
      "epoch": 126.9,
      "learning_rate": 6.616363636363636e-05,
      "loss": 0.9372,
      "step": 3300
    },
    {
      "epoch": 130.75,
      "learning_rate": 6.548181818181817e-05,
      "loss": 0.9232,
      "step": 3400
    },
    {
      "epoch": 134.6,
      "learning_rate": 6.479999999999999e-05,
      "loss": 0.8989,
      "step": 3500
    },
    {
      "epoch": 134.6,
      "eval_loss": 0.6562529802322388,
      "eval_runtime": 13.2747,
      "eval_samples_per_second": 27.119,
      "eval_steps_per_second": 3.39,
      "eval_wer": 0.560172481379851,
      "step": 3500
    },
    {
      "epoch": 138.45,
      "learning_rate": 6.411818181818182e-05,
      "loss": 0.882,
      "step": 3600
    },
    {
      "epoch": 142.3,
      "learning_rate": 6.343636363636364e-05,
      "loss": 0.868,
      "step": 3700
    },
    {
      "epoch": 146.15,
      "learning_rate": 6.275454545454545e-05,
      "loss": 0.8438,
      "step": 3800
    },
    {
      "epoch": 149.97,
      "learning_rate": 6.207272727272727e-05,
      "loss": 0.8209,
      "step": 3900
    },
    {
      "epoch": 153.82,
      "learning_rate": 6.139090909090908e-05,
      "loss": 0.8021,
      "step": 4000
    },
    {
      "epoch": 153.82,
      "eval_loss": 0.6643799543380737,
      "eval_runtime": 13.2721,
      "eval_samples_per_second": 27.125,
      "eval_steps_per_second": 3.391,
      "eval_wer": 0.5327322618580949,
      "step": 4000
    },
    {
      "epoch": 157.67,
      "learning_rate": 6.07090909090909e-05,
      "loss": 0.7682,
      "step": 4100
    },
    {
      "epoch": 161.52,
      "learning_rate": 6.0027272727272725e-05,
      "loss": 0.7496,
      "step": 4200
    },
    {
      "epoch": 165.37,
      "learning_rate": 5.934545454545454e-05,
      "loss": 0.7427,
      "step": 4300
    },
    {
      "epoch": 169.22,
      "learning_rate": 5.866363636363636e-05,
      "loss": 0.7336,
      "step": 4400
    },
    {
      "epoch": 173.07,
      "learning_rate": 5.7981818181818174e-05,
      "loss": 0.7161,
      "step": 4500
    },
    {
      "epoch": 173.07,
      "eval_loss": 0.6844300031661987,
      "eval_runtime": 13.407,
      "eval_samples_per_second": 26.852,
      "eval_steps_per_second": 3.356,
      "eval_wer": 0.5252842022736182,
      "step": 4500
    },
    {
      "epoch": 176.9,
      "learning_rate": 5.73e-05,
      "loss": 0.6994,
      "step": 4600
    },
    {
      "epoch": 180.75,
      "learning_rate": 5.6618181818181806e-05,
      "loss": 0.6778,
      "step": 4700
    },
    {
      "epoch": 184.6,
      "learning_rate": 5.593636363636363e-05,
      "loss": 0.6709,
      "step": 4800
    },
    {
      "epoch": 188.45,
      "learning_rate": 5.5254545454545446e-05,
      "loss": 0.6551,
      "step": 4900
    },
    {
      "epoch": 192.3,
      "learning_rate": 5.457272727272727e-05,
      "loss": 0.6449,
      "step": 5000
    },
    {
      "epoch": 192.3,
      "eval_loss": 0.7017893195152283,
      "eval_runtime": 13.3622,
      "eval_samples_per_second": 26.942,
      "eval_steps_per_second": 3.368,
      "eval_wer": 0.5331242649941199,
      "step": 5000
    },
    {
      "epoch": 196.15,
      "learning_rate": 5.389090909090909e-05,
      "loss": 0.6229,
      "step": 5100
    },
    {
      "epoch": 199.97,
      "learning_rate": 5.32090909090909e-05,
      "loss": 0.5977,
      "step": 5200
    },
    {
      "epoch": 203.82,
      "learning_rate": 5.2527272727272725e-05,
      "loss": 0.5985,
      "step": 5300
    },
    {
      "epoch": 207.67,
      "learning_rate": 5.185227272727272e-05,
      "loss": 0.5819,
      "step": 5400
    },
    {
      "epoch": 211.52,
      "learning_rate": 5.117045454545454e-05,
      "loss": 0.5659,
      "step": 5500
    },
    {
      "epoch": 211.52,
      "eval_loss": 0.7450936436653137,
      "eval_runtime": 13.5305,
      "eval_samples_per_second": 26.606,
      "eval_steps_per_second": 3.326,
      "eval_wer": 0.546452371618973,
      "step": 5500
    },
    {
      "epoch": 215.37,
      "learning_rate": 5.048863636363636e-05,
      "loss": 0.5615,
      "step": 5600
    },
    {
      "epoch": 219.22,
      "learning_rate": 4.980681818181818e-05,
      "loss": 0.5475,
      "step": 5700
    },
    {
      "epoch": 223.07,
      "learning_rate": 4.9124999999999995e-05,
      "loss": 0.529,
      "step": 5800
    },
    {
      "epoch": 226.9,
      "learning_rate": 4.844318181818181e-05,
      "loss": 0.5217,
      "step": 5900
    },
    {
      "epoch": 230.75,
      "learning_rate": 4.7761363636363634e-05,
      "loss": 0.5118,
      "step": 6000
    },
    {
      "epoch": 230.75,
      "eval_loss": 0.7857084274291992,
      "eval_runtime": 13.2806,
      "eval_samples_per_second": 27.107,
      "eval_steps_per_second": 3.388,
      "eval_wer": 0.5386123088984712,
      "step": 6000
    },
    {
      "epoch": 234.6,
      "learning_rate": 4.707954545454545e-05,
      "loss": 0.4996,
      "step": 6100
    },
    {
      "epoch": 238.45,
      "learning_rate": 4.639772727272727e-05,
      "loss": 0.4808,
      "step": 6200
    },
    {
      "epoch": 242.3,
      "learning_rate": 4.5715909090909083e-05,
      "loss": 0.4632,
      "step": 6300
    },
    {
      "epoch": 246.15,
      "learning_rate": 4.5034090909090907e-05,
      "loss": 0.4643,
      "step": 6400
    },
    {
      "epoch": 249.97,
      "learning_rate": 4.435227272727272e-05,
      "loss": 0.4385,
      "step": 6500
    },
    {
      "epoch": 249.97,
      "eval_loss": 0.8062427043914795,
      "eval_runtime": 13.503,
      "eval_samples_per_second": 26.661,
      "eval_steps_per_second": 3.333,
      "eval_wer": 0.5382203057624461,
      "step": 6500
    },
    {
      "epoch": 253.82,
      "learning_rate": 4.367045454545454e-05,
      "loss": 0.439,
      "step": 6600
    },
    {
      "epoch": 257.67,
      "learning_rate": 4.298863636363636e-05,
      "loss": 0.4274,
      "step": 6700
    },
    {
      "epoch": 261.52,
      "learning_rate": 4.230681818181818e-05,
      "loss": 0.4177,
      "step": 6800
    },
    {
      "epoch": 265.37,
      "learning_rate": 4.1625e-05,
      "loss": 0.411,
      "step": 6900
    },
    {
      "epoch": 269.22,
      "learning_rate": 4.094318181818181e-05,
      "loss": 0.3984,
      "step": 7000
    },
    {
      "epoch": 269.22,
      "eval_loss": 0.8315810561180115,
      "eval_runtime": 13.3889,
      "eval_samples_per_second": 26.888,
      "eval_steps_per_second": 3.361,
      "eval_wer": 0.5621324970599765,
      "step": 7000
    },
    {
      "epoch": 273.07,
      "learning_rate": 4.0261363636363635e-05,
      "loss": 0.3935,
      "step": 7100
    },
    {
      "epoch": 276.9,
      "learning_rate": 3.957954545454545e-05,
      "loss": 0.3842,
      "step": 7200
    },
    {
      "epoch": 280.75,
      "learning_rate": 3.8897727272727274e-05,
      "loss": 0.3756,
      "step": 7300
    },
    {
      "epoch": 284.6,
      "learning_rate": 3.8215909090909084e-05,
      "loss": 0.3671,
      "step": 7400
    },
    {
      "epoch": 288.45,
      "learning_rate": 3.754090909090909e-05,
      "loss": 0.3666,
      "step": 7500
    },
    {
      "epoch": 288.45,
      "eval_loss": 0.8735576868057251,
      "eval_runtime": 13.3799,
      "eval_samples_per_second": 26.906,
      "eval_steps_per_second": 3.363,
      "eval_wer": 0.5503724029792239,
      "step": 7500
    },
    {
      "epoch": 292.3,
      "learning_rate": 3.6859090909090904e-05,
      "loss": 0.3454,
      "step": 7600
    },
    {
      "epoch": 296.15,
      "learning_rate": 3.617727272727273e-05,
      "loss": 0.3486,
      "step": 7700
    },
    {
      "epoch": 299.97,
      "learning_rate": 3.5495454545454544e-05,
      "loss": 0.3347,
      "step": 7800
    },
    {
      "epoch": 303.82,
      "learning_rate": 3.481363636363636e-05,
      "loss": 0.3262,
      "step": 7900
    },
    {
      "epoch": 307.67,
      "learning_rate": 3.413181818181818e-05,
      "loss": 0.3256,
      "step": 8000
    },
    {
      "epoch": 307.67,
      "eval_loss": 0.9133402705192566,
      "eval_runtime": 13.8839,
      "eval_samples_per_second": 25.929,
      "eval_steps_per_second": 3.241,
      "eval_wer": 0.568796550372403,
      "step": 8000
    },
    {
      "epoch": 311.52,
      "learning_rate": 3.345e-05,
      "loss": 0.3062,
      "step": 8100
    },
    {
      "epoch": 315.37,
      "learning_rate": 3.2768181818181816e-05,
      "loss": 0.3053,
      "step": 8200
    },
    {
      "epoch": 319.22,
      "learning_rate": 3.208636363636363e-05,
      "loss": 0.3003,
      "step": 8300
    },
    {
      "epoch": 323.07,
      "learning_rate": 3.140454545454545e-05,
      "loss": 0.2975,
      "step": 8400
    },
    {
      "epoch": 326.9,
      "learning_rate": 3.072272727272727e-05,
      "loss": 0.289,
      "step": 8500
    },
    {
      "epoch": 326.9,
      "eval_loss": 0.9556481242179871,
      "eval_runtime": 13.3982,
      "eval_samples_per_second": 26.869,
      "eval_steps_per_second": 3.359,
      "eval_wer": 0.5684045472363779,
      "step": 8500
    },
    {
      "epoch": 330.75,
      "learning_rate": 3.004090909090909e-05,
      "loss": 0.2802,
      "step": 8600
    },
    {
      "epoch": 334.6,
      "learning_rate": 2.9359090909090905e-05,
      "loss": 0.2736,
      "step": 8700
    },
    {
      "epoch": 338.45,
      "learning_rate": 2.8677272727272725e-05,
      "loss": 0.2683,
      "step": 8800
    },
    {
      "epoch": 342.3,
      "learning_rate": 2.799545454545454e-05,
      "loss": 0.2681,
      "step": 8900
    },
    {
      "epoch": 346.15,
      "learning_rate": 2.731363636363636e-05,
      "loss": 0.2663,
      "step": 9000
    },
    {
      "epoch": 346.15,
      "eval_loss": 0.9343604445457458,
      "eval_runtime": 13.4662,
      "eval_samples_per_second": 26.734,
      "eval_steps_per_second": 3.342,
      "eval_wer": 0.5707565660525284,
      "step": 9000
    },
    {
      "epoch": 349.97,
      "learning_rate": 2.6631818181818177e-05,
      "loss": 0.2597,
      "step": 9100
    },
    {
      "epoch": 353.82,
      "learning_rate": 2.5949999999999997e-05,
      "loss": 0.2636,
      "step": 9200
    },
    {
      "epoch": 357.67,
      "learning_rate": 2.5268181818181817e-05,
      "loss": 0.2538,
      "step": 9300
    },
    {
      "epoch": 361.52,
      "learning_rate": 2.4586363636363637e-05,
      "loss": 0.2505,
      "step": 9400
    },
    {
      "epoch": 365.37,
      "learning_rate": 2.3904545454545453e-05,
      "loss": 0.2445,
      "step": 9500
    },
    {
      "epoch": 365.37,
      "eval_loss": 0.9471534490585327,
      "eval_runtime": 13.371,
      "eval_samples_per_second": 26.924,
      "eval_steps_per_second": 3.365,
      "eval_wer": 0.5589964719717758,
      "step": 9500
    },
    {
      "epoch": 369.22,
      "learning_rate": 2.3222727272727273e-05,
      "loss": 0.2397,
      "step": 9600
    },
    {
      "epoch": 373.07,
      "learning_rate": 2.254090909090909e-05,
      "loss": 0.2395,
      "step": 9700
    },
    {
      "epoch": 376.9,
      "learning_rate": 2.185909090909091e-05,
      "loss": 0.2277,
      "step": 9800
    },
    {
      "epoch": 380.75,
      "learning_rate": 2.1184090909090906e-05,
      "loss": 0.2305,
      "step": 9900
    },
    {
      "epoch": 384.6,
      "learning_rate": 2.0509090909090907e-05,
      "loss": 0.2289,
      "step": 10000
    },
    {
      "epoch": 384.6,
      "eval_loss": 0.9713197350502014,
      "eval_runtime": 13.5048,
      "eval_samples_per_second": 26.657,
      "eval_steps_per_second": 3.332,
      "eval_wer": 0.5672285378283026,
      "step": 10000
    },
    {
      "epoch": 388.45,
      "learning_rate": 1.9827272727272727e-05,
      "loss": 0.2139,
      "step": 10100
    },
    {
      "epoch": 392.3,
      "learning_rate": 1.9145454545454543e-05,
      "loss": 0.2214,
      "step": 10200
    },
    {
      "epoch": 396.15,
      "learning_rate": 1.846363636363636e-05,
      "loss": 0.2053,
      "step": 10300
    },
    {
      "epoch": 399.97,
      "learning_rate": 1.7781818181818183e-05,
      "loss": 0.2007,
      "step": 10400
    },
    {
      "epoch": 403.82,
      "learning_rate": 1.71e-05,
      "loss": 0.2048,
      "step": 10500
    },
    {
      "epoch": 403.82,
      "eval_loss": 0.9977540969848633,
      "eval_runtime": 15.7748,
      "eval_samples_per_second": 22.821,
      "eval_steps_per_second": 2.853,
      "eval_wer": 0.5762446099568796,
      "step": 10500
    },
    {
      "epoch": 407.67,
      "learning_rate": 1.641818181818182e-05,
      "loss": 0.1995,
      "step": 10600
    },
    {
      "epoch": 411.52,
      "learning_rate": 1.5736363636363635e-05,
      "loss": 0.1994,
      "step": 10700
    },
    {
      "epoch": 415.37,
      "learning_rate": 1.5054545454545453e-05,
      "loss": 0.2005,
      "step": 10800
    },
    {
      "epoch": 419.22,
      "learning_rate": 1.4372727272727271e-05,
      "loss": 0.1988,
      "step": 10900
    },
    {
      "epoch": 423.07,
      "learning_rate": 1.369090909090909e-05,
      "loss": 0.1857,
      "step": 11000
    },
    {
      "epoch": 423.07,
      "eval_loss": 1.0229874849319458,
      "eval_runtime": 13.4188,
      "eval_samples_per_second": 26.828,
      "eval_steps_per_second": 3.354,
      "eval_wer": 0.5797726381811055,
      "step": 11000
    },
    {
      "epoch": 426.9,
      "learning_rate": 1.3009090909090907e-05,
      "loss": 0.1793,
      "step": 11100
    },
    {
      "epoch": 430.75,
      "learning_rate": 1.2327272727272726e-05,
      "loss": 0.1835,
      "step": 11200
    },
    {
      "epoch": 434.6,
      "learning_rate": 1.1645454545454545e-05,
      "loss": 0.1729,
      "step": 11300
    },
    {
      "epoch": 438.45,
      "learning_rate": 1.0963636363636363e-05,
      "loss": 0.1751,
      "step": 11400
    },
    {
      "epoch": 442.3,
      "learning_rate": 1.0281818181818181e-05,
      "loss": 0.1751,
      "step": 11500
    },
    {
      "epoch": 442.3,
      "eval_loss": 1.0409098863601685,
      "eval_runtime": 13.4411,
      "eval_samples_per_second": 26.784,
      "eval_steps_per_second": 3.348,
      "eval_wer": 0.5754606036848294,
      "step": 11500
    },
    {
      "epoch": 446.15,
      "learning_rate": 9.6e-06,
      "loss": 0.1743,
      "step": 11600
    },
    {
      "epoch": 449.97,
      "learning_rate": 8.918181818181818e-06,
      "loss": 0.1704,
      "step": 11700
    },
    {
      "epoch": 453.82,
      "learning_rate": 8.236363636363636e-06,
      "loss": 0.1691,
      "step": 11800
    },
    {
      "epoch": 457.67,
      "learning_rate": 7.554545454545453e-06,
      "loss": 0.1635,
      "step": 11900
    },
    {
      "epoch": 461.52,
      "learning_rate": 6.872727272727273e-06,
      "loss": 0.1688,
      "step": 12000
    },
    {
      "epoch": 461.52,
      "eval_loss": 1.0444588661193848,
      "eval_runtime": 13.5066,
      "eval_samples_per_second": 26.654,
      "eval_steps_per_second": 3.332,
      "eval_wer": 0.5727165817326538,
      "step": 12000
    },
    {
      "epoch": 465.37,
      "learning_rate": 6.190909090909091e-06,
      "loss": 0.1666,
      "step": 12100
    },
    {
      "epoch": 469.22,
      "learning_rate": 5.509090909090908e-06,
      "loss": 0.1619,
      "step": 12200
    },
    {
      "epoch": 473.07,
      "learning_rate": 4.827272727272726e-06,
      "loss": 0.161,
      "step": 12300
    },
    {
      "epoch": 476.9,
      "learning_rate": 4.145454545454545e-06,
      "loss": 0.1527,
      "step": 12400
    },
    {
      "epoch": 480.75,
      "learning_rate": 3.4636363636363636e-06,
      "loss": 0.1633,
      "step": 12500
    },
    {
      "epoch": 480.75,
      "eval_loss": 1.048424482345581,
      "eval_runtime": 13.4794,
      "eval_samples_per_second": 26.707,
      "eval_steps_per_second": 3.338,
      "eval_wer": 0.5738925911407291,
      "step": 12500
    },
    {
      "epoch": 484.6,
      "learning_rate": 2.7818181818181817e-06,
      "loss": 0.1604,
      "step": 12600
    },
    {
      "epoch": 488.45,
      "learning_rate": 2.1e-06,
      "loss": 0.156,
      "step": 12700
    },
    {
      "epoch": 492.3,
      "learning_rate": 1.418181818181818e-06,
      "loss": 0.1579,
      "step": 12800
    },
    {
      "epoch": 496.15,
      "learning_rate": 7.363636363636363e-07,
      "loss": 0.1557,
      "step": 12900
    },
    {
      "epoch": 499.97,
      "learning_rate": 5.454545454545454e-08,
      "loss": 0.1488,
      "step": 13000
    },
    {
      "epoch": 499.97,
      "eval_loss": 1.0443373918533325,
      "eval_runtime": 13.3989,
      "eval_samples_per_second": 26.868,
      "eval_steps_per_second": 3.358,
      "eval_wer": 0.5715405723245786,
      "step": 13000
    },
    {
      "epoch": 499.97,
      "step": 13000,
      "total_flos": 6.567667376811244e+19,
      "train_loss": 1.1417412889187153,
      "train_runtime": 29000.6737,
      "train_samples_per_second": 14.758,
      "train_steps_per_second": 0.448
    }
  ],
  "max_steps": 13000,
  "num_train_epochs": 500,
  "total_flos": 6.567667376811244e+19,
  "trial_name": null,
  "trial_params": null
}