anuragshas's picture
End of training
7faa6b8
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 60.0,
"global_step": 4920,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.22,
"learning_rate": 7.35e-06,
"loss": 12.2749,
"step": 100
},
{
"epoch": 2.44,
"learning_rate": 1.485e-05,
"loss": 4.8071,
"step": 200
},
{
"epoch": 3.66,
"learning_rate": 2.2349999999999998e-05,
"loss": 3.5841,
"step": 300
},
{
"epoch": 4.88,
"learning_rate": 2.985e-05,
"loss": 3.1829,
"step": 400
},
{
"epoch": 4.88,
"eval_loss": 3.1227903366088867,
"eval_runtime": 33.8472,
"eval_samples_per_second": 35.247,
"eval_steps_per_second": 2.216,
"eval_wer": 1.0,
"step": 400
},
{
"epoch": 6.1,
"learning_rate": 3.735e-05,
"loss": 3.0323,
"step": 500
},
{
"epoch": 7.32,
"learning_rate": 4.484999999999999e-05,
"loss": 2.9603,
"step": 600
},
{
"epoch": 8.54,
"learning_rate": 5.234999999999999e-05,
"loss": 2.8982,
"step": 700
},
{
"epoch": 9.76,
"learning_rate": 5.985e-05,
"loss": 2.8675,
"step": 800
},
{
"epoch": 9.76,
"eval_loss": 2.8616490364074707,
"eval_runtime": 33.8201,
"eval_samples_per_second": 35.275,
"eval_steps_per_second": 2.218,
"eval_wer": 0.9992680427463037,
"step": 800
},
{
"epoch": 10.98,
"learning_rate": 6.735e-05,
"loss": 2.7893,
"step": 900
},
{
"epoch": 12.2,
"learning_rate": 7.484999999999999e-05,
"loss": 2.2812,
"step": 1000
},
{
"epoch": 13.41,
"learning_rate": 7.312499999999999e-05,
"loss": 1.7369,
"step": 1100
},
{
"epoch": 14.63,
"learning_rate": 7.121173469387755e-05,
"loss": 1.583,
"step": 1200
},
{
"epoch": 14.63,
"eval_loss": 0.6392253637313843,
"eval_runtime": 33.9553,
"eval_samples_per_second": 35.134,
"eval_steps_per_second": 2.209,
"eval_wer": 0.6239203630507978,
"step": 1200
},
{
"epoch": 15.85,
"learning_rate": 6.92984693877551e-05,
"loss": 1.4572,
"step": 1300
},
{
"epoch": 17.07,
"learning_rate": 6.738520408163265e-05,
"loss": 1.3366,
"step": 1400
},
{
"epoch": 18.29,
"learning_rate": 6.547193877551019e-05,
"loss": 1.2668,
"step": 1500
},
{
"epoch": 19.51,
"learning_rate": 6.355867346938775e-05,
"loss": 1.1959,
"step": 1600
},
{
"epoch": 19.51,
"eval_loss": 0.36018967628479004,
"eval_runtime": 33.3624,
"eval_samples_per_second": 35.759,
"eval_steps_per_second": 2.248,
"eval_wer": 0.3651002781437564,
"step": 1600
},
{
"epoch": 20.73,
"learning_rate": 6.164540816326531e-05,
"loss": 1.1694,
"step": 1700
},
{
"epoch": 21.95,
"learning_rate": 5.9732142857142846e-05,
"loss": 1.1001,
"step": 1800
},
{
"epoch": 23.17,
"learning_rate": 5.7818877551020404e-05,
"loss": 1.0737,
"step": 1900
},
{
"epoch": 24.39,
"learning_rate": 5.5905612244897955e-05,
"loss": 1.0276,
"step": 2000
},
{
"epoch": 24.39,
"eval_loss": 0.3020930588245392,
"eval_runtime": 33.6823,
"eval_samples_per_second": 35.419,
"eval_steps_per_second": 2.227,
"eval_wer": 0.29805299370516763,
"step": 2000
},
{
"epoch": 25.61,
"learning_rate": 5.3992346938775506e-05,
"loss": 1.0293,
"step": 2100
},
{
"epoch": 26.83,
"learning_rate": 5.207908163265306e-05,
"loss": 0.9966,
"step": 2200
},
{
"epoch": 28.05,
"learning_rate": 5.016581632653061e-05,
"loss": 0.9721,
"step": 2300
},
{
"epoch": 29.27,
"learning_rate": 4.8271683673469385e-05,
"loss": 0.9671,
"step": 2400
},
{
"epoch": 29.27,
"eval_loss": 0.28717485070228577,
"eval_runtime": 33.2455,
"eval_samples_per_second": 35.885,
"eval_steps_per_second": 2.256,
"eval_wer": 0.27389840433318696,
"step": 2400
},
{
"epoch": 30.49,
"learning_rate": 4.635841836734693e-05,
"loss": 0.9536,
"step": 2500
},
{
"epoch": 31.71,
"learning_rate": 4.444515306122448e-05,
"loss": 0.934,
"step": 2600
},
{
"epoch": 32.93,
"learning_rate": 4.253188775510204e-05,
"loss": 0.9136,
"step": 2700
},
{
"epoch": 34.15,
"learning_rate": 4.061862244897959e-05,
"loss": 0.873,
"step": 2800
},
{
"epoch": 34.15,
"eval_loss": 0.259307324886322,
"eval_runtime": 33.4272,
"eval_samples_per_second": 35.689,
"eval_steps_per_second": 2.244,
"eval_wer": 0.24593763724198506,
"step": 2800
},
{
"epoch": 35.37,
"learning_rate": 3.870535714285714e-05,
"loss": 0.8649,
"step": 2900
},
{
"epoch": 36.59,
"learning_rate": 3.679209183673469e-05,
"loss": 0.8464,
"step": 3000
},
{
"epoch": 37.8,
"learning_rate": 3.487882653061224e-05,
"loss": 0.8545,
"step": 3100
},
{
"epoch": 39.02,
"learning_rate": 3.2965561224489794e-05,
"loss": 0.8513,
"step": 3200
},
{
"epoch": 39.02,
"eval_loss": 0.2617344856262207,
"eval_runtime": 33.3367,
"eval_samples_per_second": 35.786,
"eval_steps_per_second": 2.25,
"eval_wer": 0.24725516029863856,
"step": 3200
},
{
"epoch": 40.24,
"learning_rate": 3.1052295918367345e-05,
"loss": 0.8424,
"step": 3300
},
{
"epoch": 41.46,
"learning_rate": 2.9139030612244896e-05,
"loss": 0.8244,
"step": 3400
},
{
"epoch": 42.68,
"learning_rate": 2.7225765306122448e-05,
"loss": 0.8157,
"step": 3500
},
{
"epoch": 43.9,
"learning_rate": 2.53125e-05,
"loss": 0.8132,
"step": 3600
},
{
"epoch": 43.9,
"eval_loss": 0.25475597381591797,
"eval_runtime": 33.5269,
"eval_samples_per_second": 35.583,
"eval_steps_per_second": 2.237,
"eval_wer": 0.2425706338749817,
"step": 3600
},
{
"epoch": 45.12,
"learning_rate": 2.339923469387755e-05,
"loss": 0.8008,
"step": 3700
},
{
"epoch": 46.34,
"learning_rate": 2.14859693877551e-05,
"loss": 0.7863,
"step": 3800
},
{
"epoch": 47.56,
"learning_rate": 1.9572704081632652e-05,
"loss": 0.7965,
"step": 3900
},
{
"epoch": 48.78,
"learning_rate": 1.7659438775510203e-05,
"loss": 0.7935,
"step": 4000
},
{
"epoch": 48.78,
"eval_loss": 0.2637428939342499,
"eval_runtime": 33.4727,
"eval_samples_per_second": 35.641,
"eval_steps_per_second": 2.241,
"eval_wer": 0.23525106133801785,
"step": 4000
},
{
"epoch": 50.0,
"learning_rate": 1.5746173469387754e-05,
"loss": 0.7857,
"step": 4100
},
{
"epoch": 51.22,
"learning_rate": 1.3832908163265304e-05,
"loss": 0.7834,
"step": 4200
},
{
"epoch": 52.44,
"learning_rate": 1.1919642857142855e-05,
"loss": 0.7732,
"step": 4300
},
{
"epoch": 53.66,
"learning_rate": 1.0006377551020406e-05,
"loss": 0.7565,
"step": 4400
},
{
"epoch": 53.66,
"eval_loss": 0.2629481256008148,
"eval_runtime": 33.9009,
"eval_samples_per_second": 35.191,
"eval_steps_per_second": 2.212,
"eval_wer": 0.23217684087249305,
"step": 4400
},
{
"epoch": 54.88,
"learning_rate": 8.093112244897959e-06,
"loss": 0.7525,
"step": 4500
},
{
"epoch": 56.1,
"learning_rate": 6.198979591836734e-06,
"loss": 0.7716,
"step": 4600
},
{
"epoch": 57.32,
"learning_rate": 4.2857142857142855e-06,
"loss": 0.7521,
"step": 4700
},
{
"epoch": 58.54,
"learning_rate": 2.372448979591836e-06,
"loss": 0.7359,
"step": 4800
},
{
"epoch": 58.54,
"eval_loss": 0.2578625977039337,
"eval_runtime": 33.5888,
"eval_samples_per_second": 35.518,
"eval_steps_per_second": 2.233,
"eval_wer": 0.22529644268774704,
"step": 4800
},
{
"epoch": 59.76,
"learning_rate": 4.591836734693877e-07,
"loss": 0.7476,
"step": 4900
},
{
"epoch": 60.0,
"step": 4920,
"total_flos": 1.5656877251307375e+19,
"train_loss": 1.5886853283982936,
"train_runtime": 6665.2028,
"train_samples_per_second": 23.459,
"train_steps_per_second": 0.738
}
],
"max_steps": 4920,
"num_train_epochs": 60,
"total_flos": 1.5656877251307375e+19,
"trial_name": null,
"trial_params": null
}