{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 60.0,
  "global_step": 7440,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.81,
      "learning_rate": 1.2903225806451613e-05,
      "loss": 13.4856,
      "step": 100
    },
    {
      "epoch": 0.81,
      "eval_loss": 12.348929405212402,
      "eval_runtime": 128.1609,
      "eval_samples_per_second": 14.388,
      "eval_steps_per_second": 1.802,
      "eval_wer": 1.0,
      "step": 100
    },
    {
      "epoch": 1.61,
      "learning_rate": 2.6344086021505376e-05,
      "loss": 4.9759,
      "step": 200
    },
    {
      "epoch": 1.61,
      "eval_loss": 3.1773388385772705,
      "eval_runtime": 128.7202,
      "eval_samples_per_second": 14.326,
      "eval_steps_per_second": 1.795,
      "eval_wer": 1.0,
      "step": 200
    },
    {
      "epoch": 2.42,
      "learning_rate": 3.978494623655914e-05,
      "loss": 2.9905,
      "step": 300
    },
    {
      "epoch": 2.42,
      "eval_loss": 2.9219164848327637,
      "eval_runtime": 129.5213,
      "eval_samples_per_second": 14.237,
      "eval_steps_per_second": 1.783,
      "eval_wer": 1.0,
      "step": 300
    },
    {
      "epoch": 3.23,
      "learning_rate": 5.32258064516129e-05,
      "loss": 2.8774,
      "step": 400
    },
    {
      "epoch": 3.23,
      "eval_loss": 2.8899402618408203,
      "eval_runtime": 129.0058,
      "eval_samples_per_second": 14.294,
      "eval_steps_per_second": 1.791,
      "eval_wer": 1.0,
      "step": 400
    },
    {
      "epoch": 4.03,
      "learning_rate": 6.666666666666667e-05,
      "loss": 2.832,
      "step": 500
    },
    {
      "epoch": 4.03,
      "eval_loss": 2.787729501724243,
      "eval_runtime": 129.1257,
      "eval_samples_per_second": 14.281,
      "eval_steps_per_second": 1.789,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 4.84,
      "learning_rate": 8.010752688172043e-05,
      "loss": 2.6921,
      "step": 600
    },
    {
      "epoch": 4.84,
      "eval_loss": 2.3222157955169678,
      "eval_runtime": 129.5689,
      "eval_samples_per_second": 14.232,
      "eval_steps_per_second": 1.783,
      "eval_wer": 1.020324445273168,
      "step": 600
    },
    {
      "epoch": 5.65,
      "learning_rate": 9.35483870967742e-05,
      "loss": 1.7609,
      "step": 700
    },
    {
      "epoch": 5.65,
      "eval_loss": 0.7123318910598755,
      "eval_runtime": 129.8973,
      "eval_samples_per_second": 14.196,
      "eval_steps_per_second": 1.778,
      "eval_wer": 0.7778295730001865,
      "step": 700
    },
    {
      "epoch": 6.45,
      "learning_rate": 0.0001,
      "loss": 1.0829,
      "step": 800
    },
    {
      "epoch": 6.45,
      "eval_loss": 0.5219887495040894,
      "eval_runtime": 130.0287,
      "eval_samples_per_second": 14.181,
      "eval_steps_per_second": 1.777,
      "eval_wer": 0.6609173969793026,
      "step": 800
    },
    {
      "epoch": 7.26,
      "learning_rate": 0.0001,
      "loss": 0.8991,
      "step": 900
    },
    {
      "epoch": 7.26,
      "eval_loss": 0.4353589415550232,
      "eval_runtime": 129.1246,
      "eval_samples_per_second": 14.281,
      "eval_steps_per_second": 1.789,
      "eval_wer": 0.5778482192802535,
      "step": 900
    },
    {
      "epoch": 8.06,
      "learning_rate": 0.0001,
      "loss": 0.7943,
      "step": 1000
    },
    {
      "epoch": 8.06,
      "eval_loss": 0.409180223941803,
      "eval_runtime": 129.6166,
      "eval_samples_per_second": 14.227,
      "eval_steps_per_second": 1.782,
      "eval_wer": 0.5485735595748649,
      "step": 1000
    },
    {
      "epoch": 8.87,
      "learning_rate": 0.0001,
      "loss": 0.7319,
      "step": 1100
    },
    {
      "epoch": 8.87,
      "eval_loss": 0.37231260538101196,
      "eval_runtime": 129.5968,
      "eval_samples_per_second": 14.229,
      "eval_steps_per_second": 1.782,
      "eval_wer": 0.5166884206600784,
      "step": 1100
    },
    {
      "epoch": 9.68,
      "learning_rate": 0.0001,
      "loss": 0.6875,
      "step": 1200
    },
    {
      "epoch": 9.68,
      "eval_loss": 0.3546667993068695,
      "eval_runtime": 129.8844,
      "eval_samples_per_second": 14.197,
      "eval_steps_per_second": 1.779,
      "eval_wer": 0.5021443222077195,
      "step": 1200
    },
    {
      "epoch": 10.48,
      "learning_rate": 0.0001,
      "loss": 0.6538,
      "step": 1300
    },
    {
      "epoch": 10.48,
      "eval_loss": 0.3382996916770935,
      "eval_runtime": 130.4091,
      "eval_samples_per_second": 14.14,
      "eval_steps_per_second": 1.771,
      "eval_wer": 0.5028901734104047,
      "step": 1300
    },
    {
      "epoch": 11.29,
      "learning_rate": 0.0001,
      "loss": 0.6385,
      "step": 1400
    },
    {
      "epoch": 11.29,
      "eval_loss": 0.33781182765960693,
      "eval_runtime": 129.3767,
      "eval_samples_per_second": 14.253,
      "eval_steps_per_second": 1.785,
      "eval_wer": 0.4781838523214619,
      "step": 1400
    },
    {
      "epoch": 12.1,
      "learning_rate": 0.0001,
      "loss": 0.6031,
      "step": 1500
    },
    {
      "epoch": 12.1,
      "eval_loss": 0.33670100569725037,
      "eval_runtime": 130.1665,
      "eval_samples_per_second": 14.166,
      "eval_steps_per_second": 1.775,
      "eval_wer": 0.47445459630803655,
      "step": 1500
    },
    {
      "epoch": 12.9,
      "learning_rate": 0.0001,
      "loss": 0.5778,
      "step": 1600
    },
    {
      "epoch": 12.9,
      "eval_loss": 0.3219880163669586,
      "eval_runtime": 130.1154,
      "eval_samples_per_second": 14.172,
      "eval_steps_per_second": 1.775,
      "eval_wer": 0.4559015476412456,
      "step": 1600
    },
    {
      "epoch": 13.71,
      "learning_rate": 0.0001,
      "loss": 0.5531,
      "step": 1700
    },
    {
      "epoch": 13.71,
      "eval_loss": 0.3436262309551239,
      "eval_runtime": 130.5395,
      "eval_samples_per_second": 14.126,
      "eval_steps_per_second": 1.77,
      "eval_wer": 0.4655043818758158,
      "step": 1700
    },
    {
      "epoch": 14.52,
      "learning_rate": 0.0001,
      "loss": 0.5506,
      "step": 1800
    },
    {
      "epoch": 14.52,
      "eval_loss": 0.32741010189056396,
      "eval_runtime": 130.5027,
      "eval_samples_per_second": 14.13,
      "eval_steps_per_second": 1.77,
      "eval_wer": 0.4504008950214432,
      "step": 1800
    },
    {
      "epoch": 15.32,
      "learning_rate": 0.0001,
      "loss": 0.5186,
      "step": 1900
    },
    {
      "epoch": 15.32,
      "eval_loss": 0.3106687664985657,
      "eval_runtime": 131.9389,
      "eval_samples_per_second": 13.976,
      "eval_steps_per_second": 1.751,
      "eval_wer": 0.44368823419727765,
      "step": 1900
    },
    {
      "epoch": 16.13,
      "learning_rate": 0.0001,
      "loss": 0.5065,
      "step": 2000
    },
    {
      "epoch": 16.13,
      "eval_loss": 0.3165331482887268,
      "eval_runtime": 130.835,
      "eval_samples_per_second": 14.094,
      "eval_steps_per_second": 1.766,
      "eval_wer": 0.44005220958418795,
      "step": 2000
    },
    {
      "epoch": 16.94,
      "learning_rate": 0.0001,
      "loss": 0.5016,
      "step": 2100
    },
    {
      "epoch": 16.94,
      "eval_loss": 0.3141399919986725,
      "eval_runtime": 131.1616,
      "eval_samples_per_second": 14.059,
      "eval_steps_per_second": 1.761,
      "eval_wer": 0.440798060786873,
      "step": 2100
    },
    {
      "epoch": 17.74,
      "learning_rate": 0.0001,
      "loss": 0.4878,
      "step": 2200
    },
    {
      "epoch": 17.74,
      "eval_loss": 0.323138564825058,
      "eval_runtime": 129.6671,
      "eval_samples_per_second": 14.221,
      "eval_steps_per_second": 1.781,
      "eval_wer": 0.4408912921872086,
      "step": 2200
    },
    {
      "epoch": 18.55,
      "learning_rate": 0.0001,
      "loss": 0.4701,
      "step": 2300
    },
    {
      "epoch": 18.55,
      "eval_loss": 0.314998984336853,
      "eval_runtime": 130.2103,
      "eval_samples_per_second": 14.162,
      "eval_steps_per_second": 1.774,
      "eval_wer": 0.441823606190565,
      "step": 2300
    },
    {
      "epoch": 19.35,
      "learning_rate": 0.0001,
      "loss": 0.4643,
      "step": 2400
    },
    {
      "epoch": 19.35,
      "eval_loss": 0.32002875208854675,
      "eval_runtime": 129.7219,
      "eval_samples_per_second": 14.215,
      "eval_steps_per_second": 1.781,
      "eval_wer": 0.43641618497109824,
      "step": 2400
    },
    {
      "epoch": 20.16,
      "learning_rate": 0.0001,
      "loss": 0.4623,
      "step": 2500
    },
    {
      "epoch": 20.16,
      "eval_loss": 0.3018810749053955,
      "eval_runtime": 127.3741,
      "eval_samples_per_second": 14.477,
      "eval_steps_per_second": 1.814,
      "eval_wer": 0.42187208651873953,
      "step": 2500
    },
    {
      "epoch": 20.97,
      "learning_rate": 0.0001,
      "loss": 0.4497,
      "step": 2600
    },
    {
      "epoch": 20.97,
      "eval_loss": 0.3108297288417816,
      "eval_runtime": 127.9529,
      "eval_samples_per_second": 14.412,
      "eval_steps_per_second": 1.805,
      "eval_wer": 0.43054260674995337,
      "step": 2600
    },
    {
      "epoch": 21.77,
      "learning_rate": 0.0001,
      "loss": 0.4439,
      "step": 2700
    },
    {
      "epoch": 21.77,
      "eval_loss": 0.31016433238983154,
      "eval_runtime": 127.0701,
      "eval_samples_per_second": 14.512,
      "eval_steps_per_second": 1.818,
      "eval_wer": 0.42429610292746595,
      "step": 2700
    },
    {
      "epoch": 22.58,
      "learning_rate": 0.0001,
      "loss": 0.4289,
      "step": 2800
    },
    {
      "epoch": 22.58,
      "eval_loss": 0.29791948199272156,
      "eval_runtime": 128.4647,
      "eval_samples_per_second": 14.354,
      "eval_steps_per_second": 1.798,
      "eval_wer": 0.41935483870967744,
      "step": 2800
    },
    {
      "epoch": 23.39,
      "learning_rate": 0.0001,
      "loss": 0.423,
      "step": 2900
    },
    {
      "epoch": 23.39,
      "eval_loss": 0.3108172118663788,
      "eval_runtime": 128.2048,
      "eval_samples_per_second": 14.383,
      "eval_steps_per_second": 1.802,
      "eval_wer": 0.41832929330598545,
      "step": 2900
    },
    {
      "epoch": 24.19,
      "learning_rate": 0.0001,
      "loss": 0.4214,
      "step": 3000
    },
    {
      "epoch": 24.19,
      "eval_loss": 0.3556348383426666,
      "eval_runtime": 128.3694,
      "eval_samples_per_second": 14.365,
      "eval_steps_per_second": 1.799,
      "eval_wer": 0.42000745851202687,
      "step": 3000
    },
    {
      "epoch": 25.0,
      "learning_rate": 0.0001,
      "loss": 0.4139,
      "step": 3100
    },
    {
      "epoch": 25.0,
      "eval_loss": 0.3197577893733978,
      "eval_runtime": 128.1255,
      "eval_samples_per_second": 14.392,
      "eval_steps_per_second": 1.803,
      "eval_wer": 0.42243147492075334,
      "step": 3100
    },
    {
      "epoch": 25.81,
      "learning_rate": 0.0001,
      "loss": 0.4015,
      "step": 3200
    },
    {
      "epoch": 25.81,
      "eval_loss": 0.29826977849006653,
      "eval_runtime": 128.9516,
      "eval_samples_per_second": 14.3,
      "eval_steps_per_second": 1.791,
      "eval_wer": 0.4151594256945739,
      "step": 3200
    },
    {
      "epoch": 26.61,
      "learning_rate": 0.0001,
      "loss": 0.4044,
      "step": 3300
    },
    {
      "epoch": 26.61,
      "eval_loss": 0.28930172324180603,
      "eval_runtime": 128.281,
      "eval_samples_per_second": 14.375,
      "eval_steps_per_second": 1.801,
      "eval_wer": 0.4065821368636957,
      "step": 3300
    },
    {
      "epoch": 27.42,
      "learning_rate": 0.0001,
      "loss": 0.4013,
      "step": 3400
    },
    {
      "epoch": 27.42,
      "eval_loss": 0.32470086216926575,
      "eval_runtime": 128.6984,
      "eval_samples_per_second": 14.328,
      "eval_steps_per_second": 1.795,
      "eval_wer": 0.4160917396979303,
      "step": 3400
    },
    {
      "epoch": 28.23,
      "learning_rate": 0.0001,
      "loss": 0.3959,
      "step": 3500
    },
    {
      "epoch": 28.23,
      "eval_loss": 0.32055872678756714,
      "eval_runtime": 128.9252,
      "eval_samples_per_second": 14.303,
      "eval_steps_per_second": 1.792,
      "eval_wer": 0.4156255826962521,
      "step": 3500
    },
    {
      "epoch": 29.03,
      "learning_rate": 0.0001,
      "loss": 0.3923,
      "step": 3600
    },
    {
      "epoch": 29.03,
      "eval_loss": 0.32669639587402344,
      "eval_runtime": 129.4219,
      "eval_samples_per_second": 14.248,
      "eval_steps_per_second": 1.785,
      "eval_wer": 0.41432034309155324,
      "step": 3600
    },
    {
      "epoch": 29.84,
      "learning_rate": 0.0001,
      "loss": 0.3789,
      "step": 3700
    },
    {
      "epoch": 29.84,
      "eval_loss": 0.31051748991012573,
      "eval_runtime": 129.0113,
      "eval_samples_per_second": 14.293,
      "eval_steps_per_second": 1.791,
      "eval_wer": 0.4084467648704084,
      "step": 3700
    },
    {
      "epoch": 30.65,
      "learning_rate": 9.795698924731184e-05,
      "loss": 0.371,
      "step": 3800
    },
    {
      "epoch": 30.65,
      "eval_loss": 0.31601375341415405,
      "eval_runtime": 130.3347,
      "eval_samples_per_second": 14.148,
      "eval_steps_per_second": 1.772,
      "eval_wer": 0.41031139287712104,
      "step": 3800
    },
    {
      "epoch": 31.45,
      "learning_rate": 9.526881720430108e-05,
      "loss": 0.3813,
      "step": 3900
    },
    {
      "epoch": 31.45,
      "eval_loss": 0.32537344098091125,
      "eval_runtime": 129.4643,
      "eval_samples_per_second": 14.243,
      "eval_steps_per_second": 1.784,
      "eval_wer": 0.4071415252657095,
      "step": 3900
    },
    {
      "epoch": 32.26,
      "learning_rate": 9.258064516129033e-05,
      "loss": 0.3575,
      "step": 4000
    },
    {
      "epoch": 32.26,
      "eval_loss": 0.35630208253860474,
      "eval_runtime": 129.9641,
      "eval_samples_per_second": 14.189,
      "eval_steps_per_second": 1.777,
      "eval_wer": 0.40928584747342905,
      "step": 4000
    },
    {
      "epoch": 33.06,
      "learning_rate": 8.989247311827958e-05,
      "loss": 0.3642,
      "step": 4100
    },
    {
      "epoch": 33.06,
      "eval_loss": 0.29261836409568787,
      "eval_runtime": 129.3284,
      "eval_samples_per_second": 14.258,
      "eval_steps_per_second": 1.786,
      "eval_wer": 0.4012679470445646,
      "step": 4100
    },
    {
      "epoch": 33.87,
      "learning_rate": 8.720430107526883e-05,
      "loss": 0.3494,
      "step": 4200
    },
    {
      "epoch": 33.87,
      "eval_loss": 0.3040522634983063,
      "eval_runtime": 129.9364,
      "eval_samples_per_second": 14.192,
      "eval_steps_per_second": 1.778,
      "eval_wer": 0.39912362483684505,
      "step": 4200
    },
    {
      "epoch": 34.68,
      "learning_rate": 8.451612903225808e-05,
      "loss": 0.3446,
      "step": 4300
    },
    {
      "epoch": 34.68,
      "eval_loss": 0.30367347598075867,
      "eval_runtime": 129.7515,
      "eval_samples_per_second": 14.212,
      "eval_steps_per_second": 1.78,
      "eval_wer": 0.3964199142271117,
      "step": 4300
    },
    {
      "epoch": 35.48,
      "learning_rate": 8.182795698924732e-05,
      "loss": 0.3458,
      "step": 4400
    },
    {
      "epoch": 35.48,
      "eval_loss": 0.3123793303966522,
      "eval_runtime": 129.7166,
      "eval_samples_per_second": 14.216,
      "eval_steps_per_second": 1.781,
      "eval_wer": 0.3993100876375163,
      "step": 4400
    },
    {
      "epoch": 36.29,
      "learning_rate": 7.913978494623657e-05,
      "loss": 0.3418,
      "step": 4500
    },
    {
      "epoch": 36.29,
      "eval_loss": 0.33982059359550476,
      "eval_runtime": 130.2572,
      "eval_samples_per_second": 14.157,
      "eval_steps_per_second": 1.773,
      "eval_wer": 0.40052209584187953,
      "step": 4500
    },
    {
      "epoch": 37.1,
      "learning_rate": 7.647849462365591e-05,
      "loss": 0.3424,
      "step": 4600
    },
    {
      "epoch": 37.1,
      "eval_loss": 0.3201097548007965,
      "eval_runtime": 130.7802,
      "eval_samples_per_second": 14.1,
      "eval_steps_per_second": 1.766,
      "eval_wer": 0.4044378146559761,
      "step": 4600
    },
    {
      "epoch": 37.9,
      "learning_rate": 7.379032258064516e-05,
      "loss": 0.3256,
      "step": 4700
    },
    {
      "epoch": 37.9,
      "eval_loss": 0.31234973669052124,
      "eval_runtime": 129.793,
      "eval_samples_per_second": 14.207,
      "eval_steps_per_second": 1.78,
      "eval_wer": 0.39912362483684505,
      "step": 4700
    },
    {
      "epoch": 38.71,
      "learning_rate": 7.110215053763441e-05,
      "loss": 0.3183,
      "step": 4800
    },
    {
      "epoch": 38.71,
      "eval_loss": 0.3197832405567169,
      "eval_runtime": 131.4284,
      "eval_samples_per_second": 14.03,
      "eval_steps_per_second": 1.758,
      "eval_wer": 0.39707253402946113,
      "step": 4800
    },
    {
      "epoch": 39.52,
      "learning_rate": 6.841397849462365e-05,
      "loss": 0.3189,
      "step": 4900
    },
    {
      "epoch": 39.52,
      "eval_loss": 0.32954466342926025,
      "eval_runtime": 135.8464,
      "eval_samples_per_second": 13.574,
      "eval_steps_per_second": 1.7,
      "eval_wer": 0.4004288644415439,
      "step": 4900
    },
    {
      "epoch": 40.32,
      "learning_rate": 6.57258064516129e-05,
      "loss": 0.3227,
      "step": 5000
    },
    {
      "epoch": 40.32,
      "eval_loss": 0.32881081104278564,
      "eval_runtime": 135.8344,
      "eval_samples_per_second": 13.575,
      "eval_steps_per_second": 1.701,
      "eval_wer": 0.39604698862576915,
      "step": 5000
    },
    {
      "epoch": 41.13,
      "learning_rate": 6.303763440860215e-05,
      "loss": 0.3007,
      "step": 5100
    },
    {
      "epoch": 41.13,
      "eval_loss": 0.3239193856716156,
      "eval_runtime": 131.4702,
      "eval_samples_per_second": 14.026,
      "eval_steps_per_second": 1.757,
      "eval_wer": 0.39558083162409097,
      "step": 5100
    },
    {
      "epoch": 41.94,
      "learning_rate": 6.0349462365591405e-05,
      "loss": 0.3013,
      "step": 5200
    },
    {
      "epoch": 41.94,
      "eval_loss": 0.32908666133880615,
      "eval_runtime": 131.5819,
      "eval_samples_per_second": 14.014,
      "eval_steps_per_second": 1.756,
      "eval_wer": 0.39464851762073466,
      "step": 5200
    },
    {
      "epoch": 42.74,
      "learning_rate": 5.7661290322580655e-05,
      "loss": 0.3067,
      "step": 5300
    },
    {
      "epoch": 42.74,
      "eval_loss": 0.3361692726612091,
      "eval_runtime": 131.6013,
      "eval_samples_per_second": 14.012,
      "eval_steps_per_second": 1.755,
      "eval_wer": 0.3944620548200634,
      "step": 5300
    },
    {
      "epoch": 43.55,
      "learning_rate": 5.497311827956989e-05,
      "loss": 0.3047,
      "step": 5400
    },
    {
      "epoch": 43.55,
      "eval_loss": 0.32479631900787354,
      "eval_runtime": 131.6497,
      "eval_samples_per_second": 14.007,
      "eval_steps_per_second": 1.755,
      "eval_wer": 0.39297035241469325,
      "step": 5400
    },
    {
      "epoch": 44.35,
      "learning_rate": 5.228494623655914e-05,
      "loss": 0.2939,
      "step": 5500
    },
    {
      "epoch": 44.35,
      "eval_loss": 0.3141416013240814,
      "eval_runtime": 131.4484,
      "eval_samples_per_second": 14.028,
      "eval_steps_per_second": 1.757,
      "eval_wer": 0.392783889614022,
      "step": 5500
    },
    {
      "epoch": 45.16,
      "learning_rate": 4.959677419354839e-05,
      "loss": 0.2795,
      "step": 5600
    },
    {
      "epoch": 45.16,
      "eval_loss": 0.3212459981441498,
      "eval_runtime": 132.0759,
      "eval_samples_per_second": 13.962,
      "eval_steps_per_second": 1.749,
      "eval_wer": 0.392783889614022,
      "step": 5600
    },
    {
      "epoch": 45.97,
      "learning_rate": 4.690860215053764e-05,
      "loss": 0.295,
      "step": 5700
    },
    {
      "epoch": 45.97,
      "eval_loss": 0.33260008692741394,
      "eval_runtime": 131.9753,
      "eval_samples_per_second": 13.972,
      "eval_steps_per_second": 1.75,
      "eval_wer": 0.39091926160730933,
      "step": 5700
    },
    {
      "epoch": 46.77,
      "learning_rate": 4.4220430107526885e-05,
      "loss": 0.2819,
      "step": 5800
    },
    {
      "epoch": 46.77,
      "eval_loss": 0.3328990936279297,
      "eval_runtime": 132.6165,
      "eval_samples_per_second": 13.905,
      "eval_steps_per_second": 1.742,
      "eval_wer": 0.39408912921872086,
      "step": 5800
    },
    {
      "epoch": 47.58,
      "learning_rate": 4.1532258064516135e-05,
      "loss": 0.2881,
      "step": 5900
    },
    {
      "epoch": 47.58,
      "eval_loss": 0.32989582419395447,
      "eval_runtime": 132.6745,
      "eval_samples_per_second": 13.899,
      "eval_steps_per_second": 1.741,
      "eval_wer": 0.392783889614022,
      "step": 5900
    },
    {
      "epoch": 48.39,
      "learning_rate": 3.884408602150538e-05,
      "loss": 0.2826,
      "step": 6000
    },
    {
      "epoch": 48.39,
      "eval_loss": 0.33995482325553894,
      "eval_runtime": 132.1443,
      "eval_samples_per_second": 13.954,
      "eval_steps_per_second": 1.748,
      "eval_wer": 0.3911057244079806,
      "step": 6000
    },
    {
      "epoch": 49.19,
      "learning_rate": 3.615591397849463e-05,
      "loss": 0.2741,
      "step": 6100
    },
    {
      "epoch": 49.19,
      "eval_loss": 0.3233118951320648,
      "eval_runtime": 132.6042,
      "eval_samples_per_second": 13.906,
      "eval_steps_per_second": 1.742,
      "eval_wer": 0.38784262539623343,
      "step": 6100
    },
    {
      "epoch": 50.0,
      "learning_rate": 3.346774193548387e-05,
      "loss": 0.2621,
      "step": 6200
    },
    {
      "epoch": 50.0,
      "eval_loss": 0.31878435611724854,
      "eval_runtime": 133.6242,
      "eval_samples_per_second": 13.8,
      "eval_steps_per_second": 1.729,
      "eval_wer": 0.38905463360059667,
      "step": 6200
    },
    {
      "epoch": 50.81,
      "learning_rate": 3.077956989247312e-05,
      "loss": 0.2743,
      "step": 6300
    },
    {
      "epoch": 50.81,
      "eval_loss": 0.32761555910110474,
      "eval_runtime": 133.7291,
      "eval_samples_per_second": 13.789,
      "eval_steps_per_second": 1.727,
      "eval_wer": 0.3868170799925415,
      "step": 6300
    },
    {
      "epoch": 51.61,
      "learning_rate": 2.8091397849462365e-05,
      "loss": 0.272,
      "step": 6400
    },
    {
      "epoch": 51.61,
      "eval_loss": 0.3271888196468353,
      "eval_runtime": 133.5583,
      "eval_samples_per_second": 13.807,
      "eval_steps_per_second": 1.73,
      "eval_wer": 0.38551184038784264,
      "step": 6400
    },
    {
      "epoch": 52.42,
      "learning_rate": 2.5403225806451615e-05,
      "loss": 0.2717,
      "step": 6500
    },
    {
      "epoch": 52.42,
      "eval_loss": 0.32686877250671387,
      "eval_runtime": 133.4336,
      "eval_samples_per_second": 13.82,
      "eval_steps_per_second": 1.731,
      "eval_wer": 0.38299459257878055,
      "step": 6500
    },
    {
      "epoch": 53.23,
      "learning_rate": 2.271505376344086e-05,
      "loss": 0.261,
      "step": 6600
    },
    {
      "epoch": 53.23,
      "eval_loss": 0.3300979435443878,
      "eval_runtime": 133.7969,
      "eval_samples_per_second": 13.782,
      "eval_steps_per_second": 1.726,
      "eval_wer": 0.3838336751818012,
      "step": 6600
    },
    {
      "epoch": 54.03,
      "learning_rate": 2.002688172043011e-05,
      "loss": 0.2499,
      "step": 6700
    },
    {
      "epoch": 54.03,
      "eval_loss": 0.3309214413166046,
      "eval_runtime": 133.9684,
      "eval_samples_per_second": 13.764,
      "eval_steps_per_second": 1.724,
      "eval_wer": 0.38560507178817827,
      "step": 6700
    },
    {
      "epoch": 54.84,
      "learning_rate": 1.733870967741936e-05,
      "loss": 0.2617,
      "step": 6800
    },
    {
      "epoch": 54.84,
      "eval_loss": 0.32907187938690186,
      "eval_runtime": 135.3114,
      "eval_samples_per_second": 13.628,
      "eval_steps_per_second": 1.707,
      "eval_wer": 0.3820622785754242,
      "step": 6800
    },
    {
      "epoch": 55.65,
      "learning_rate": 1.4650537634408603e-05,
      "loss": 0.2628,
      "step": 6900
    },
    {
      "epoch": 55.65,
      "eval_loss": 0.32681843638420105,
      "eval_runtime": 135.4461,
      "eval_samples_per_second": 13.614,
      "eval_steps_per_second": 1.705,
      "eval_wer": 0.38085027037106095,
      "step": 6900
    },
    {
      "epoch": 56.45,
      "learning_rate": 1.196236559139785e-05,
      "loss": 0.2528,
      "step": 7000
    },
    {
      "epoch": 56.45,
      "eval_loss": 0.32666248083114624,
      "eval_runtime": 134.5769,
      "eval_samples_per_second": 13.702,
      "eval_steps_per_second": 1.716,
      "eval_wer": 0.3807570389707253,
      "step": 7000
    },
    {
      "epoch": 57.26,
      "learning_rate": 9.274193548387097e-06,
      "loss": 0.2475,
      "step": 7100
    },
    {
      "epoch": 57.26,
      "eval_loss": 0.32469746470451355,
      "eval_runtime": 135.31,
      "eval_samples_per_second": 13.628,
      "eval_steps_per_second": 1.707,
      "eval_wer": 0.37963826216669777,
      "step": 7100
    },
    {
      "epoch": 58.06,
      "learning_rate": 6.586021505376344e-06,
      "loss": 0.2545,
      "step": 7200
    },
    {
      "epoch": 58.06,
      "eval_loss": 0.32185879349708557,
      "eval_runtime": 135.4299,
      "eval_samples_per_second": 13.616,
      "eval_steps_per_second": 1.706,
      "eval_wer": 0.3789856423643483,
      "step": 7200
    },
    {
      "epoch": 58.87,
      "learning_rate": 3.8978494623655915e-06,
      "loss": 0.2571,
      "step": 7300
    },
    {
      "epoch": 58.87,
      "eval_loss": 0.32077959179878235,
      "eval_runtime": 135.1873,
      "eval_samples_per_second": 13.64,
      "eval_steps_per_second": 1.709,
      "eval_wer": 0.37889241096401266,
      "step": 7300
    },
    {
      "epoch": 59.68,
      "learning_rate": 1.2096774193548388e-06,
      "loss": 0.2546,
      "step": 7400
    },
    {
      "epoch": 59.68,
      "eval_loss": 0.3218778669834137,
      "eval_runtime": 134.9556,
      "eval_samples_per_second": 13.664,
      "eval_steps_per_second": 1.712,
      "eval_wer": 0.37945179936602647,
      "step": 7400
    },
    {
      "epoch": 60.0,
      "step": 7440,
      "total_flos": 2.7374813261347353e+19,
      "train_loss": 0.0985529641951284,
      "train_runtime": 13747.1961,
      "train_samples_per_second": 17.305,
      "train_steps_per_second": 0.541
    }
  ],
  "max_steps": 7440,
  "num_train_epochs": 60,
  "total_flos": 2.7374813261347353e+19,
  "trial_name": null,
  "trial_params": null
}