|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"global_step": 11300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.82e-05, |
|
"loss": 10.5082, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0001182, |
|
"loss": 3.6378, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.00017819999999999997, |
|
"loss": 3.2141, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.0002382, |
|
"loss": 3.1644, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 0.0002982, |
|
"loss": 3.0928, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"eval_loss": 3.0804343223571777, |
|
"eval_runtime": 51.9529, |
|
"eval_samples_per_second": 32.78, |
|
"eval_steps_per_second": 1.039, |
|
"eval_wer": 1.0072811059907834, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.00029730555555555554, |
|
"loss": 2.7805, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.0002945277777777777, |
|
"loss": 1.9414, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 0.00029174999999999996, |
|
"loss": 1.6707, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 0.0002889722222222222, |
|
"loss": 1.5232, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 0.0002861944444444444, |
|
"loss": 1.4505, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"eval_loss": 0.9038452506065369, |
|
"eval_runtime": 51.3716, |
|
"eval_samples_per_second": 33.151, |
|
"eval_steps_per_second": 1.051, |
|
"eval_wer": 0.7329953917050691, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 0.0002834166666666666, |
|
"loss": 1.3908, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 0.00028063888888888886, |
|
"loss": 1.3335, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 0.0002778611111111111, |
|
"loss": 1.2725, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 0.0002750833333333333, |
|
"loss": 1.2043, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 13.27, |
|
"learning_rate": 0.00027230555555555553, |
|
"loss": 1.2207, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 13.27, |
|
"eval_loss": 0.7374930381774902, |
|
"eval_runtime": 51.4675, |
|
"eval_samples_per_second": 33.089, |
|
"eval_steps_per_second": 1.049, |
|
"eval_wer": 0.604516129032258, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 14.16, |
|
"learning_rate": 0.00026952777777777777, |
|
"loss": 1.1794, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 15.04, |
|
"learning_rate": 0.00026674999999999995, |
|
"loss": 1.1502, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"learning_rate": 0.0002639722222222222, |
|
"loss": 1.1261, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 16.81, |
|
"learning_rate": 0.00026119444444444443, |
|
"loss": 1.1082, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"learning_rate": 0.0002584166666666666, |
|
"loss": 1.0695, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"eval_loss": 0.7119461894035339, |
|
"eval_runtime": 51.1988, |
|
"eval_samples_per_second": 33.263, |
|
"eval_steps_per_second": 1.055, |
|
"eval_wer": 0.544147465437788, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 18.58, |
|
"learning_rate": 0.0002556388888888889, |
|
"loss": 1.064, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 19.47, |
|
"learning_rate": 0.0002528611111111111, |
|
"loss": 1.0327, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 20.35, |
|
"learning_rate": 0.0002500833333333333, |
|
"loss": 1.0353, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 21.24, |
|
"learning_rate": 0.00024730555555555557, |
|
"loss": 1.0108, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 22.12, |
|
"learning_rate": 0.00024452777777777776, |
|
"loss": 1.0104, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 22.12, |
|
"eval_loss": 0.6069046258926392, |
|
"eval_runtime": 51.5986, |
|
"eval_samples_per_second": 33.005, |
|
"eval_steps_per_second": 1.047, |
|
"eval_wer": 0.5295852534562212, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 0.00024174999999999997, |
|
"loss": 0.9806, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 23.89, |
|
"learning_rate": 0.0002389722222222222, |
|
"loss": 0.9712, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 24.78, |
|
"learning_rate": 0.00023619444444444442, |
|
"loss": 0.9703, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 25.66, |
|
"learning_rate": 0.00023341666666666663, |
|
"loss": 0.9298, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 26.55, |
|
"learning_rate": 0.00023063888888888887, |
|
"loss": 0.9299, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 26.55, |
|
"eval_loss": 0.6168299913406372, |
|
"eval_runtime": 51.1346, |
|
"eval_samples_per_second": 33.304, |
|
"eval_steps_per_second": 1.056, |
|
"eval_wer": 0.5206451612903226, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 27.43, |
|
"learning_rate": 0.00022786111111111108, |
|
"loss": 0.9159, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 28.32, |
|
"learning_rate": 0.00022508333333333332, |
|
"loss": 0.922, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"learning_rate": 0.00022230555555555553, |
|
"loss": 0.8775, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 30.09, |
|
"learning_rate": 0.00021952777777777774, |
|
"loss": 0.889, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"learning_rate": 0.00021674999999999998, |
|
"loss": 0.8588, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"eval_loss": 0.6382256150245667, |
|
"eval_runtime": 50.9107, |
|
"eval_samples_per_second": 33.451, |
|
"eval_steps_per_second": 1.061, |
|
"eval_wer": 0.5170506912442396, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 31.86, |
|
"learning_rate": 0.0002139722222222222, |
|
"loss": 0.865, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 32.74, |
|
"learning_rate": 0.0002111944444444444, |
|
"loss": 0.8679, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 33.63, |
|
"learning_rate": 0.00020841666666666665, |
|
"loss": 0.8034, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 34.51, |
|
"learning_rate": 0.00020563888888888886, |
|
"loss": 0.7997, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 35.4, |
|
"learning_rate": 0.0002028611111111111, |
|
"loss": 0.7942, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 35.4, |
|
"eval_loss": 0.6048025488853455, |
|
"eval_runtime": 50.6633, |
|
"eval_samples_per_second": 33.614, |
|
"eval_steps_per_second": 1.066, |
|
"eval_wer": 0.49880184331797234, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 36.28, |
|
"learning_rate": 0.00020011111111111108, |
|
"loss": 0.796, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 37.17, |
|
"learning_rate": 0.00019733333333333332, |
|
"loss": 0.8013, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 38.05, |
|
"learning_rate": 0.00019455555555555554, |
|
"loss": 0.7807, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 38.94, |
|
"learning_rate": 0.00019177777777777777, |
|
"loss": 0.7579, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 39.82, |
|
"learning_rate": 0.00018899999999999999, |
|
"loss": 0.7808, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 39.82, |
|
"eval_loss": 0.6730400919914246, |
|
"eval_runtime": 50.5688, |
|
"eval_samples_per_second": 33.677, |
|
"eval_steps_per_second": 1.068, |
|
"eval_wer": 0.5083870967741936, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 40.71, |
|
"learning_rate": 0.0001862222222222222, |
|
"loss": 0.7556, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 41.59, |
|
"learning_rate": 0.00018344444444444444, |
|
"loss": 0.767, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 42.48, |
|
"learning_rate": 0.00018066666666666665, |
|
"loss": 0.7401, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 43.36, |
|
"learning_rate": 0.00017788888888888886, |
|
"loss": 0.7413, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 44.25, |
|
"learning_rate": 0.0001751111111111111, |
|
"loss": 0.743, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 44.25, |
|
"eval_loss": 0.6749109625816345, |
|
"eval_runtime": 50.7919, |
|
"eval_samples_per_second": 33.529, |
|
"eval_steps_per_second": 1.063, |
|
"eval_wer": 0.5011981566820276, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 45.13, |
|
"learning_rate": 0.0001723333333333333, |
|
"loss": 0.6921, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 46.02, |
|
"learning_rate": 0.00016955555555555555, |
|
"loss": 0.7032, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 46.9, |
|
"learning_rate": 0.00016677777777777776, |
|
"loss": 0.688, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 47.79, |
|
"learning_rate": 0.00016399999999999997, |
|
"loss": 0.6842, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 48.67, |
|
"learning_rate": 0.00016122222222222221, |
|
"loss": 0.6652, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 48.67, |
|
"eval_loss": 0.6491430401802063, |
|
"eval_runtime": 50.5445, |
|
"eval_samples_per_second": 33.693, |
|
"eval_steps_per_second": 1.068, |
|
"eval_wer": 0.4735483870967742, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 49.56, |
|
"learning_rate": 0.00015844444444444443, |
|
"loss": 0.6798, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 50.44, |
|
"learning_rate": 0.00015566666666666664, |
|
"loss": 0.6639, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 51.33, |
|
"learning_rate": 0.00015288888888888888, |
|
"loss": 0.6691, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 52.21, |
|
"learning_rate": 0.0001501111111111111, |
|
"loss": 0.6275, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 53.1, |
|
"learning_rate": 0.00014733333333333333, |
|
"loss": 0.6386, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 53.1, |
|
"eval_loss": 0.6927830576896667, |
|
"eval_runtime": 50.5958, |
|
"eval_samples_per_second": 33.659, |
|
"eval_steps_per_second": 1.067, |
|
"eval_wer": 0.49539170506912444, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 53.98, |
|
"learning_rate": 0.00014455555555555554, |
|
"loss": 0.6288, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 54.87, |
|
"learning_rate": 0.00014177777777777778, |
|
"loss": 0.6247, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 55.75, |
|
"learning_rate": 0.00013902777777777777, |
|
"loss": 0.6245, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 56.64, |
|
"learning_rate": 0.00013624999999999998, |
|
"loss": 0.606, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 57.52, |
|
"learning_rate": 0.00013347222222222222, |
|
"loss": 0.5945, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 57.52, |
|
"eval_loss": 0.6358979344367981, |
|
"eval_runtime": 51.0247, |
|
"eval_samples_per_second": 33.376, |
|
"eval_steps_per_second": 1.058, |
|
"eval_wer": 0.479815668202765, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 58.41, |
|
"learning_rate": 0.0001307222222222222, |
|
"loss": 0.6067, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 59.29, |
|
"learning_rate": 0.00012794444444444442, |
|
"loss": 0.5917, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 60.18, |
|
"learning_rate": 0.00012516666666666666, |
|
"loss": 0.5729, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 61.06, |
|
"learning_rate": 0.0001223888888888889, |
|
"loss": 0.5759, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 61.95, |
|
"learning_rate": 0.0001196111111111111, |
|
"loss": 0.5561, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 61.95, |
|
"eval_loss": 0.6409346461296082, |
|
"eval_runtime": 51.2819, |
|
"eval_samples_per_second": 33.209, |
|
"eval_steps_per_second": 1.053, |
|
"eval_wer": 0.4799078341013825, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 62.83, |
|
"learning_rate": 0.00011683333333333332, |
|
"loss": 0.5496, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 63.72, |
|
"learning_rate": 0.00011405555555555554, |
|
"loss": 0.5541, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 64.6, |
|
"learning_rate": 0.00011127777777777777, |
|
"loss": 0.555, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 65.49, |
|
"learning_rate": 0.0001085, |
|
"loss": 0.5393, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 66.37, |
|
"learning_rate": 0.0001057222222222222, |
|
"loss": 0.5464, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 66.37, |
|
"eval_loss": 0.6451661586761475, |
|
"eval_runtime": 50.5882, |
|
"eval_samples_per_second": 33.664, |
|
"eval_steps_per_second": 1.067, |
|
"eval_wer": 0.46912442396313364, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 67.26, |
|
"learning_rate": 0.00010294444444444443, |
|
"loss": 0.5308, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 68.14, |
|
"learning_rate": 0.00010016666666666666, |
|
"loss": 0.5304, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 69.03, |
|
"learning_rate": 9.738888888888888e-05, |
|
"loss": 0.5278, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 69.91, |
|
"learning_rate": 9.46111111111111e-05, |
|
"loss": 0.5112, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 70.8, |
|
"learning_rate": 9.183333333333332e-05, |
|
"loss": 0.5119, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 70.8, |
|
"eval_loss": 0.6376333832740784, |
|
"eval_runtime": 50.3564, |
|
"eval_samples_per_second": 33.819, |
|
"eval_steps_per_second": 1.072, |
|
"eval_wer": 0.4657142857142857, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 71.68, |
|
"learning_rate": 8.905555555555555e-05, |
|
"loss": 0.4974, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 72.57, |
|
"learning_rate": 8.627777777777776e-05, |
|
"loss": 0.4986, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 73.45, |
|
"learning_rate": 8.349999999999998e-05, |
|
"loss": 0.4752, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 74.34, |
|
"learning_rate": 8.072222222222222e-05, |
|
"loss": 0.4947, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 75.22, |
|
"learning_rate": 7.797222222222222e-05, |
|
"loss": 0.474, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 75.22, |
|
"eval_loss": 0.6540603041648865, |
|
"eval_runtime": 50.3966, |
|
"eval_samples_per_second": 33.792, |
|
"eval_steps_per_second": 1.072, |
|
"eval_wer": 0.46995391705069123, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 76.11, |
|
"learning_rate": 7.519444444444445e-05, |
|
"loss": 0.4724, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 76.99, |
|
"learning_rate": 7.241666666666666e-05, |
|
"loss": 0.4668, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 77.88, |
|
"learning_rate": 6.963888888888889e-05, |
|
"loss": 0.46, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 78.76, |
|
"learning_rate": 6.68611111111111e-05, |
|
"loss": 0.45, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 79.65, |
|
"learning_rate": 6.408333333333332e-05, |
|
"loss": 0.45, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 79.65, |
|
"eval_loss": 0.637355625629425, |
|
"eval_runtime": 50.4224, |
|
"eval_samples_per_second": 33.775, |
|
"eval_steps_per_second": 1.071, |
|
"eval_wer": 0.45714285714285713, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 80.53, |
|
"learning_rate": 6.130555555555555e-05, |
|
"loss": 0.4282, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 81.42, |
|
"learning_rate": 5.8527777777777774e-05, |
|
"loss": 0.4455, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 82.3, |
|
"learning_rate": 5.574999999999999e-05, |
|
"loss": 0.4423, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 83.19, |
|
"learning_rate": 5.297222222222222e-05, |
|
"loss": 0.4262, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 84.07, |
|
"learning_rate": 5.019444444444444e-05, |
|
"loss": 0.4315, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 84.07, |
|
"eval_loss": 0.6568067073822021, |
|
"eval_runtime": 50.7772, |
|
"eval_samples_per_second": 33.539, |
|
"eval_steps_per_second": 1.063, |
|
"eval_wer": 0.46248847926267284, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 84.96, |
|
"learning_rate": 4.741666666666666e-05, |
|
"loss": 0.4069, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 85.84, |
|
"learning_rate": 4.463888888888888e-05, |
|
"loss": 0.416, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 86.73, |
|
"learning_rate": 4.186111111111111e-05, |
|
"loss": 0.4106, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 87.61, |
|
"learning_rate": 3.9083333333333326e-05, |
|
"loss": 0.4025, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 88.5, |
|
"learning_rate": 3.630555555555555e-05, |
|
"loss": 0.3967, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 88.5, |
|
"eval_loss": 0.6636261343955994, |
|
"eval_runtime": 50.7873, |
|
"eval_samples_per_second": 33.532, |
|
"eval_steps_per_second": 1.063, |
|
"eval_wer": 0.46046082949308753, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 89.38, |
|
"learning_rate": 3.352777777777777e-05, |
|
"loss": 0.3857, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 90.27, |
|
"learning_rate": 3.0749999999999995e-05, |
|
"loss": 0.3991, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 91.15, |
|
"learning_rate": 2.7972222222222217e-05, |
|
"loss": 0.3899, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 92.04, |
|
"learning_rate": 2.519444444444444e-05, |
|
"loss": 0.3965, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 92.92, |
|
"learning_rate": 2.2416666666666665e-05, |
|
"loss": 0.3937, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 92.92, |
|
"eval_loss": 0.6537252068519592, |
|
"eval_runtime": 49.8207, |
|
"eval_samples_per_second": 34.183, |
|
"eval_steps_per_second": 1.084, |
|
"eval_wer": 0.4597235023041475, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 93.81, |
|
"learning_rate": 1.9638888888888887e-05, |
|
"loss": 0.3808, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 94.69, |
|
"learning_rate": 1.686111111111111e-05, |
|
"loss": 0.3701, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 95.58, |
|
"learning_rate": 1.4083333333333331e-05, |
|
"loss": 0.3746, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 96.46, |
|
"learning_rate": 1.1305555555555553e-05, |
|
"loss": 0.3647, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 97.35, |
|
"learning_rate": 8.527777777777777e-06, |
|
"loss": 0.3788, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 97.35, |
|
"eval_loss": 0.6614137887954712, |
|
"eval_runtime": 50.2299, |
|
"eval_samples_per_second": 33.904, |
|
"eval_steps_per_second": 1.075, |
|
"eval_wer": 0.45889400921658985, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 98.23, |
|
"learning_rate": 5.749999999999999e-06, |
|
"loss": 0.3666, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 99.12, |
|
"learning_rate": 2.972222222222222e-06, |
|
"loss": 0.3701, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 1.9444444444444442e-07, |
|
"loss": 0.3755, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 11300, |
|
"total_flos": 4.438624660507792e+19, |
|
"train_loss": 0.9130245208740234, |
|
"train_runtime": 17767.5681, |
|
"train_samples_per_second": 20.267, |
|
"train_steps_per_second": 0.636 |
|
} |
|
], |
|
"max_steps": 11300, |
|
"num_train_epochs": 100, |
|
"total_flos": 4.438624660507792e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|