{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 3280,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.3,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 9.6805,
      "step": 50
    },
    {
      "epoch": 0.61,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 9.3474,
      "step": 100
    },
    {
      "epoch": 0.91,
      "learning_rate": 3e-06,
      "loss": 8.6058,
      "step": 150
    },
    {
      "epoch": 1.0,
      "eval_cer": 0.986131055853677,
      "eval_loss": 6.288315296173096,
      "eval_runtime": 21.6292,
      "eval_samples_per_second": 121.318,
      "eval_steps_per_second": 7.582,
      "eval_wer": 0.9742561721882254,
      "step": 164
    },
    {
      "epoch": 1.22,
      "learning_rate": 4.000000000000001e-06,
      "loss": 6.5168,
      "step": 200
    },
    {
      "epoch": 1.52,
      "learning_rate": 5e-06,
      "loss": 5.178,
      "step": 250
    },
    {
      "epoch": 1.83,
      "learning_rate": 6e-06,
      "loss": 4.6616,
      "step": 300
    },
    {
      "epoch": 2.0,
      "eval_cer": 0.986131055853677,
      "eval_loss": 4.045103549957275,
      "eval_runtime": 21.856,
      "eval_samples_per_second": 120.059,
      "eval_steps_per_second": 7.504,
      "eval_wer": 0.9742561721882254,
      "step": 328
    },
    {
      "epoch": 2.13,
      "learning_rate": 7e-06,
      "loss": 4.2963,
      "step": 350
    },
    {
      "epoch": 2.44,
      "learning_rate": 8.000000000000001e-06,
      "loss": 4.0823,
      "step": 400
    },
    {
      "epoch": 2.74,
      "learning_rate": 9e-06,
      "loss": 3.8526,
      "step": 450
    },
    {
      "epoch": 3.0,
      "eval_cer": 0.986131055853677,
      "eval_loss": 3.5416758060455322,
      "eval_runtime": 21.8041,
      "eval_samples_per_second": 120.344,
      "eval_steps_per_second": 7.522,
      "eval_wer": 0.9742561721882254,
      "step": 492
    },
    {
      "epoch": 3.05,
      "learning_rate": 1e-05,
      "loss": 3.7408,
      "step": 500
    },
    {
      "epoch": 3.35,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 3.5235,
      "step": 550
    },
    {
      "epoch": 3.66,
      "learning_rate": 1.2e-05,
      "loss": 3.3617,
      "step": 600
    },
    {
      "epoch": 3.96,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 3.2384,
      "step": 650
    },
    {
      "epoch": 4.0,
      "eval_cer": 0.986131055853677,
      "eval_loss": 3.0504775047302246,
      "eval_runtime": 22.0029,
      "eval_samples_per_second": 119.257,
      "eval_steps_per_second": 7.454,
      "eval_wer": 0.9742561721882254,
      "step": 656
    },
    {
      "epoch": 4.27,
      "learning_rate": 1.4e-05,
      "loss": 3.0665,
      "step": 700
    },
    {
      "epoch": 4.57,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 2.9046,
      "step": 750
    },
    {
      "epoch": 4.88,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 2.7948,
      "step": 800
    },
    {
      "epoch": 5.0,
      "eval_cer": 0.986131055853677,
      "eval_loss": 2.670631170272827,
      "eval_runtime": 21.7032,
      "eval_samples_per_second": 120.904,
      "eval_steps_per_second": 7.556,
      "eval_wer": 0.9742561721882254,
      "step": 820
    },
    {
      "epoch": 5.18,
      "learning_rate": 1.7e-05,
      "loss": 2.6959,
      "step": 850
    },
    {
      "epoch": 5.49,
      "learning_rate": 1.8e-05,
      "loss": 2.6047,
      "step": 900
    },
    {
      "epoch": 5.79,
      "learning_rate": 1.9e-05,
      "loss": 2.549,
      "step": 950
    },
    {
      "epoch": 6.0,
      "eval_cer": 0.986131055853677,
      "eval_loss": 2.426814317703247,
      "eval_runtime": 21.9773,
      "eval_samples_per_second": 119.396,
      "eval_steps_per_second": 7.462,
      "eval_wer": 0.9742561721882254,
      "step": 984
    },
    {
      "epoch": 6.1,
      "learning_rate": 2e-05,
      "loss": 2.4746,
      "step": 1000
    },
    {
      "epoch": 6.4,
      "learning_rate": 1.9561403508771933e-05,
      "loss": 2.353,
      "step": 1050
    },
    {
      "epoch": 6.71,
      "learning_rate": 1.912280701754386e-05,
      "loss": 2.1808,
      "step": 1100
    },
    {
      "epoch": 7.0,
      "eval_cer": 0.986131055853677,
      "eval_loss": 1.8554401397705078,
      "eval_runtime": 21.7818,
      "eval_samples_per_second": 120.467,
      "eval_steps_per_second": 7.529,
      "eval_wer": 0.9742561721882254,
      "step": 1148
    },
    {
      "epoch": 7.01,
      "learning_rate": 1.868421052631579e-05,
      "loss": 2.028,
      "step": 1150
    },
    {
      "epoch": 7.32,
      "learning_rate": 1.824561403508772e-05,
      "loss": 1.8932,
      "step": 1200
    },
    {
      "epoch": 7.62,
      "learning_rate": 1.780701754385965e-05,
      "loss": 1.7557,
      "step": 1250
    },
    {
      "epoch": 7.93,
      "learning_rate": 1.736842105263158e-05,
      "loss": 1.6069,
      "step": 1300
    },
    {
      "epoch": 8.0,
      "eval_cer": 0.6230821555437986,
      "eval_loss": 1.2551288604736328,
      "eval_runtime": 21.5405,
      "eval_samples_per_second": 121.817,
      "eval_steps_per_second": 7.614,
      "eval_wer": 0.6822114370120278,
      "step": 1312
    },
    {
      "epoch": 8.23,
      "learning_rate": 1.692982456140351e-05,
      "loss": 1.4826,
      "step": 1350
    },
    {
      "epoch": 8.54,
      "learning_rate": 1.649122807017544e-05,
      "loss": 1.3003,
      "step": 1400
    },
    {
      "epoch": 8.84,
      "learning_rate": 1.605263157894737e-05,
      "loss": 1.1916,
      "step": 1450
    },
    {
      "epoch": 9.0,
      "eval_cer": 0.22420829869246467,
      "eval_loss": 0.7985360026359558,
      "eval_runtime": 22.1233,
      "eval_samples_per_second": 118.608,
      "eval_steps_per_second": 7.413,
      "eval_wer": 0.3679397903917845,
      "step": 1476
    },
    {
      "epoch": 9.15,
      "learning_rate": 1.56140350877193e-05,
      "loss": 1.1239,
      "step": 1500
    },
    {
      "epoch": 9.45,
      "learning_rate": 1.517543859649123e-05,
      "loss": 1.0425,
      "step": 1550
    },
    {
      "epoch": 9.76,
      "learning_rate": 1.4736842105263159e-05,
      "loss": 0.9977,
      "step": 1600
    },
    {
      "epoch": 10.0,
      "eval_cer": 0.18267704633058726,
      "eval_loss": 0.6234104633331299,
      "eval_runtime": 21.4356,
      "eval_samples_per_second": 122.413,
      "eval_steps_per_second": 7.651,
      "eval_wer": 0.31180980516283324,
      "step": 1640
    },
    {
      "epoch": 10.06,
      "learning_rate": 1.429824561403509e-05,
      "loss": 0.9625,
      "step": 1650
    },
    {
      "epoch": 10.37,
      "learning_rate": 1.385964912280702e-05,
      "loss": 0.9316,
      "step": 1700
    },
    {
      "epoch": 10.67,
      "learning_rate": 1.3421052631578948e-05,
      "loss": 0.8949,
      "step": 1750
    },
    {
      "epoch": 10.98,
      "learning_rate": 1.2982456140350879e-05,
      "loss": 0.836,
      "step": 1800
    },
    {
      "epoch": 11.0,
      "eval_cer": 0.16431108759730934,
      "eval_loss": 0.510288417339325,
      "eval_runtime": 21.4424,
      "eval_samples_per_second": 122.374,
      "eval_steps_per_second": 7.648,
      "eval_wer": 0.2800872195259197,
      "step": 1804
    },
    {
      "epoch": 11.28,
      "learning_rate": 1.2543859649122808e-05,
      "loss": 0.821,
      "step": 1850
    },
    {
      "epoch": 11.59,
      "learning_rate": 1.2105263157894737e-05,
      "loss": 0.7918,
      "step": 1900
    },
    {
      "epoch": 11.89,
      "learning_rate": 1.1666666666666668e-05,
      "loss": 0.7515,
      "step": 1950
    },
    {
      "epoch": 12.0,
      "eval_cer": 0.1549391580379412,
      "eval_loss": 0.43052738904953003,
      "eval_runtime": 21.9755,
      "eval_samples_per_second": 119.406,
      "eval_steps_per_second": 7.463,
      "eval_wer": 0.26630090736442286,
      "step": 1968
    },
    {
      "epoch": 12.2,
      "learning_rate": 1.1228070175438597e-05,
      "loss": 0.7517,
      "step": 2000
    },
    {
      "epoch": 12.5,
      "learning_rate": 1.0789473684210528e-05,
      "loss": 0.7086,
      "step": 2050
    },
    {
      "epoch": 12.8,
      "learning_rate": 1.0350877192982459e-05,
      "loss": 0.7045,
      "step": 2100
    },
    {
      "epoch": 13.0,
      "eval_cer": 0.14133474416143904,
      "eval_loss": 0.3688310384750366,
      "eval_runtime": 21.9834,
      "eval_samples_per_second": 119.363,
      "eval_steps_per_second": 7.46,
      "eval_wer": 0.24892734050784274,
      "step": 2132
    },
    {
      "epoch": 13.11,
      "learning_rate": 9.912280701754386e-06,
      "loss": 0.6799,
      "step": 2150
    },
    {
      "epoch": 13.41,
      "learning_rate": 9.473684210526315e-06,
      "loss": 0.6578,
      "step": 2200
    },
    {
      "epoch": 13.72,
      "learning_rate": 9.035087719298246e-06,
      "loss": 0.6533,
      "step": 2250
    },
    {
      "epoch": 14.0,
      "eval_cer": 0.1339656866450004,
      "eval_loss": 0.3257971704006195,
      "eval_runtime": 21.7706,
      "eval_samples_per_second": 120.529,
      "eval_steps_per_second": 7.533,
      "eval_wer": 0.2398536962791025,
      "step": 2296
    },
    {
      "epoch": 14.02,
      "learning_rate": 8.596491228070176e-06,
      "loss": 0.6267,
      "step": 2300
    },
    {
      "epoch": 14.33,
      "learning_rate": 8.157894736842106e-06,
      "loss": 0.6625,
      "step": 2350
    },
    {
      "epoch": 14.63,
      "learning_rate": 7.719298245614036e-06,
      "loss": 0.6162,
      "step": 2400
    },
    {
      "epoch": 14.94,
      "learning_rate": 7.280701754385966e-06,
      "loss": 0.5906,
      "step": 2450
    },
    {
      "epoch": 15.0,
      "eval_cer": 0.1287884513642204,
      "eval_loss": 0.29405683279037476,
      "eval_runtime": 21.5389,
      "eval_samples_per_second": 121.826,
      "eval_steps_per_second": 7.614,
      "eval_wer": 0.23183512696068087,
      "step": 2460
    },
    {
      "epoch": 15.24,
      "learning_rate": 6.842105263157896e-06,
      "loss": 0.6021,
      "step": 2500
    },
    {
      "epoch": 15.55,
      "learning_rate": 6.403508771929825e-06,
      "loss": 0.5722,
      "step": 2550
    },
    {
      "epoch": 15.85,
      "learning_rate": 5.964912280701755e-06,
      "loss": 0.5746,
      "step": 2600
    },
    {
      "epoch": 16.0,
      "eval_cer": 0.12780591036202857,
      "eval_loss": 0.2748357355594635,
      "eval_runtime": 21.719,
      "eval_samples_per_second": 120.816,
      "eval_steps_per_second": 7.551,
      "eval_wer": 0.23000633044946192,
      "step": 2624
    },
    {
      "epoch": 16.16,
      "learning_rate": 5.526315789473685e-06,
      "loss": 0.559,
      "step": 2650
    },
    {
      "epoch": 16.46,
      "learning_rate": 5.087719298245615e-06,
      "loss": 0.5676,
      "step": 2700
    },
    {
      "epoch": 16.77,
      "learning_rate": 4.649122807017544e-06,
      "loss": 0.5169,
      "step": 2750
    },
    {
      "epoch": 17.0,
      "eval_cer": 0.12417806666162799,
      "eval_loss": 0.25731268525123596,
      "eval_runtime": 21.7044,
      "eval_samples_per_second": 120.897,
      "eval_steps_per_second": 7.556,
      "eval_wer": 0.224027572624323,
      "step": 2788
    },
    {
      "epoch": 17.07,
      "learning_rate": 4.210526315789474e-06,
      "loss": 0.5603,
      "step": 2800
    },
    {
      "epoch": 17.38,
      "learning_rate": 3.7719298245614037e-06,
      "loss": 0.5362,
      "step": 2850
    },
    {
      "epoch": 17.68,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.5187,
      "step": 2900
    },
    {
      "epoch": 17.99,
      "learning_rate": 2.8947368421052634e-06,
      "loss": 0.5511,
      "step": 2950
    },
    {
      "epoch": 18.0,
      "eval_cer": 0.12277983523543194,
      "eval_loss": 0.24789051711559296,
      "eval_runtime": 21.566,
      "eval_samples_per_second": 121.673,
      "eval_steps_per_second": 7.605,
      "eval_wer": 0.22107336287543083,
      "step": 2952
    },
    {
      "epoch": 18.29,
      "learning_rate": 2.456140350877193e-06,
      "loss": 0.541,
      "step": 3000
    },
    {
      "epoch": 18.6,
      "learning_rate": 2.017543859649123e-06,
      "loss": 0.5095,
      "step": 3050
    },
    {
      "epoch": 18.9,
      "learning_rate": 1.5789473684210526e-06,
      "loss": 0.5318,
      "step": 3100
    },
    {
      "epoch": 19.0,
      "eval_cer": 0.12100370342377749,
      "eval_loss": 0.2409585565328598,
      "eval_runtime": 21.8589,
      "eval_samples_per_second": 120.043,
      "eval_steps_per_second": 7.503,
      "eval_wer": 0.21861152141802068,
      "step": 3116
    },
    {
      "epoch": 19.21,
      "learning_rate": 1.1403508771929824e-06,
      "loss": 0.5156,
      "step": 3150
    },
    {
      "epoch": 19.51,
      "learning_rate": 7.017543859649123e-07,
      "loss": 0.4971,
      "step": 3200
    },
    {
      "epoch": 19.82,
      "learning_rate": 2.6315789473684213e-07,
      "loss": 0.5174,
      "step": 3250
    },
    {
      "epoch": 20.0,
      "eval_cer": 0.12100370342377749,
      "eval_loss": 0.23930250108242035,
      "eval_runtime": 21.6599,
      "eval_samples_per_second": 121.146,
      "eval_steps_per_second": 7.572,
      "eval_wer": 0.21868185974537527,
      "step": 3280
    },
    {
      "epoch": 20.0,
      "step": 3280,
      "total_flos": 1.8383341927882616e+18,
      "train_loss": 1.9778692466456715,
      "train_runtime": 7257.7038,
      "train_samples_per_second": 28.924,
      "train_steps_per_second": 0.452
    }
  ],
  "logging_steps": 50,
  "max_steps": 3280,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 1.8383341927882616e+18,
  "trial_name": null,
  "trial_params": null
}