|
{ |
|
"best_metric": 0.1746993511915207, |
|
"best_model_checkpoint": "./checkpoint-8000", |
|
"epoch": 69.56521739130434, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.0304e-05, |
|
"loss": 6.0688, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.0204000000000002e-05, |
|
"loss": 3.1889, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.0104e-05, |
|
"loss": 2.2092, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 4.000399999999999e-05, |
|
"loss": 1.5574, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 4.9903999999999995e-05, |
|
"loss": 1.3719, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_cer": 0.08328164981226813, |
|
"eval_loss": 0.3388712704181671, |
|
"eval_runtime": 199.6026, |
|
"eval_samples_per_second": 29.068, |
|
"eval_steps_per_second": 0.456, |
|
"eval_wer": 0.42359243003445185, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 5.9804e-05, |
|
"loss": 1.3634, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 6.970400000000001e-05, |
|
"loss": 1.3345, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 7.960399999999999e-05, |
|
"loss": 1.2407, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 8e-05, |
|
"loss": 1.172, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 8e-05, |
|
"loss": 1.1361, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"eval_cer": 0.06299695098173931, |
|
"eval_loss": 0.23091697692871094, |
|
"eval_runtime": 209.236, |
|
"eval_samples_per_second": 27.729, |
|
"eval_steps_per_second": 0.435, |
|
"eval_wer": 0.31617820567275473, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 8e-05, |
|
"loss": 1.0819, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 8e-05, |
|
"loss": 1.0923, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 8e-05, |
|
"loss": 1.0803, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 12.17, |
|
"learning_rate": 8e-05, |
|
"loss": 1.0719, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 8e-05, |
|
"loss": 1.0517, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"eval_cer": 0.059732533136255095, |
|
"eval_loss": 0.21664097905158997, |
|
"eval_runtime": 198.1012, |
|
"eval_samples_per_second": 29.288, |
|
"eval_steps_per_second": 0.459, |
|
"eval_wer": 0.3056066827127283, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 8e-05, |
|
"loss": 1.0523, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 8e-05, |
|
"loss": 1.0414, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"learning_rate": 8e-05, |
|
"loss": 1.031, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 16.52, |
|
"learning_rate": 8e-05, |
|
"loss": 1.0126, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 8e-05, |
|
"loss": 1.0118, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"eval_cer": 0.055684506456416864, |
|
"eval_loss": 0.21414823830127716, |
|
"eval_runtime": 204.5017, |
|
"eval_samples_per_second": 28.371, |
|
"eval_steps_per_second": 0.445, |
|
"eval_wer": 0.2783991693803389, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"learning_rate": 8e-05, |
|
"loss": 1.0243, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"learning_rate": 8e-05, |
|
"loss": 1.0084, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 8e-05, |
|
"loss": 1.0074, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 20.87, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9933, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9922, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"eval_cer": 0.05935372696988491, |
|
"eval_loss": 0.22312845289707184, |
|
"eval_runtime": 198.4087, |
|
"eval_samples_per_second": 29.243, |
|
"eval_steps_per_second": 0.459, |
|
"eval_wer": 0.29411487092359245, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 22.61, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9838, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 23.48, |
|
"learning_rate": 8e-05, |
|
"loss": 1.019, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 24.35, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9897, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 25.22, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9897, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9929, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"eval_cer": 0.05865182142631663, |
|
"eval_loss": 0.21711210906505585, |
|
"eval_runtime": 199.7486, |
|
"eval_samples_per_second": 29.047, |
|
"eval_steps_per_second": 0.456, |
|
"eval_wer": 0.28915946953608, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 26.96, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9971, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 27.83, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9621, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 28.7, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9744, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 29.57, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9587, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 30.43, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9485, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 30.43, |
|
"eval_cer": 0.05992565000538499, |
|
"eval_loss": 0.2236333191394806, |
|
"eval_runtime": 197.9762, |
|
"eval_samples_per_second": 29.307, |
|
"eval_steps_per_second": 0.46, |
|
"eval_wer": 0.29560149133984615, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 31.3, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9468, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 32.17, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9598, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 33.04, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9575, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 33.91, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9491, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 34.78, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9573, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 34.78, |
|
"eval_cer": 0.061589426108657946, |
|
"eval_loss": 0.2313707023859024, |
|
"eval_runtime": 198.2807, |
|
"eval_samples_per_second": 29.262, |
|
"eval_steps_per_second": 0.459, |
|
"eval_wer": 0.304285242342725, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 35.65, |
|
"learning_rate": 7.8176e-05, |
|
"loss": 0.9251, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 36.52, |
|
"learning_rate": 7.627600000000001e-05, |
|
"loss": 0.9299, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 37.39, |
|
"learning_rate": 7.437600000000001e-05, |
|
"loss": 0.928, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 38.26, |
|
"learning_rate": 7.2476e-05, |
|
"loss": 0.9303, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 39.13, |
|
"learning_rate": 7.057600000000001e-05, |
|
"loss": 0.9195, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 39.13, |
|
"eval_cer": 0.058024191601644466, |
|
"eval_loss": 0.21694457530975342, |
|
"eval_runtime": 198.1273, |
|
"eval_samples_per_second": 29.284, |
|
"eval_steps_per_second": 0.459, |
|
"eval_wer": 0.2812308273160602, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 6.8676e-05, |
|
"loss": 0.9182, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 40.87, |
|
"learning_rate": 6.6776e-05, |
|
"loss": 0.8885, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 41.74, |
|
"learning_rate": 6.487600000000001e-05, |
|
"loss": 0.9037, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 42.61, |
|
"learning_rate": 6.2976e-05, |
|
"loss": 0.8929, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 43.48, |
|
"learning_rate": 6.1076e-05, |
|
"loss": 0.8915, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 43.48, |
|
"eval_cer": 0.05601503340550457, |
|
"eval_loss": 0.21094831824302673, |
|
"eval_runtime": 198.7631, |
|
"eval_samples_per_second": 29.191, |
|
"eval_steps_per_second": 0.458, |
|
"eval_wer": 0.2779508235405163, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 44.35, |
|
"learning_rate": 5.9176000000000004e-05, |
|
"loss": 0.8687, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 45.22, |
|
"learning_rate": 5.7276000000000005e-05, |
|
"loss": 0.8635, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 46.09, |
|
"learning_rate": 5.5376e-05, |
|
"loss": 0.859, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 46.96, |
|
"learning_rate": 5.3476e-05, |
|
"loss": 0.8586, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 47.83, |
|
"learning_rate": 5.157600000000001e-05, |
|
"loss": 0.8449, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 47.83, |
|
"eval_cer": 0.05136537340260782, |
|
"eval_loss": 0.20504631102085114, |
|
"eval_runtime": 198.4879, |
|
"eval_samples_per_second": 29.231, |
|
"eval_steps_per_second": 0.458, |
|
"eval_wer": 0.25338619094813347, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 48.7, |
|
"learning_rate": 4.9676000000000003e-05, |
|
"loss": 0.8345, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 49.57, |
|
"learning_rate": 4.7776e-05, |
|
"loss": 0.8229, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 50.43, |
|
"learning_rate": 4.5876000000000006e-05, |
|
"loss": 0.8203, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 51.3, |
|
"learning_rate": 4.397600000000001e-05, |
|
"loss": 0.8084, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 52.17, |
|
"learning_rate": 4.207600000000001e-05, |
|
"loss": 0.8028, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 52.17, |
|
"eval_cer": 0.04915567076544842, |
|
"eval_loss": 0.2032497674226761, |
|
"eval_runtime": 199.0329, |
|
"eval_samples_per_second": 29.151, |
|
"eval_steps_per_second": 0.457, |
|
"eval_wer": 0.24562272877436406, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 53.04, |
|
"learning_rate": 4.0176e-05, |
|
"loss": 0.7975, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 53.91, |
|
"learning_rate": 3.8295000000000005e-05, |
|
"loss": 0.7942, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 54.78, |
|
"learning_rate": 3.6395e-05, |
|
"loss": 0.7867, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 55.65, |
|
"learning_rate": 3.4495e-05, |
|
"loss": 0.787, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 56.52, |
|
"learning_rate": 3.2595e-05, |
|
"loss": 0.7881, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 56.52, |
|
"eval_cer": 0.04694596812828902, |
|
"eval_loss": 0.18896546959877014, |
|
"eval_runtime": 199.9061, |
|
"eval_samples_per_second": 29.024, |
|
"eval_steps_per_second": 0.455, |
|
"eval_wer": 0.2380008494973807, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 57.39, |
|
"learning_rate": 3.0695e-05, |
|
"loss": 0.7608, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 58.26, |
|
"learning_rate": 2.8795000000000005e-05, |
|
"loss": 0.7542, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 59.13, |
|
"learning_rate": 2.6895e-05, |
|
"loss": 0.755, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 2.4995000000000004e-05, |
|
"loss": 0.7569, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 60.87, |
|
"learning_rate": 2.3095e-05, |
|
"loss": 0.7423, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 60.87, |
|
"eval_cer": 0.04418662517129838, |
|
"eval_loss": 0.18159770965576172, |
|
"eval_runtime": 198.6364, |
|
"eval_samples_per_second": 29.209, |
|
"eval_steps_per_second": 0.458, |
|
"eval_wer": 0.22452687715323988, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 61.74, |
|
"learning_rate": 2.1195000000000006e-05, |
|
"loss": 0.7409, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 62.61, |
|
"learning_rate": 1.9295e-05, |
|
"loss": 0.7208, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 63.48, |
|
"learning_rate": 1.7395e-05, |
|
"loss": 0.7488, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 64.35, |
|
"learning_rate": 1.5495000000000003e-05, |
|
"loss": 0.7134, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 65.22, |
|
"learning_rate": 1.3594999999999998e-05, |
|
"loss": 0.7248, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 65.22, |
|
"eval_cer": 0.042203463476772125, |
|
"eval_loss": 0.17892056703567505, |
|
"eval_runtime": 197.8292, |
|
"eval_samples_per_second": 29.328, |
|
"eval_steps_per_second": 0.46, |
|
"eval_wer": 0.21650384633536268, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 66.09, |
|
"learning_rate": 1.1695000000000002e-05, |
|
"loss": 0.7076, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 66.96, |
|
"learning_rate": 9.794999999999999e-06, |
|
"loss": 0.7132, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 67.83, |
|
"learning_rate": 7.895000000000003e-06, |
|
"loss": 0.6972, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 68.7, |
|
"learning_rate": 5.994999999999999e-06, |
|
"loss": 0.7019, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 69.57, |
|
"learning_rate": 4.095000000000005e-06, |
|
"loss": 0.6993, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 69.57, |
|
"eval_cer": 0.040792224817745956, |
|
"eval_loss": 0.1746993511915207, |
|
"eval_runtime": 198.0151, |
|
"eval_samples_per_second": 29.301, |
|
"eval_steps_per_second": 0.46, |
|
"eval_wer": 0.21072254471659824, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 69.57, |
|
"step": 8000, |
|
"total_flos": 3.5433976605211066e+20, |
|
"train_loss": 1.039798891067505, |
|
"train_runtime": 49145.562, |
|
"train_samples_per_second": 20.836, |
|
"train_steps_per_second": 0.163 |
|
} |
|
], |
|
"max_steps": 8000, |
|
"num_train_epochs": 70, |
|
"total_flos": 3.5433976605211066e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|