|
{ |
|
"best_metric": 0.2248678207397461, |
|
"best_model_checkpoint": "./checkpoint-1600", |
|
"epoch": 61.53333333333333, |
|
"global_step": 1600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 4.980500000000001e-05, |
|
"loss": 4.9923, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.156188488006592, |
|
"eval_runtime": 15.7049, |
|
"eval_samples_per_second": 21.331, |
|
"eval_steps_per_second": 0.382, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 8e-05, |
|
"loss": 2.1775, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"eval_cer": 0.11219018715225089, |
|
"eval_loss": 0.43336454033851624, |
|
"eval_runtime": 15.2789, |
|
"eval_samples_per_second": 21.926, |
|
"eval_steps_per_second": 0.393, |
|
"eval_wer": 0.5804059328649492, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 11.53, |
|
"learning_rate": 8e-05, |
|
"loss": 1.3708, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 11.53, |
|
"eval_cer": 0.07966616084977238, |
|
"eval_loss": 0.3105751574039459, |
|
"eval_runtime": 15.048, |
|
"eval_samples_per_second": 22.262, |
|
"eval_steps_per_second": 0.399, |
|
"eval_wer": 0.4336455893832943, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 8e-05, |
|
"loss": 1.2266, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"eval_cer": 0.06732422862923622, |
|
"eval_loss": 0.26751142740249634, |
|
"eval_runtime": 15.232, |
|
"eval_samples_per_second": 21.993, |
|
"eval_steps_per_second": 0.394, |
|
"eval_wer": 0.3672911787665886, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 8e-05, |
|
"loss": 1.093, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"eval_cer": 0.06327769347496207, |
|
"eval_loss": 0.24162611365318298, |
|
"eval_runtime": 14.995, |
|
"eval_samples_per_second": 22.341, |
|
"eval_steps_per_second": 0.4, |
|
"eval_wer": 0.35011709601873536, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 23.08, |
|
"learning_rate": 8e-05, |
|
"loss": 0.989, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 23.08, |
|
"eval_cer": 0.06105209914011128, |
|
"eval_loss": 0.23200440406799316, |
|
"eval_runtime": 15.3525, |
|
"eval_samples_per_second": 21.821, |
|
"eval_steps_per_second": 0.391, |
|
"eval_wer": 0.3251366120218579, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 26.91, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9518, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 26.91, |
|
"eval_cer": 0.05842185128983308, |
|
"eval_loss": 0.2413272261619568, |
|
"eval_runtime": 15.2385, |
|
"eval_samples_per_second": 21.984, |
|
"eval_steps_per_second": 0.394, |
|
"eval_wer": 0.3192818110850898, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 30.76, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9075, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 30.76, |
|
"eval_cer": 0.05933232169954476, |
|
"eval_loss": 0.23544833064079285, |
|
"eval_runtime": 15.1938, |
|
"eval_samples_per_second": 22.049, |
|
"eval_steps_per_second": 0.395, |
|
"eval_wer": 0.3200624512099922, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 34.61, |
|
"learning_rate": 7.059500000000001e-05, |
|
"loss": 0.878, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 34.61, |
|
"eval_cer": 0.057916034395548814, |
|
"eval_loss": 0.22777308523654938, |
|
"eval_runtime": 14.9728, |
|
"eval_samples_per_second": 22.374, |
|
"eval_steps_per_second": 0.401, |
|
"eval_wer": 0.3126463700234192, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 6.109500000000001e-05, |
|
"loss": 0.8563, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"eval_cer": 0.054779969650986346, |
|
"eval_loss": 0.2326740324497223, |
|
"eval_runtime": 15.1749, |
|
"eval_samples_per_second": 22.076, |
|
"eval_steps_per_second": 0.395, |
|
"eval_wer": 0.2962529274004684, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 42.3, |
|
"learning_rate": 5.169000000000001e-05, |
|
"loss": 0.8084, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 42.3, |
|
"eval_cer": 0.05407182599898837, |
|
"eval_loss": 0.22712552547454834, |
|
"eval_runtime": 15.3083, |
|
"eval_samples_per_second": 21.884, |
|
"eval_steps_per_second": 0.392, |
|
"eval_wer": 0.2923497267759563, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 46.15, |
|
"learning_rate": 4.219000000000001e-05, |
|
"loss": 0.7845, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 46.15, |
|
"eval_cer": 0.053667172483560954, |
|
"eval_loss": 0.23326420783996582, |
|
"eval_runtime": 15.1559, |
|
"eval_samples_per_second": 22.104, |
|
"eval_steps_per_second": 0.396, |
|
"eval_wer": 0.29508196721311475, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.99, |
|
"learning_rate": 3.269000000000001e-05, |
|
"loss": 0.7487, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 49.99, |
|
"eval_cer": 0.052453211937278706, |
|
"eval_loss": 0.22895006835460663, |
|
"eval_runtime": 15.373, |
|
"eval_samples_per_second": 21.791, |
|
"eval_steps_per_second": 0.39, |
|
"eval_wer": 0.2888368462138954, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.84, |
|
"learning_rate": 2.319e-05, |
|
"loss": 0.7182, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 53.84, |
|
"eval_cer": 0.05346484572584724, |
|
"eval_loss": 0.23406584560871124, |
|
"eval_runtime": 15.2056, |
|
"eval_samples_per_second": 22.031, |
|
"eval_steps_per_second": 0.395, |
|
"eval_wer": 0.28766588602654175, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 1.369e-05, |
|
"loss": 0.7095, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"eval_cer": 0.05154274152756702, |
|
"eval_loss": 0.22908572852611542, |
|
"eval_runtime": 15.2684, |
|
"eval_samples_per_second": 21.941, |
|
"eval_steps_per_second": 0.393, |
|
"eval_wer": 0.2818110850897736, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.53, |
|
"learning_rate": 4.190000000000005e-06, |
|
"loss": 0.6953, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 61.53, |
|
"eval_cer": 0.05078401618614062, |
|
"eval_loss": 0.2248678207397461, |
|
"eval_runtime": 15.2139, |
|
"eval_samples_per_second": 22.019, |
|
"eval_steps_per_second": 0.394, |
|
"eval_wer": 0.2782982045277127, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 61.53, |
|
"step": 1600, |
|
"total_flos": 1.3126730002882698e+20, |
|
"train_loss": 1.2442097234725953, |
|
"train_runtime": 17605.3989, |
|
"train_samples_per_second": 11.633, |
|
"train_steps_per_second": 0.091 |
|
} |
|
], |
|
"max_steps": 1600, |
|
"num_train_epochs": 62, |
|
"total_flos": 1.3126730002882698e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|