|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"global_step": 11085, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.831924793466908e-05, |
|
"loss": 18.2448, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.3216891288757324, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.2541, |
|
"eval_samples_per_second": 24.814, |
|
"eval_steps_per_second": 3.171, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.130187066755295e-05, |
|
"loss": 3.4894, |
|
"step": 1478 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.335986375808716, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.2646, |
|
"eval_samples_per_second": 24.778, |
|
"eval_steps_per_second": 3.166, |
|
"step": 1478 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 8.428449340043682e-05, |
|
"loss": 3.4869, |
|
"step": 2217 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.368795394897461, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.3486, |
|
"eval_samples_per_second": 24.494, |
|
"eval_steps_per_second": 3.13, |
|
"step": 2217 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 7.726711613332067e-05, |
|
"loss": 3.4836, |
|
"step": 2956 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.357470750808716, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.3992, |
|
"eval_samples_per_second": 24.327, |
|
"eval_steps_per_second": 3.108, |
|
"step": 2956 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 7.024973886620455e-05, |
|
"loss": 3.4864, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.36539888381958, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.5815, |
|
"eval_samples_per_second": 23.742, |
|
"eval_steps_per_second": 3.034, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 6.323236159908841e-05, |
|
"loss": 3.4849, |
|
"step": 4434 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.3517842292785645, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.7896, |
|
"eval_samples_per_second": 23.108, |
|
"eval_steps_per_second": 2.953, |
|
"step": 4434 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 5.6214984331972276e-05, |
|
"loss": 3.484, |
|
"step": 5173 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.3279595375061035, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.3916, |
|
"eval_samples_per_second": 24.352, |
|
"eval_steps_per_second": 3.112, |
|
"step": 5173 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.9197607064856144e-05, |
|
"loss": 3.4854, |
|
"step": 5912 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.361226797103882, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.4066, |
|
"eval_samples_per_second": 24.303, |
|
"eval_steps_per_second": 3.105, |
|
"step": 5912 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.2180229797740006e-05, |
|
"loss": 3.4807, |
|
"step": 6651 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.3284316062927246, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.4013, |
|
"eval_samples_per_second": 24.32, |
|
"eval_steps_per_second": 3.108, |
|
"step": 6651 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 3.5162852530623875e-05, |
|
"loss": 3.4854, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.360283851623535, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.389, |
|
"eval_samples_per_second": 24.361, |
|
"eval_steps_per_second": 3.113, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 2.8145475263507744e-05, |
|
"loss": 3.4854, |
|
"step": 8129 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.3530423641204834, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.4543, |
|
"eval_samples_per_second": 24.147, |
|
"eval_steps_per_second": 3.085, |
|
"step": 8129 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2.112809799639161e-05, |
|
"loss": 3.4855, |
|
"step": 8868 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.3396615982055664, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.3505, |
|
"eval_samples_per_second": 24.488, |
|
"eval_steps_per_second": 3.129, |
|
"step": 8868 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 1.4110720729275472e-05, |
|
"loss": 3.4813, |
|
"step": 9607 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.352962017059326, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.5315, |
|
"eval_samples_per_second": 23.9, |
|
"eval_steps_per_second": 3.054, |
|
"step": 9607 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 7.093343462159339e-06, |
|
"loss": 3.4785, |
|
"step": 10346 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.345838785171509, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.0543, |
|
"eval_samples_per_second": 25.516, |
|
"eval_steps_per_second": 3.26, |
|
"step": 10346 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 7.596619504320577e-08, |
|
"loss": 3.4798, |
|
"step": 11085 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.345510482788086, |
|
"eval_new_wer": 1.0, |
|
"eval_old_wer": 1.0, |
|
"eval_runtime": 7.0297, |
|
"eval_samples_per_second": 25.606, |
|
"eval_steps_per_second": 3.272, |
|
"step": 11085 |
|
} |
|
], |
|
"max_steps": 11085, |
|
"num_train_epochs": 15, |
|
"total_flos": 8.649191629868698e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|