|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.8596961572832886, |
|
"eval_steps": 200, |
|
"global_step": 1600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2e-05, |
|
"loss": 0.5179, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4435, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_f1_score": 0.4513274336283186, |
|
"eval_label_f1": 0.7168141592920354, |
|
"eval_loss": 0.4356614947319031, |
|
"eval_runtime": 45.8378, |
|
"eval_samples_per_second": 2.182, |
|
"eval_steps_per_second": 0.087, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 6e-05, |
|
"loss": 0.4296, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8e-05, |
|
"loss": 0.4309, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_f1_score": 0.6751054852320675, |
|
"eval_label_f1": 0.8354430379746836, |
|
"eval_loss": 0.43058258295059204, |
|
"eval_runtime": 45.2842, |
|
"eval_samples_per_second": 2.208, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0001, |
|
"loss": 0.4262, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.987820251299122e-05, |
|
"loss": 0.4235, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_f1_score": 0.6721991701244813, |
|
"eval_label_f1": 0.8547717842323652, |
|
"eval_loss": 0.42819398641586304, |
|
"eval_runtime": 45.2307, |
|
"eval_samples_per_second": 2.211, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 9.951340343707852e-05, |
|
"loss": 0.423, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.890738003669029e-05, |
|
"loss": 0.4267, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_f1_score": 0.7073170731707317, |
|
"eval_label_f1": 0.845528455284553, |
|
"eval_loss": 0.4269372224807739, |
|
"eval_runtime": 45.2446, |
|
"eval_samples_per_second": 2.21, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.806308479691595e-05, |
|
"loss": 0.4283, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 9.698463103929542e-05, |
|
"loss": 0.4254, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_f1_score": 0.7272727272727273, |
|
"eval_label_f1": 0.8677685950413223, |
|
"eval_loss": 0.42638763785362244, |
|
"eval_runtime": 45.2086, |
|
"eval_samples_per_second": 2.212, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 9.567727288213005e-05, |
|
"loss": 0.4225, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 9.414737964294636e-05, |
|
"loss": 0.4264, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_f1_score": 0.7398373983739838, |
|
"eval_label_f1": 0.8780487804878049, |
|
"eval_loss": 0.42636168003082275, |
|
"eval_runtime": 45.2052, |
|
"eval_samples_per_second": 2.212, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 9.24024048078213e-05, |
|
"loss": 0.4263, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 0.4206, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_f1_score": 0.7206477732793523, |
|
"eval_label_f1": 0.8582995951417003, |
|
"eval_loss": 0.4262264370918274, |
|
"eval_runtime": 45.2829, |
|
"eval_samples_per_second": 2.208, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 8.83022221559489e-05, |
|
"loss": 0.4244, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 8.596699001693255e-05, |
|
"loss": 0.4232, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_f1_score": 0.7410358565737052, |
|
"eval_label_f1": 0.8685258964143426, |
|
"eval_loss": 0.42596718668937683, |
|
"eval_runtime": 45.2943, |
|
"eval_samples_per_second": 2.208, |
|
"eval_steps_per_second": 0.088, |
|
"eval_wer": 0.059877955758962625, |
|
"step": 1600 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 200, |
|
"total_flos": 4.383735905718576e+20, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|