{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 8.579088471849866,
  "eval_steps": 200,
  "global_step": 4800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18,
      "learning_rate": 2e-05,
      "loss": 0.5179,
      "step": 100
    },
    {
      "epoch": 0.36,
      "learning_rate": 4e-05,
      "loss": 0.4435,
      "step": 200
    },
    {
      "epoch": 0.36,
      "eval_f1_score": 0.4513274336283186,
      "eval_label_f1": 0.7168141592920354,
      "eval_loss": 0.4356614947319031,
      "eval_runtime": 45.8378,
      "eval_samples_per_second": 2.182,
      "eval_steps_per_second": 0.087,
      "eval_wer": 0.059877955758962625,
      "step": 200
    },
    {
      "epoch": 0.54,
      "learning_rate": 6e-05,
      "loss": 0.4296,
      "step": 300
    },
    {
      "epoch": 0.71,
      "learning_rate": 8e-05,
      "loss": 0.4309,
      "step": 400
    },
    {
      "epoch": 0.71,
      "eval_f1_score": 0.6751054852320675,
      "eval_label_f1": 0.8354430379746836,
      "eval_loss": 0.43058258295059204,
      "eval_runtime": 45.2842,
      "eval_samples_per_second": 2.208,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 400
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.0001,
      "loss": 0.4262,
      "step": 500
    },
    {
      "epoch": 1.07,
      "learning_rate": 9.987820251299122e-05,
      "loss": 0.4235,
      "step": 600
    },
    {
      "epoch": 1.07,
      "eval_f1_score": 0.6721991701244813,
      "eval_label_f1": 0.8547717842323652,
      "eval_loss": 0.42819398641586304,
      "eval_runtime": 45.2307,
      "eval_samples_per_second": 2.211,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 600
    },
    {
      "epoch": 1.25,
      "learning_rate": 9.951340343707852e-05,
      "loss": 0.423,
      "step": 700
    },
    {
      "epoch": 1.43,
      "learning_rate": 9.890738003669029e-05,
      "loss": 0.4267,
      "step": 800
    },
    {
      "epoch": 1.43,
      "eval_f1_score": 0.7073170731707317,
      "eval_label_f1": 0.845528455284553,
      "eval_loss": 0.4269372224807739,
      "eval_runtime": 45.2446,
      "eval_samples_per_second": 2.21,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 800
    },
    {
      "epoch": 1.61,
      "learning_rate": 9.806308479691595e-05,
      "loss": 0.4283,
      "step": 900
    },
    {
      "epoch": 1.79,
      "learning_rate": 9.698463103929542e-05,
      "loss": 0.4254,
      "step": 1000
    },
    {
      "epoch": 1.79,
      "eval_f1_score": 0.7272727272727273,
      "eval_label_f1": 0.8677685950413223,
      "eval_loss": 0.42638763785362244,
      "eval_runtime": 45.2086,
      "eval_samples_per_second": 2.212,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 1000
    },
    {
      "epoch": 1.97,
      "learning_rate": 9.567727288213005e-05,
      "loss": 0.4225,
      "step": 1100
    },
    {
      "epoch": 2.14,
      "learning_rate": 9.414737964294636e-05,
      "loss": 0.4264,
      "step": 1200
    },
    {
      "epoch": 2.14,
      "eval_f1_score": 0.7398373983739838,
      "eval_label_f1": 0.8780487804878049,
      "eval_loss": 0.42636168003082275,
      "eval_runtime": 45.2052,
      "eval_samples_per_second": 2.212,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 1200
    },
    {
      "epoch": 2.32,
      "learning_rate": 9.24024048078213e-05,
      "loss": 0.4263,
      "step": 1300
    },
    {
      "epoch": 2.5,
      "learning_rate": 9.045084971874738e-05,
      "loss": 0.4206,
      "step": 1400
    },
    {
      "epoch": 2.5,
      "eval_f1_score": 0.7206477732793523,
      "eval_label_f1": 0.8582995951417003,
      "eval_loss": 0.4262264370918274,
      "eval_runtime": 45.2829,
      "eval_samples_per_second": 2.208,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 1400
    },
    {
      "epoch": 2.68,
      "learning_rate": 8.83022221559489e-05,
      "loss": 0.4244,
      "step": 1500
    },
    {
      "epoch": 2.86,
      "learning_rate": 8.596699001693255e-05,
      "loss": 0.4232,
      "step": 1600
    },
    {
      "epoch": 2.86,
      "eval_f1_score": 0.7410358565737052,
      "eval_label_f1": 0.8685258964143426,
      "eval_loss": 0.42596718668937683,
      "eval_runtime": 45.2943,
      "eval_samples_per_second": 2.208,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 1600
    },
    {
      "epoch": 3.04,
      "learning_rate": 8.345653031794292e-05,
      "loss": 0.4224,
      "step": 1700
    },
    {
      "epoch": 3.22,
      "learning_rate": 8.07830737662829e-05,
      "loss": 0.4249,
      "step": 1800
    },
    {
      "epoch": 3.22,
      "eval_f1_score": 0.7603305785123967,
      "eval_label_f1": 0.8925619834710744,
      "eval_loss": 0.4255012273788452,
      "eval_runtime": 45.1763,
      "eval_samples_per_second": 2.214,
      "eval_steps_per_second": 0.089,
      "eval_wer": 0.059877955758962625,
      "step": 1800
    },
    {
      "epoch": 3.4,
      "learning_rate": 7.795964517353735e-05,
      "loss": 0.4231,
      "step": 1900
    },
    {
      "epoch": 3.57,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.4239,
      "step": 2000
    },
    {
      "epoch": 3.57,
      "eval_f1_score": 0.7630522088353413,
      "eval_label_f1": 0.8835341365461847,
      "eval_loss": 0.42558813095092773,
      "eval_runtime": 45.2083,
      "eval_samples_per_second": 2.212,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 2000
    },
    {
      "epoch": 3.75,
      "learning_rate": 7.191855733945387e-05,
      "loss": 0.4222,
      "step": 2100
    },
    {
      "epoch": 3.93,
      "learning_rate": 6.873032967079561e-05,
      "loss": 0.4213,
      "step": 2200
    },
    {
      "epoch": 3.93,
      "eval_f1_score": 0.7692307692307692,
      "eval_label_f1": 0.8987854251012146,
      "eval_loss": 0.42547106742858887,
      "eval_runtime": 45.1934,
      "eval_samples_per_second": 2.213,
      "eval_steps_per_second": 0.089,
      "eval_wer": 0.059877955758962625,
      "step": 2200
    },
    {
      "epoch": 4.11,
      "learning_rate": 6.545084971874738e-05,
      "loss": 0.4219,
      "step": 2300
    },
    {
      "epoch": 4.29,
      "learning_rate": 6.209609477998338e-05,
      "loss": 0.4213,
      "step": 2400
    },
    {
      "epoch": 4.29,
      "eval_f1_score": 0.7768595041322314,
      "eval_label_f1": 0.8925619834710744,
      "eval_loss": 0.4256010055541992,
      "eval_runtime": 45.2023,
      "eval_samples_per_second": 2.212,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 2400
    },
    {
      "epoch": 4.47,
      "learning_rate": 5.868240888334653e-05,
      "loss": 0.4206,
      "step": 2500
    },
    {
      "epoch": 4.65,
      "learning_rate": 5.522642316338268e-05,
      "loss": 0.4244,
      "step": 2600
    },
    {
      "epoch": 4.65,
      "eval_f1_score": 0.7710843373493976,
      "eval_label_f1": 0.899598393574297,
      "eval_loss": 0.42528876662254333,
      "eval_runtime": 45.2,
      "eval_samples_per_second": 2.212,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 2600
    },
    {
      "epoch": 4.83,
      "learning_rate": 5.174497483512506e-05,
      "loss": 0.4234,
      "step": 2700
    },
    {
      "epoch": 5.0,
      "learning_rate": 4.825502516487497e-05,
      "loss": 0.4234,
      "step": 2800
    },
    {
      "epoch": 5.0,
      "eval_f1_score": 0.7385892116182572,
      "eval_label_f1": 0.8796680497925312,
      "eval_loss": 0.42544686794281006,
      "eval_runtime": 45.4613,
      "eval_samples_per_second": 2.2,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 2800
    },
    {
      "epoch": 5.18,
      "learning_rate": 4.477357683661734e-05,
      "loss": 0.4227,
      "step": 2900
    },
    {
      "epoch": 5.36,
      "learning_rate": 4.131759111665349e-05,
      "loss": 0.4222,
      "step": 3000
    },
    {
      "epoch": 5.36,
      "eval_f1_score": 0.7916666666666667,
      "eval_label_f1": 0.9,
      "eval_loss": 0.4252234399318695,
      "eval_runtime": 45.4771,
      "eval_samples_per_second": 2.199,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 3000
    },
    {
      "epoch": 5.54,
      "learning_rate": 3.790390522001662e-05,
      "loss": 0.4233,
      "step": 3100
    },
    {
      "epoch": 5.72,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 0.4239,
      "step": 3200
    },
    {
      "epoch": 5.72,
      "eval_f1_score": 0.7800829875518673,
      "eval_label_f1": 0.896265560165975,
      "eval_loss": 0.42535117268562317,
      "eval_runtime": 45.3979,
      "eval_samples_per_second": 2.203,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 3200
    },
    {
      "epoch": 5.9,
      "learning_rate": 3.12696703292044e-05,
      "loss": 0.4197,
      "step": 3300
    },
    {
      "epoch": 6.08,
      "learning_rate": 2.8081442660546125e-05,
      "loss": 0.4201,
      "step": 3400
    },
    {
      "epoch": 6.08,
      "eval_f1_score": 0.7949790794979079,
      "eval_label_f1": 0.8953974895397491,
      "eval_loss": 0.4253609776496887,
      "eval_runtime": 45.5098,
      "eval_samples_per_second": 2.197,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 3400
    },
    {
      "epoch": 6.26,
      "learning_rate": 2.500000000000001e-05,
      "loss": 0.4226,
      "step": 3500
    },
    {
      "epoch": 6.43,
      "learning_rate": 2.2040354826462668e-05,
      "loss": 0.4194,
      "step": 3600
    },
    {
      "epoch": 6.43,
      "eval_f1_score": 0.7851239669421488,
      "eval_label_f1": 0.9008264462809917,
      "eval_loss": 0.4253368377685547,
      "eval_runtime": 45.4648,
      "eval_samples_per_second": 2.2,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 3600
    },
    {
      "epoch": 6.61,
      "learning_rate": 1.9216926233717085e-05,
      "loss": 0.4218,
      "step": 3700
    },
    {
      "epoch": 6.79,
      "learning_rate": 1.6543469682057106e-05,
      "loss": 0.4203,
      "step": 3800
    },
    {
      "epoch": 6.79,
      "eval_f1_score": 0.7933884297520661,
      "eval_label_f1": 0.9090909090909092,
      "eval_loss": 0.42517000436782837,
      "eval_runtime": 45.4234,
      "eval_samples_per_second": 2.202,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 3800
    },
    {
      "epoch": 6.97,
      "learning_rate": 1.4033009983067452e-05,
      "loss": 0.4215,
      "step": 3900
    },
    {
      "epoch": 7.15,
      "learning_rate": 1.1697777844051105e-05,
      "loss": 0.4214,
      "step": 4000
    },
    {
      "epoch": 7.15,
      "eval_f1_score": 0.8049792531120332,
      "eval_label_f1": 0.9045643153526971,
      "eval_loss": 0.4253119230270386,
      "eval_runtime": 45.4883,
      "eval_samples_per_second": 2.198,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 4000
    },
    {
      "epoch": 7.33,
      "learning_rate": 9.549150281252633e-06,
      "loss": 0.4202,
      "step": 4100
    },
    {
      "epoch": 7.51,
      "learning_rate": 7.597595192178702e-06,
      "loss": 0.4206,
      "step": 4200
    },
    {
      "epoch": 7.51,
      "eval_f1_score": 0.8,
      "eval_label_f1": 0.9,
      "eval_loss": 0.4253150224685669,
      "eval_runtime": 45.4534,
      "eval_samples_per_second": 2.2,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 4200
    },
    {
      "epoch": 7.69,
      "learning_rate": 5.852620357053651e-06,
      "loss": 0.423,
      "step": 4300
    },
    {
      "epoch": 7.86,
      "learning_rate": 4.322727117869951e-06,
      "loss": 0.4205,
      "step": 4400
    },
    {
      "epoch": 7.86,
      "eval_f1_score": 0.8049792531120332,
      "eval_label_f1": 0.9128630705394191,
      "eval_loss": 0.4252975285053253,
      "eval_runtime": 45.4095,
      "eval_samples_per_second": 2.202,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 4400
    },
    {
      "epoch": 8.04,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 0.4214,
      "step": 4500
    },
    {
      "epoch": 8.22,
      "learning_rate": 1.9369152030840556e-06,
      "loss": 0.4207,
      "step": 4600
    },
    {
      "epoch": 8.22,
      "eval_f1_score": 0.7950819672131147,
      "eval_label_f1": 0.9016393442622952,
      "eval_loss": 0.4253052771091461,
      "eval_runtime": 45.4187,
      "eval_samples_per_second": 2.202,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 4600
    },
    {
      "epoch": 8.4,
      "learning_rate": 1.0926199633097157e-06,
      "loss": 0.4219,
      "step": 4700
    },
    {
      "epoch": 8.58,
      "learning_rate": 4.865965629214819e-07,
      "loss": 0.4218,
      "step": 4800
    },
    {
      "epoch": 8.58,
      "eval_f1_score": 0.7983539094650206,
      "eval_label_f1": 0.897119341563786,
      "eval_loss": 0.4253281354904175,
      "eval_runtime": 45.4281,
      "eval_samples_per_second": 2.201,
      "eval_steps_per_second": 0.088,
      "eval_wer": 0.059877955758962625,
      "step": 4800
    }
  ],
  "logging_steps": 100,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9,
  "save_steps": 200,
  "total_flos": 1.31511649068167e+21,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}