|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.0, |
|
"global_step": 26299, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.923951481044907e-05, |
|
"loss": 0.0205, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_EG_f1": 0.9960332310455804, |
|
"eval_ET_f1": 0.9988771614641814, |
|
"eval_TE_f1": 0.9952833720146739, |
|
"eval_loss": 0.0021067976485937834, |
|
"eval_overall_accuracy": 0.9995180550845189, |
|
"eval_overall_f1": 0.9967312922623949, |
|
"eval_overall_precision": 0.9966566866267466, |
|
"eval_overall_recall": 0.9968059090682237, |
|
"eval_runtime": 16.2281, |
|
"eval_samples_per_second": 411.569, |
|
"eval_steps_per_second": 25.758, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.8479029620898135e-05, |
|
"loss": 0.0027, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_EG_f1": 0.9955841628620612, |
|
"eval_ET_f1": 0.9992513849378649, |
|
"eval_TE_f1": 0.995433789954338, |
|
"eval_loss": 0.0024818568490445614, |
|
"eval_overall_accuracy": 0.9993931064027275, |
|
"eval_overall_f1": 0.9967563251659264, |
|
"eval_overall_precision": 0.9966568534504266, |
|
"eval_overall_recall": 0.9968558167390328, |
|
"eval_runtime": 16.4446, |
|
"eval_samples_per_second": 406.152, |
|
"eval_steps_per_second": 25.419, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.77185444313472e-05, |
|
"loss": 0.0018, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_EG_f1": 0.9987274496594056, |
|
"eval_ET_f1": 0.999700553975146, |
|
"eval_TE_f1": 0.9982783142450782, |
|
"eval_loss": 0.0007141608512029052, |
|
"eval_overall_accuracy": 0.9998393516948396, |
|
"eval_overall_f1": 0.9989020860365306, |
|
"eval_overall_precision": 0.9988522381356355, |
|
"eval_overall_recall": 0.9989519389130109, |
|
"eval_runtime": 16.3629, |
|
"eval_samples_per_second": 408.181, |
|
"eval_steps_per_second": 25.546, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.695805924179627e-05, |
|
"loss": 0.0015, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_EG_f1": 0.9991765850737331, |
|
"eval_ET_f1": 0.9992513849378649, |
|
"eval_TE_f1": 0.9983530468633028, |
|
"eval_loss": 0.0010974227916449308, |
|
"eval_overall_accuracy": 0.9997858022597862, |
|
"eval_overall_f1": 0.9989270118527761, |
|
"eval_overall_precision": 0.9989020860365306, |
|
"eval_overall_recall": 0.9989519389130109, |
|
"eval_runtime": 16.7722, |
|
"eval_samples_per_second": 398.219, |
|
"eval_steps_per_second": 24.922, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.6197574052245332e-05, |
|
"loss": 0.0005, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_EG_f1": 0.9982030548068285, |
|
"eval_ET_f1": 0.999850276987573, |
|
"eval_TE_f1": 0.9979038778260219, |
|
"eval_loss": 0.0007138435612432659, |
|
"eval_overall_accuracy": 0.9998125769773129, |
|
"eval_overall_f1": 0.998652425633859, |
|
"eval_overall_precision": 0.998702271025705, |
|
"eval_overall_recall": 0.9986025852173479, |
|
"eval_runtime": 16.5358, |
|
"eval_samples_per_second": 403.912, |
|
"eval_steps_per_second": 25.279, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.54370888626944e-05, |
|
"loss": 0.0008, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_EG_f1": 0.9983530468633028, |
|
"eval_ET_f1": 0.999550830962719, |
|
"eval_TE_f1": 0.9979038778260219, |
|
"eval_loss": 0.0009068112121894956, |
|
"eval_overall_accuracy": 0.9997947271656284, |
|
"eval_overall_f1": 0.9986025852173479, |
|
"eval_overall_precision": 0.9986025852173479, |
|
"eval_overall_recall": 0.9986025852173479, |
|
"eval_runtime": 16.3597, |
|
"eval_samples_per_second": 408.259, |
|
"eval_steps_per_second": 25.551, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.4676603673143466e-05, |
|
"loss": 0.0009, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_EG_f1": 0.9989519389130109, |
|
"eval_ET_f1": 0.999850276987573, |
|
"eval_TE_f1": 0.9988022159005839, |
|
"eval_loss": 0.0010577181819826365, |
|
"eval_overall_accuracy": 0.9998482766006819, |
|
"eval_overall_f1": 0.999201477267056, |
|
"eval_overall_precision": 0.999201477267056, |
|
"eval_overall_recall": 0.999201477267056, |
|
"eval_runtime": 16.3447, |
|
"eval_samples_per_second": 408.634, |
|
"eval_steps_per_second": 25.574, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.3916118483592534e-05, |
|
"loss": 0.0005, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_EG_f1": 0.9991765850737331, |
|
"eval_ET_f1": 0.999850276987573, |
|
"eval_TE_f1": 0.9991765850737331, |
|
"eval_loss": 0.0005780701176263392, |
|
"eval_overall_accuracy": 0.9998839762240508, |
|
"eval_overall_f1": 0.9994011378381076, |
|
"eval_overall_precision": 0.9993512650331853, |
|
"eval_overall_recall": 0.999451015621101, |
|
"eval_runtime": 16.6712, |
|
"eval_samples_per_second": 400.631, |
|
"eval_steps_per_second": 25.073, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.31556332940416e-05, |
|
"loss": 0.0002, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_EG_f1": 0.9988771614641814, |
|
"eval_ET_f1": 0.999850276987573, |
|
"eval_TE_f1": 0.9985777378546299, |
|
"eval_loss": 0.0005696099251508713, |
|
"eval_overall_accuracy": 0.999892901129893, |
|
"eval_overall_f1": 0.9991017067571614, |
|
"eval_overall_precision": 0.9990518488946554, |
|
"eval_overall_recall": 0.999151569596247, |
|
"eval_runtime": 16.4448, |
|
"eval_samples_per_second": 406.147, |
|
"eval_steps_per_second": 25.418, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.2395148104490666e-05, |
|
"loss": 0.0005, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_EG_f1": 0.9989519389130109, |
|
"eval_ET_f1": 0.999850276987573, |
|
"eval_TE_f1": 0.9988771614641814, |
|
"eval_loss": 0.0004689108463935554, |
|
"eval_overall_accuracy": 0.999892901129893, |
|
"eval_overall_f1": 0.9992264504054896, |
|
"eval_overall_precision": 0.9992015171174768, |
|
"eval_overall_recall": 0.9992513849378649, |
|
"eval_runtime": 16.3326, |
|
"eval_samples_per_second": 408.938, |
|
"eval_steps_per_second": 25.593, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.1634662914939731e-05, |
|
"loss": 0.0002, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_EG_f1": 0.9988771614641814, |
|
"eval_ET_f1": 1.0, |
|
"eval_TE_f1": 0.9990268732689572, |
|
"eval_loss": 0.0005850127199664712, |
|
"eval_overall_accuracy": 0.9999286007532621, |
|
"eval_overall_f1": 0.9993013274777922, |
|
"eval_overall_precision": 0.9992514596536753, |
|
"eval_overall_recall": 0.999351200279483, |
|
"eval_runtime": 16.4437, |
|
"eval_samples_per_second": 406.175, |
|
"eval_steps_per_second": 25.42, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.08741777253888e-05, |
|
"loss": 0.0008, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"eval_EG_f1": 0.9992513849378649, |
|
"eval_ET_f1": 1.0, |
|
"eval_TE_f1": 0.9992513849378649, |
|
"eval_loss": 0.0004198316019028425, |
|
"eval_overall_accuracy": 0.9999464505649466, |
|
"eval_overall_f1": 0.99950092329191, |
|
"eval_overall_precision": 0.99950092329191, |
|
"eval_overall_recall": 0.99950092329191, |
|
"eval_runtime": 16.4892, |
|
"eval_samples_per_second": 405.052, |
|
"eval_steps_per_second": 25.35, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.0113692535837865e-05, |
|
"loss": 0.0002, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"eval_EG_f1": 0.9991016619254379, |
|
"eval_ET_f1": 0.999700553975146, |
|
"eval_TE_f1": 0.9989519389130109, |
|
"eval_loss": 0.0011158857960253954, |
|
"eval_overall_accuracy": 0.9998482766006819, |
|
"eval_overall_f1": 0.9992513849378649, |
|
"eval_overall_precision": 0.9992513849378649, |
|
"eval_overall_recall": 0.9992513849378649, |
|
"eval_runtime": 16.7537, |
|
"eval_samples_per_second": 398.659, |
|
"eval_steps_per_second": 24.95, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 9.353207346286931e-06, |
|
"loss": 0.0003, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"eval_EG_f1": 0.9988022159005839, |
|
"eval_ET_f1": 0.999850276987573, |
|
"eval_TE_f1": 0.9986524928881569, |
|
"eval_loss": 0.0010174426715821028, |
|
"eval_overall_accuracy": 0.9998393516948396, |
|
"eval_overall_f1": 0.9991016619254379, |
|
"eval_overall_precision": 0.9991016619254379, |
|
"eval_overall_recall": 0.9991016619254379, |
|
"eval_runtime": 16.5225, |
|
"eval_samples_per_second": 404.237, |
|
"eval_steps_per_second": 25.299, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 8.592722156735998e-06, |
|
"loss": 0.0, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_EG_f1": 0.9992513849378649, |
|
"eval_ET_f1": 1.0, |
|
"eval_TE_f1": 0.9992513849378649, |
|
"eval_loss": 0.001382750691846013, |
|
"eval_overall_accuracy": 0.9999018260357353, |
|
"eval_overall_f1": 0.99950092329191, |
|
"eval_overall_precision": 0.99950092329191, |
|
"eval_overall_recall": 0.99950092329191, |
|
"eval_runtime": 16.3741, |
|
"eval_samples_per_second": 407.899, |
|
"eval_steps_per_second": 25.528, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 7.832236967185065e-06, |
|
"loss": 0.0002, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"eval_EG_f1": 0.9990268732689572, |
|
"eval_ET_f1": 0.9997754322928364, |
|
"eval_TE_f1": 0.9991016619254379, |
|
"eval_loss": 0.0008425627020187676, |
|
"eval_overall_accuracy": 0.9998839762240508, |
|
"eval_overall_f1": 0.9993013274777922, |
|
"eval_overall_precision": 0.9992514596536753, |
|
"eval_overall_recall": 0.999351200279483, |
|
"eval_runtime": 16.4088, |
|
"eval_samples_per_second": 407.037, |
|
"eval_steps_per_second": 25.474, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 7.071751777634132e-06, |
|
"loss": 0.0001, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"eval_EG_f1": 0.9994011079502919, |
|
"eval_ET_f1": 0.999700553975146, |
|
"eval_TE_f1": 0.9992513849378649, |
|
"eval_loss": 0.000884207256603986, |
|
"eval_overall_accuracy": 0.9999196758474198, |
|
"eval_overall_f1": 0.999451015621101, |
|
"eval_overall_precision": 0.999451015621101, |
|
"eval_overall_recall": 0.999451015621101, |
|
"eval_runtime": 16.6905, |
|
"eval_samples_per_second": 400.169, |
|
"eval_steps_per_second": 25.044, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 6.311266588083198e-06, |
|
"loss": 0.0005, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"eval_EG_f1": 0.9992513849378649, |
|
"eval_ET_f1": 1.0, |
|
"eval_TE_f1": 0.9992513849378649, |
|
"eval_loss": 0.0005574871320277452, |
|
"eval_overall_accuracy": 0.9999375256591043, |
|
"eval_overall_f1": 0.99950092329191, |
|
"eval_overall_precision": 0.99950092329191, |
|
"eval_overall_recall": 0.99950092329191, |
|
"eval_runtime": 16.5015, |
|
"eval_samples_per_second": 404.752, |
|
"eval_steps_per_second": 25.331, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 5.5507813985322644e-06, |
|
"loss": 0.0, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"eval_EG_f1": 0.9989519389130109, |
|
"eval_ET_f1": 1.0, |
|
"eval_TE_f1": 0.9989519389130109, |
|
"eval_loss": 0.0005674651474691927, |
|
"eval_overall_accuracy": 0.9999018260357353, |
|
"eval_overall_f1": 0.999301292608674, |
|
"eval_overall_precision": 0.999301292608674, |
|
"eval_overall_recall": 0.999301292608674, |
|
"eval_runtime": 16.4647, |
|
"eval_samples_per_second": 405.657, |
|
"eval_steps_per_second": 25.388, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 4.79029620898133e-06, |
|
"loss": 0.0001, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"eval_EG_f1": 0.9992513849378649, |
|
"eval_ET_f1": 1.0, |
|
"eval_TE_f1": 0.9992513849378649, |
|
"eval_loss": 0.0006091786199249327, |
|
"eval_overall_accuracy": 0.9999375256591043, |
|
"eval_overall_f1": 0.99950092329191, |
|
"eval_overall_precision": 0.99950092329191, |
|
"eval_overall_recall": 0.99950092329191, |
|
"eval_runtime": 16.4349, |
|
"eval_samples_per_second": 406.391, |
|
"eval_steps_per_second": 25.434, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 4.029811019430397e-06, |
|
"loss": 0.0, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"eval_EG_f1": 0.9994011079502919, |
|
"eval_ET_f1": 1.0, |
|
"eval_TE_f1": 0.9994011079502919, |
|
"eval_loss": 0.0006266526179388165, |
|
"eval_overall_accuracy": 0.9999553754707888, |
|
"eval_overall_f1": 0.9996007386335279, |
|
"eval_overall_precision": 0.9996007386335279, |
|
"eval_overall_recall": 0.9996007386335279, |
|
"eval_runtime": 16.6877, |
|
"eval_samples_per_second": 400.236, |
|
"eval_steps_per_second": 25.048, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 3.2693258298794635e-06, |
|
"loss": 0.0, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"eval_EG_f1": 0.9992513849378649, |
|
"eval_ET_f1": 1.0, |
|
"eval_TE_f1": 0.9992513849378649, |
|
"eval_loss": 0.0006635423633269966, |
|
"eval_overall_accuracy": 0.9999375256591043, |
|
"eval_overall_f1": 0.99950092329191, |
|
"eval_overall_precision": 0.99950092329191, |
|
"eval_overall_recall": 0.99950092329191, |
|
"eval_runtime": 16.716, |
|
"eval_samples_per_second": 399.557, |
|
"eval_steps_per_second": 25.006, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 2.50884064032853e-06, |
|
"loss": 0.0, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"eval_EG_f1": 0.9994011079502919, |
|
"eval_ET_f1": 1.0, |
|
"eval_TE_f1": 0.9994011079502919, |
|
"eval_loss": 0.0006512191030196846, |
|
"eval_overall_accuracy": 0.9999464505649466, |
|
"eval_overall_f1": 0.9996007386335279, |
|
"eval_overall_precision": 0.9996007386335279, |
|
"eval_overall_recall": 0.9996007386335279, |
|
"eval_runtime": 16.3818, |
|
"eval_samples_per_second": 407.709, |
|
"eval_steps_per_second": 25.516, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 1.7483554507775962e-06, |
|
"loss": 0.0, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"eval_EG_f1": 0.9994011079502919, |
|
"eval_ET_f1": 1.0, |
|
"eval_TE_f1": 0.9994011079502919, |
|
"eval_loss": 0.000729710329324007, |
|
"eval_overall_accuracy": 0.9999464505649466, |
|
"eval_overall_f1": 0.9996007386335279, |
|
"eval_overall_precision": 0.9996007386335279, |
|
"eval_overall_recall": 0.9996007386335279, |
|
"eval_runtime": 16.5641, |
|
"eval_samples_per_second": 403.222, |
|
"eval_steps_per_second": 25.235, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 9.878702612266627e-07, |
|
"loss": 0.0001, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"eval_EG_f1": 0.9994011079502919, |
|
"eval_ET_f1": 1.0, |
|
"eval_TE_f1": 0.9994011079502919, |
|
"eval_loss": 0.0005905419238843024, |
|
"eval_overall_accuracy": 0.9999464505649466, |
|
"eval_overall_f1": 0.9996007386335279, |
|
"eval_overall_precision": 0.9996007386335279, |
|
"eval_overall_recall": 0.9996007386335279, |
|
"eval_runtime": 16.5058, |
|
"eval_samples_per_second": 404.647, |
|
"eval_steps_per_second": 25.325, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 2.2738507167572915e-07, |
|
"loss": 0.0001, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"eval_EG_f1": 0.9994011079502919, |
|
"eval_ET_f1": 1.0, |
|
"eval_TE_f1": 0.9994011079502919, |
|
"eval_loss": 0.0006466144695878029, |
|
"eval_overall_accuracy": 0.9999464505649466, |
|
"eval_overall_f1": 0.9996007386335279, |
|
"eval_overall_precision": 0.9996007386335279, |
|
"eval_overall_recall": 0.9996007386335279, |
|
"eval_runtime": 16.8702, |
|
"eval_samples_per_second": 395.906, |
|
"eval_steps_per_second": 24.777, |
|
"step": 26000 |
|
} |
|
], |
|
"max_steps": 26299, |
|
"num_train_epochs": 7, |
|
"total_flos": 6405072201643752.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|