cares-biobert-base / trainer_state.json
chizhik
re-trained model after eliminating annotation errors
2724fbb
{
"best_metric": 0.7364675967036229,
"best_model_checkpoint": "./CARES/checkpoints/bio-ber-stratified/run-3/checkpoint-4402",
"epoch": 62.0,
"global_step": 4402,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.0,
"eval_loss": 0.3057152032852173,
"eval_macro_f1": 0.0,
"eval_macro_precision": 0.0,
"eval_macro_recall": 0.0,
"eval_micro_f1": 0.0,
"eval_micro_precision": 0.0,
"eval_micro_recall": 0.0,
"eval_runtime": 2.8103,
"eval_samples_per_second": 343.733,
"eval_steps_per_second": 11.031,
"step": 142
},
{
"epoch": 4.0,
"eval_loss": 0.2555946707725525,
"eval_macro_f1": 0.030103995621237,
"eval_macro_precision": 0.11450892857142857,
"eval_macro_recall": 0.018604569384720493,
"eval_micro_f1": 0.06492679821769573,
"eval_micro_precision": 0.9444444444444444,
"eval_micro_recall": 0.03361898483849703,
"eval_runtime": 2.814,
"eval_samples_per_second": 343.287,
"eval_steps_per_second": 11.016,
"step": 284
},
{
"epoch": 6.0,
"eval_loss": 0.20202794671058655,
"eval_macro_f1": 0.20731282317116273,
"eval_macro_precision": 0.31073371688318574,
"eval_macro_recall": 0.17971428447401858,
"eval_micro_f1": 0.5260370697263901,
"eval_micro_precision": 0.7957276368491322,
"eval_micro_recall": 0.3928806855636124,
"eval_runtime": 2.8153,
"eval_samples_per_second": 343.125,
"eval_steps_per_second": 11.011,
"step": 426
},
{
"epoch": 7.04,
"learning_rate": 3.3448027530200134e-05,
"loss": 0.3166,
"step": 500
},
{
"epoch": 8.0,
"eval_loss": 0.16987857222557068,
"eval_macro_f1": 0.33747696352729556,
"eval_macro_precision": 0.5216496556534496,
"eval_macro_recall": 0.3003629712640149,
"eval_micro_f1": 0.6596736596736597,
"eval_micro_precision": 0.8032166508987701,
"eval_micro_recall": 0.5596572181938035,
"eval_runtime": 2.8142,
"eval_samples_per_second": 343.259,
"eval_steps_per_second": 11.016,
"step": 568
},
{
"epoch": 10.0,
"eval_loss": 0.15612231194972992,
"eval_macro_f1": 0.3962525677685097,
"eval_macro_precision": 0.5709879791429361,
"eval_macro_recall": 0.34789915051543907,
"eval_micro_f1": 0.6866002214839424,
"eval_micro_precision": 0.7802013422818792,
"eval_micro_recall": 0.6130520764667106,
"eval_runtime": 2.8176,
"eval_samples_per_second": 342.846,
"eval_steps_per_second": 11.002,
"step": 710
},
{
"epoch": 12.0,
"eval_loss": 0.15700845420360565,
"eval_macro_f1": 0.5033118074728004,
"eval_macro_precision": 0.6788170359469108,
"eval_macro_recall": 0.4359827145940205,
"eval_micro_f1": 0.7075812274368232,
"eval_micro_precision": 0.7821229050279329,
"eval_micro_recall": 0.6460118655240606,
"eval_runtime": 2.9435,
"eval_samples_per_second": 328.182,
"eval_steps_per_second": 10.532,
"step": 852
},
{
"epoch": 14.0,
"eval_loss": 0.14791876077651978,
"eval_macro_f1": 0.5201462693304842,
"eval_macro_precision": 0.6808348875622964,
"eval_macro_recall": 0.45464762510361434,
"eval_micro_f1": 0.7195077813970322,
"eval_micro_precision": 0.797752808988764,
"eval_micro_recall": 0.6552406064601186,
"eval_runtime": 2.814,
"eval_samples_per_second": 343.289,
"eval_steps_per_second": 11.017,
"step": 994
},
{
"epoch": 14.08,
"learning_rate": 3.091408605063951e-05,
"loss": 0.1146,
"step": 1000
},
{
"epoch": 16.0,
"eval_loss": 0.15235331654548645,
"eval_macro_f1": 0.560037439235141,
"eval_macro_precision": 0.6537080668718289,
"eval_macro_recall": 0.5093665280810307,
"eval_micro_f1": 0.7277091906721537,
"eval_micro_precision": 0.7583988563259471,
"eval_micro_recall": 0.6994067237969677,
"eval_runtime": 2.8165,
"eval_samples_per_second": 342.975,
"eval_steps_per_second": 11.006,
"step": 1136
},
{
"epoch": 18.0,
"eval_loss": 0.1468934863805771,
"eval_macro_f1": 0.5752898959236763,
"eval_macro_precision": 0.6785878888299093,
"eval_macro_recall": 0.5191620319354175,
"eval_micro_f1": 0.7432950191570881,
"eval_micro_precision": 0.7880354505169868,
"eval_micro_recall": 0.7033618984838497,
"eval_runtime": 2.8153,
"eval_samples_per_second": 343.13,
"eval_steps_per_second": 11.011,
"step": 1278
},
{
"epoch": 20.0,
"eval_loss": 0.15318024158477783,
"eval_macro_f1": 0.5614807929032789,
"eval_macro_precision": 0.6891794820810673,
"eval_macro_recall": 0.5011545072485702,
"eval_micro_f1": 0.7351351351351351,
"eval_micro_precision": 0.8108108108108109,
"eval_micro_recall": 0.6723796967699407,
"eval_runtime": 2.8153,
"eval_samples_per_second": 343.128,
"eval_steps_per_second": 11.011,
"step": 1420
},
{
"epoch": 21.13,
"learning_rate": 2.83801445710789e-05,
"loss": 0.0497,
"step": 1500
},
{
"epoch": 22.0,
"eval_loss": 0.15478584170341492,
"eval_macro_f1": 0.5700524565361512,
"eval_macro_precision": 0.6592231713192768,
"eval_macro_recall": 0.5224052472361357,
"eval_micro_f1": 0.7377900935226879,
"eval_micro_precision": 0.7773722627737226,
"eval_micro_recall": 0.7020435069215557,
"eval_runtime": 2.8156,
"eval_samples_per_second": 343.089,
"eval_steps_per_second": 11.01,
"step": 1562
},
{
"epoch": 24.0,
"eval_loss": 0.16469129920005798,
"eval_macro_f1": 0.5893809890315513,
"eval_macro_precision": 0.7250493831689879,
"eval_macro_recall": 0.5372381591153548,
"eval_micro_f1": 0.7337398373983739,
"eval_micro_precision": 0.7547038327526132,
"eval_micro_recall": 0.7139090309822017,
"eval_runtime": 2.8148,
"eval_samples_per_second": 343.185,
"eval_steps_per_second": 11.013,
"step": 1704
},
{
"epoch": 26.0,
"eval_loss": 0.1553143858909607,
"eval_macro_f1": 0.5787478878813199,
"eval_macro_precision": 0.6623158627962805,
"eval_macro_recall": 0.5281203519575204,
"eval_micro_f1": 0.7432293452176895,
"eval_micro_precision": 0.7742857142857142,
"eval_micro_recall": 0.7145682267633487,
"eval_runtime": 2.8124,
"eval_samples_per_second": 343.477,
"eval_steps_per_second": 11.023,
"step": 1846
},
{
"epoch": 28.0,
"eval_loss": 0.1586785614490509,
"eval_macro_f1": 0.5883554607456316,
"eval_macro_precision": 0.7177006367498535,
"eval_macro_recall": 0.5324948845496986,
"eval_micro_f1": 0.7415575465196417,
"eval_micro_precision": 0.7768953068592058,
"eval_micro_recall": 0.7092946605141727,
"eval_runtime": 2.8164,
"eval_samples_per_second": 342.99,
"eval_steps_per_second": 11.007,
"step": 1988
},
{
"epoch": 28.17,
"learning_rate": 2.5846203091518283e-05,
"loss": 0.0242,
"step": 2000
},
{
"epoch": 30.0,
"eval_loss": 0.16228000819683075,
"eval_macro_f1": 0.6422229361530924,
"eval_macro_precision": 0.790092149978405,
"eval_macro_recall": 0.5761555188160953,
"eval_micro_f1": 0.75272599366866,
"eval_micro_precision": 0.8069381598793364,
"eval_micro_recall": 0.7053394858272907,
"eval_runtime": 2.8172,
"eval_samples_per_second": 342.897,
"eval_steps_per_second": 11.004,
"step": 2130
},
{
"epoch": 32.0,
"eval_loss": 0.15960222482681274,
"eval_macro_f1": 0.6566298525236257,
"eval_macro_precision": 0.74358025279367,
"eval_macro_recall": 0.6057522843683747,
"eval_micro_f1": 0.7514529914529915,
"eval_micro_precision": 0.7805397727272727,
"eval_micro_recall": 0.7244561634805537,
"eval_runtime": 2.8157,
"eval_samples_per_second": 343.071,
"eval_steps_per_second": 11.01,
"step": 2272
},
{
"epoch": 34.0,
"eval_loss": 0.1677185595035553,
"eval_macro_f1": 0.6556730308579793,
"eval_macro_precision": 0.7959295044168222,
"eval_macro_recall": 0.5949893771939523,
"eval_micro_f1": 0.7518796992481203,
"eval_micro_precision": 0.7806955287437899,
"eval_micro_recall": 0.7251153592617007,
"eval_runtime": 2.8129,
"eval_samples_per_second": 343.412,
"eval_steps_per_second": 11.02,
"step": 2414
},
{
"epoch": 35.21,
"learning_rate": 2.331226161195767e-05,
"loss": 0.0135,
"step": 2500
},
{
"epoch": 36.0,
"eval_loss": 0.16759739816188812,
"eval_macro_f1": 0.6573674747389432,
"eval_macro_precision": 0.771138294992533,
"eval_macro_recall": 0.595599674775744,
"eval_micro_f1": 0.7523187907935418,
"eval_micro_precision": 0.7855093256814921,
"eval_micro_recall": 0.7218193803559657,
"eval_runtime": 2.8149,
"eval_samples_per_second": 343.173,
"eval_steps_per_second": 11.013,
"step": 2556
},
{
"epoch": 38.0,
"eval_loss": 0.1679902970790863,
"eval_macro_f1": 0.6639964013713953,
"eval_macro_precision": 0.758530396471428,
"eval_macro_recall": 0.6080264740396573,
"eval_micro_f1": 0.7524888431170615,
"eval_micro_precision": 0.7851002865329513,
"eval_micro_recall": 0.7224785761371127,
"eval_runtime": 2.8145,
"eval_samples_per_second": 343.228,
"eval_steps_per_second": 11.015,
"step": 2698
},
{
"epoch": 40.0,
"eval_loss": 0.17791299521923065,
"eval_macro_f1": 0.706771883149178,
"eval_macro_precision": 0.8182896833237783,
"eval_macro_recall": 0.6422691238382328,
"eval_micro_f1": 0.7502562350529552,
"eval_micro_precision": 0.7787234042553192,
"eval_micro_recall": 0.7237969676994067,
"eval_runtime": 2.8149,
"eval_samples_per_second": 343.168,
"eval_steps_per_second": 11.013,
"step": 2840
},
{
"epoch": 42.0,
"eval_loss": 0.17416273057460785,
"eval_macro_f1": 0.7042921705265328,
"eval_macro_precision": 0.8549297777881784,
"eval_macro_recall": 0.6327733363764919,
"eval_micro_f1": 0.7576791808873721,
"eval_micro_precision": 0.7855626326963907,
"eval_micro_recall": 0.7317073170731707,
"eval_runtime": 2.8128,
"eval_samples_per_second": 343.433,
"eval_steps_per_second": 11.021,
"step": 2982
},
{
"epoch": 42.25,
"learning_rate": 2.077832013239705e-05,
"loss": 0.0089,
"step": 3000
},
{
"epoch": 44.0,
"eval_loss": 0.17721112072467804,
"eval_macro_f1": 0.7133417842190977,
"eval_macro_precision": 0.8405823975317646,
"eval_macro_recall": 0.6551419828069049,
"eval_micro_f1": 0.7551782682512733,
"eval_micro_precision": 0.7787114845938375,
"eval_micro_recall": 0.7330257086354647,
"eval_runtime": 2.8136,
"eval_samples_per_second": 343.328,
"eval_steps_per_second": 11.018,
"step": 3124
},
{
"epoch": 46.0,
"eval_loss": 0.17867934703826904,
"eval_macro_f1": 0.7067733545432416,
"eval_macro_precision": 0.8456299689876698,
"eval_macro_recall": 0.6424972968645987,
"eval_micro_f1": 0.7581743869209809,
"eval_micro_precision": 0.7843551797040169,
"eval_micro_recall": 0.7336849044166117,
"eval_runtime": 2.8132,
"eval_samples_per_second": 343.385,
"eval_steps_per_second": 11.02,
"step": 3266
},
{
"epoch": 48.0,
"eval_loss": 0.18271668255329132,
"eval_macro_f1": 0.7082847532013592,
"eval_macro_precision": 0.8433782766054858,
"eval_macro_recall": 0.641905005936743,
"eval_micro_f1": 0.7553444180522565,
"eval_micro_precision": 0.7783216783216783,
"eval_micro_recall": 0.7336849044166117,
"eval_runtime": 2.8144,
"eval_samples_per_second": 343.24,
"eval_steps_per_second": 11.015,
"step": 3408
},
{
"epoch": 49.3,
"learning_rate": 1.8244378652836435e-05,
"loss": 0.0062,
"step": 3500
},
{
"epoch": 50.0,
"eval_loss": 0.18576982617378235,
"eval_macro_f1": 0.7214487691145703,
"eval_macro_precision": 0.8418509428350153,
"eval_macro_recall": 0.6584015327988482,
"eval_micro_f1": 0.7551299589603283,
"eval_micro_precision": 0.7846481876332623,
"eval_micro_recall": 0.7277521423862887,
"eval_runtime": 2.8151,
"eval_samples_per_second": 343.147,
"eval_steps_per_second": 11.012,
"step": 3550
},
{
"epoch": 52.0,
"eval_loss": 0.18477760255336761,
"eval_macro_f1": 0.7358242812719007,
"eval_macro_precision": 0.8363784298641037,
"eval_macro_recall": 0.682320650811777,
"eval_micro_f1": 0.7623529411764705,
"eval_micro_precision": 0.7777777777777778,
"eval_micro_recall": 0.7475280158206987,
"eval_runtime": 2.8152,
"eval_samples_per_second": 343.143,
"eval_steps_per_second": 11.012,
"step": 3692
},
{
"epoch": 54.0,
"eval_loss": 0.18875756859779358,
"eval_macro_f1": 0.7155614476825691,
"eval_macro_precision": 0.8418373811449409,
"eval_macro_recall": 0.6438779359984136,
"eval_micro_f1": 0.7593035165585524,
"eval_micro_precision": 0.7875354107648725,
"eval_micro_recall": 0.7330257086354647,
"eval_runtime": 2.8151,
"eval_samples_per_second": 343.149,
"eval_steps_per_second": 11.012,
"step": 3834
},
{
"epoch": 56.0,
"eval_loss": 0.18950717151165009,
"eval_macro_f1": 0.7224219338762576,
"eval_macro_precision": 0.8143469166127995,
"eval_macro_recall": 0.6672755163012613,
"eval_micro_f1": 0.7565011820330969,
"eval_micro_precision": 0.775623268698061,
"eval_micro_recall": 0.7382992748846408,
"eval_runtime": 2.8155,
"eval_samples_per_second": 343.104,
"eval_steps_per_second": 11.011,
"step": 3976
},
{
"epoch": 56.34,
"learning_rate": 1.5710437173275818e-05,
"loss": 0.0047,
"step": 4000
},
{
"epoch": 58.0,
"eval_loss": 0.19801756739616394,
"eval_macro_f1": 0.7241508401060731,
"eval_macro_precision": 0.8273979735617242,
"eval_macro_recall": 0.6621892130462943,
"eval_micro_f1": 0.7576676777890125,
"eval_micro_precision": 0.7751724137931034,
"eval_micro_recall": 0.7409360580092288,
"eval_runtime": 2.8137,
"eval_samples_per_second": 343.316,
"eval_steps_per_second": 11.017,
"step": 4118
},
{
"epoch": 60.0,
"eval_loss": 0.194396510720253,
"eval_macro_f1": 0.7287972344248086,
"eval_macro_precision": 0.8203998323639319,
"eval_macro_recall": 0.6775029786499779,
"eval_micro_f1": 0.7583222370173102,
"eval_micro_precision": 0.7659717552118359,
"eval_micro_recall": 0.7508239947264338,
"eval_runtime": 2.8142,
"eval_samples_per_second": 343.264,
"eval_steps_per_second": 11.016,
"step": 4260
},
{
"epoch": 62.0,
"eval_loss": 0.19370318949222565,
"eval_macro_f1": 0.7364675967036229,
"eval_macro_precision": 0.8295903604136947,
"eval_macro_recall": 0.6817263379239634,
"eval_micro_f1": 0.7599051811716898,
"eval_micro_precision": 0.7813370473537604,
"eval_micro_recall": 0.7396176664469347,
"eval_runtime": 2.8163,
"eval_samples_per_second": 343.005,
"eval_steps_per_second": 11.007,
"step": 4402
}
],
"max_steps": 7100,
"num_train_epochs": 100,
"total_flos": 3.3093548336434176e+16,
"trial_name": null,
"trial_params": {
"adam_epsilon": 3.039596615397574e-08,
"learning_rate": 3.349363847683222e-05,
"per_device_eval_batch_size": 32,
"per_device_train_batch_size": 32,
"seed": 326,
"warmup_steps": 491,
"weight_decay": 0.01214452830676255
}
}