|
{ |
|
"best_metric": 0.6838614548013121, |
|
"best_model_checkpoint": "../saved_model/cino-small-v2_tncc-title_v3/checkpoint-7424", |
|
"epoch": 40.0, |
|
"eval_steps": 500, |
|
"global_step": 9280, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.49838187702265374, |
|
"eval_loss": 1.6120452880859375, |
|
"eval_macro-f1": 0.3080200150906938, |
|
"eval_macro-precision": 0.4470989896292958, |
|
"eval_macro-recall": 0.33407156196923765, |
|
"eval_runtime": 3.5999, |
|
"eval_samples_per_second": 257.505, |
|
"eval_steps_per_second": 8.056, |
|
"eval_weighted-f1": 0.423917385683886, |
|
"eval_weighted-precision": 0.5166350613267663, |
|
"eval_weighted-recall": 0.49838187702265374, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6472491909385113, |
|
"eval_loss": 1.1052128076553345, |
|
"eval_macro-f1": 0.5457350988248497, |
|
"eval_macro-precision": 0.6404982007168077, |
|
"eval_macro-recall": 0.559508539878825, |
|
"eval_runtime": 3.6454, |
|
"eval_samples_per_second": 254.294, |
|
"eval_steps_per_second": 7.955, |
|
"eval_weighted-f1": 0.6324317601702024, |
|
"eval_weighted-precision": 0.6658002868686018, |
|
"eval_weighted-recall": 0.6472491909385113, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.6939655172413796e-05, |
|
"loss": 1.7073, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6709816612729234, |
|
"eval_loss": 1.0001471042633057, |
|
"eval_macro-f1": 0.6065301675158895, |
|
"eval_macro-precision": 0.654418913280353, |
|
"eval_macro-recall": 0.6099139180853289, |
|
"eval_runtime": 3.6176, |
|
"eval_samples_per_second": 256.245, |
|
"eval_steps_per_second": 8.016, |
|
"eval_weighted-f1": 0.6605809984207812, |
|
"eval_weighted-precision": 0.6778557170715117, |
|
"eval_weighted-recall": 0.6709816612729234, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.686084142394822, |
|
"eval_loss": 0.9795448780059814, |
|
"eval_macro-f1": 0.645770505361501, |
|
"eval_macro-precision": 0.6624511940296568, |
|
"eval_macro-recall": 0.6454397105147797, |
|
"eval_runtime": 3.6312, |
|
"eval_samples_per_second": 255.29, |
|
"eval_steps_per_second": 7.986, |
|
"eval_weighted-f1": 0.680474769885252, |
|
"eval_weighted-precision": 0.6882611664755186, |
|
"eval_weighted-recall": 0.686084142394822, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 4.9568965517241384e-05, |
|
"loss": 0.8684, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6947141316073355, |
|
"eval_loss": 0.9905449748039246, |
|
"eval_macro-f1": 0.6674507510628628, |
|
"eval_macro-precision": 0.6625099174020581, |
|
"eval_macro-recall": 0.6829958649269056, |
|
"eval_runtime": 3.6247, |
|
"eval_samples_per_second": 255.745, |
|
"eval_steps_per_second": 8.001, |
|
"eval_weighted-f1": 0.6952993996868073, |
|
"eval_weighted-precision": 0.7036333325841285, |
|
"eval_weighted-recall": 0.6947141316073355, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7044228694714132, |
|
"eval_loss": 1.0819956064224243, |
|
"eval_macro-f1": 0.6835424949893771, |
|
"eval_macro-precision": 0.6831322585201048, |
|
"eval_macro-recall": 0.6897387284921295, |
|
"eval_runtime": 3.6099, |
|
"eval_samples_per_second": 256.793, |
|
"eval_steps_per_second": 8.033, |
|
"eval_weighted-f1": 0.7063455583024486, |
|
"eval_weighted-precision": 0.7133818379200453, |
|
"eval_weighted-recall": 0.7044228694714132, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 4.6575670498084294e-05, |
|
"loss": 0.4904, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6850053937432579, |
|
"eval_loss": 1.2797751426696777, |
|
"eval_macro-f1": 0.6529772810537373, |
|
"eval_macro-precision": 0.6632776375522283, |
|
"eval_macro-recall": 0.6621596382156031, |
|
"eval_runtime": 3.621, |
|
"eval_samples_per_second": 256.009, |
|
"eval_steps_per_second": 8.009, |
|
"eval_weighted-f1": 0.6864725766536225, |
|
"eval_weighted-precision": 0.7003185776965812, |
|
"eval_weighted-recall": 0.6850053937432579, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6796116504854369, |
|
"eval_loss": 1.3903510570526123, |
|
"eval_macro-f1": 0.6510499118775824, |
|
"eval_macro-precision": 0.6651601590963526, |
|
"eval_macro-recall": 0.6479225440423547, |
|
"eval_runtime": 3.6152, |
|
"eval_samples_per_second": 256.415, |
|
"eval_steps_per_second": 8.022, |
|
"eval_weighted-f1": 0.6783723538205079, |
|
"eval_weighted-precision": 0.6881154476659207, |
|
"eval_weighted-recall": 0.6796116504854369, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 4.3582375478927204e-05, |
|
"loss": 0.2376, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.674217907227616, |
|
"eval_loss": 1.4255810976028442, |
|
"eval_macro-f1": 0.6424901861691795, |
|
"eval_macro-precision": 0.6705435579848175, |
|
"eval_macro-recall": 0.6378243354162482, |
|
"eval_runtime": 3.6179, |
|
"eval_samples_per_second": 256.225, |
|
"eval_steps_per_second": 8.016, |
|
"eval_weighted-f1": 0.6736702968697759, |
|
"eval_weighted-precision": 0.6879912323928888, |
|
"eval_weighted-recall": 0.674217907227616, |
|
"step": 2088 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6882416396979504, |
|
"eval_loss": 1.5364353656768799, |
|
"eval_macro-f1": 0.6654116450501577, |
|
"eval_macro-precision": 0.6574262835666708, |
|
"eval_macro-recall": 0.6801476025508416, |
|
"eval_runtime": 3.6024, |
|
"eval_samples_per_second": 257.33, |
|
"eval_steps_per_second": 8.05, |
|
"eval_weighted-f1": 0.6895751725299344, |
|
"eval_weighted-precision": 0.6945151299816443, |
|
"eval_weighted-recall": 0.6882416396979504, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"learning_rate": 4.058908045977012e-05, |
|
"loss": 0.1495, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6828478964401294, |
|
"eval_loss": 1.6653459072113037, |
|
"eval_macro-f1": 0.6512014541822767, |
|
"eval_macro-precision": 0.6627229410666052, |
|
"eval_macro-recall": 0.6485507026145255, |
|
"eval_runtime": 3.5928, |
|
"eval_samples_per_second": 258.014, |
|
"eval_steps_per_second": 8.072, |
|
"eval_weighted-f1": 0.6800769417795847, |
|
"eval_weighted-precision": 0.6882902408612807, |
|
"eval_weighted-recall": 0.6828478964401294, |
|
"step": 2552 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6839266450916937, |
|
"eval_loss": 1.6987706422805786, |
|
"eval_macro-f1": 0.6479532709170456, |
|
"eval_macro-precision": 0.6580973534101414, |
|
"eval_macro-recall": 0.6474043097812547, |
|
"eval_runtime": 3.6004, |
|
"eval_samples_per_second": 257.471, |
|
"eval_steps_per_second": 8.055, |
|
"eval_weighted-f1": 0.6841454877516878, |
|
"eval_weighted-precision": 0.6902044355228595, |
|
"eval_weighted-recall": 0.6839266450916937, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 3.759578544061303e-05, |
|
"loss": 0.1058, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6817691477885652, |
|
"eval_loss": 1.747767448425293, |
|
"eval_macro-f1": 0.6556363951964088, |
|
"eval_macro-precision": 0.6700908993068831, |
|
"eval_macro-recall": 0.6561971975750561, |
|
"eval_runtime": 3.6016, |
|
"eval_samples_per_second": 257.383, |
|
"eval_steps_per_second": 8.052, |
|
"eval_weighted-f1": 0.6814628702483411, |
|
"eval_weighted-precision": 0.6867012225910729, |
|
"eval_weighted-recall": 0.6817691477885652, |
|
"step": 3016 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.674217907227616, |
|
"eval_loss": 1.918135166168213, |
|
"eval_macro-f1": 0.6397376565535934, |
|
"eval_macro-precision": 0.6549903289689117, |
|
"eval_macro-recall": 0.6473498155004918, |
|
"eval_runtime": 3.5879, |
|
"eval_samples_per_second": 258.368, |
|
"eval_steps_per_second": 8.083, |
|
"eval_weighted-f1": 0.6765985742439555, |
|
"eval_weighted-precision": 0.6901672277432537, |
|
"eval_weighted-recall": 0.674217907227616, |
|
"step": 3248 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6957928802588996, |
|
"eval_loss": 1.9204330444335938, |
|
"eval_macro-f1": 0.6658200325646808, |
|
"eval_macro-precision": 0.6663228409772431, |
|
"eval_macro-recall": 0.6692964824595501, |
|
"eval_runtime": 3.5954, |
|
"eval_samples_per_second": 257.831, |
|
"eval_steps_per_second": 8.066, |
|
"eval_weighted-f1": 0.6957701345209284, |
|
"eval_weighted-precision": 0.69903863020904, |
|
"eval_weighted-recall": 0.6957928802588996, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 3.460249042145594e-05, |
|
"loss": 0.0775, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6752966558791802, |
|
"eval_loss": 1.9924219846725464, |
|
"eval_macro-f1": 0.6392465953749347, |
|
"eval_macro-precision": 0.6561467817801693, |
|
"eval_macro-recall": 0.6402901277480958, |
|
"eval_runtime": 3.6181, |
|
"eval_samples_per_second": 256.214, |
|
"eval_steps_per_second": 8.015, |
|
"eval_weighted-f1": 0.673631619187917, |
|
"eval_weighted-precision": 0.6858049526730299, |
|
"eval_weighted-recall": 0.6752966558791802, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.686084142394822, |
|
"eval_loss": 2.0478932857513428, |
|
"eval_macro-f1": 0.6463214194781717, |
|
"eval_macro-precision": 0.6570967483791249, |
|
"eval_macro-recall": 0.6503589081998865, |
|
"eval_runtime": 3.5962, |
|
"eval_samples_per_second": 257.771, |
|
"eval_steps_per_second": 8.064, |
|
"eval_weighted-f1": 0.6852865958011676, |
|
"eval_weighted-precision": 0.6939600166349821, |
|
"eval_weighted-recall": 0.686084142394822, |
|
"step": 3944 |
|
}, |
|
{ |
|
"epoch": 17.24, |
|
"learning_rate": 3.160919540229885e-05, |
|
"loss": 0.0687, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.674217907227616, |
|
"eval_loss": 2.0522632598876953, |
|
"eval_macro-f1": 0.6405085398516451, |
|
"eval_macro-precision": 0.6662302886679464, |
|
"eval_macro-recall": 0.6393535717017708, |
|
"eval_runtime": 3.6131, |
|
"eval_samples_per_second": 256.565, |
|
"eval_steps_per_second": 8.026, |
|
"eval_weighted-f1": 0.674060546264426, |
|
"eval_weighted-precision": 0.6894424888472697, |
|
"eval_weighted-recall": 0.674217907227616, |
|
"step": 4176 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6828478964401294, |
|
"eval_loss": 1.9823521375656128, |
|
"eval_macro-f1": 0.651754402283885, |
|
"eval_macro-precision": 0.6440801367882046, |
|
"eval_macro-recall": 0.6683888090630882, |
|
"eval_runtime": 3.6037, |
|
"eval_samples_per_second": 257.233, |
|
"eval_steps_per_second": 8.047, |
|
"eval_weighted-f1": 0.6821812966643793, |
|
"eval_weighted-precision": 0.68760298706152, |
|
"eval_weighted-recall": 0.6828478964401294, |
|
"step": 4408 |
|
}, |
|
{ |
|
"epoch": 19.4, |
|
"learning_rate": 2.8615900383141765e-05, |
|
"loss": 0.0577, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6914778856526429, |
|
"eval_loss": 2.0154926776885986, |
|
"eval_macro-f1": 0.6659900934162544, |
|
"eval_macro-precision": 0.6712869574383351, |
|
"eval_macro-recall": 0.6703643770947892, |
|
"eval_runtime": 3.6164, |
|
"eval_samples_per_second": 256.332, |
|
"eval_steps_per_second": 8.019, |
|
"eval_weighted-f1": 0.6906611756556518, |
|
"eval_weighted-precision": 0.6985916032921785, |
|
"eval_weighted-recall": 0.6914778856526429, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6828478964401294, |
|
"eval_loss": 2.05501389503479, |
|
"eval_macro-f1": 0.6486634410090307, |
|
"eval_macro-precision": 0.6780446089713759, |
|
"eval_macro-recall": 0.6376704510538888, |
|
"eval_runtime": 3.5904, |
|
"eval_samples_per_second": 258.186, |
|
"eval_steps_per_second": 8.077, |
|
"eval_weighted-f1": 0.6755183997397308, |
|
"eval_weighted-precision": 0.6900556731555942, |
|
"eval_weighted-recall": 0.6828478964401294, |
|
"step": 4872 |
|
}, |
|
{ |
|
"epoch": 21.55, |
|
"learning_rate": 2.5622605363984675e-05, |
|
"loss": 0.0489, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.6796116504854369, |
|
"eval_loss": 2.0277795791625977, |
|
"eval_macro-f1": 0.6505710558420628, |
|
"eval_macro-precision": 0.6513644147664868, |
|
"eval_macro-recall": 0.6562986108294033, |
|
"eval_runtime": 3.5971, |
|
"eval_samples_per_second": 257.71, |
|
"eval_steps_per_second": 8.062, |
|
"eval_weighted-f1": 0.6787275977253558, |
|
"eval_weighted-precision": 0.6831300629390828, |
|
"eval_weighted-recall": 0.6796116504854369, |
|
"step": 5104 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6828478964401294, |
|
"eval_loss": 2.033602237701416, |
|
"eval_macro-f1": 0.6518094813918546, |
|
"eval_macro-precision": 0.6546940340503233, |
|
"eval_macro-recall": 0.6544133192355402, |
|
"eval_runtime": 3.6042, |
|
"eval_samples_per_second": 257.199, |
|
"eval_steps_per_second": 8.046, |
|
"eval_weighted-f1": 0.6812281754499014, |
|
"eval_weighted-precision": 0.6846668946206854, |
|
"eval_weighted-recall": 0.6828478964401294, |
|
"step": 5336 |
|
}, |
|
{ |
|
"epoch": 23.71, |
|
"learning_rate": 2.2629310344827588e-05, |
|
"loss": 0.0431, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.697950377562028, |
|
"eval_loss": 2.0058302879333496, |
|
"eval_macro-f1": 0.672432519382824, |
|
"eval_macro-precision": 0.7031870372780317, |
|
"eval_macro-recall": 0.6570710914948313, |
|
"eval_runtime": 3.6053, |
|
"eval_samples_per_second": 257.123, |
|
"eval_steps_per_second": 8.044, |
|
"eval_weighted-f1": 0.6956222916424519, |
|
"eval_weighted-precision": 0.7050444644388131, |
|
"eval_weighted-recall": 0.697950377562028, |
|
"step": 5568 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.6903991370010788, |
|
"eval_loss": 2.0136592388153076, |
|
"eval_macro-f1": 0.6593408320415061, |
|
"eval_macro-precision": 0.6670076069784594, |
|
"eval_macro-recall": 0.6569151453566795, |
|
"eval_runtime": 3.6053, |
|
"eval_samples_per_second": 257.124, |
|
"eval_steps_per_second": 8.044, |
|
"eval_weighted-f1": 0.6888694520013063, |
|
"eval_weighted-precision": 0.6914605561085521, |
|
"eval_weighted-recall": 0.6903991370010788, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 25.86, |
|
"learning_rate": 1.9636015325670498e-05, |
|
"loss": 0.0372, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.6763754045307443, |
|
"eval_loss": 2.21449875831604, |
|
"eval_macro-f1": 0.6400140077032914, |
|
"eval_macro-precision": 0.649984379772116, |
|
"eval_macro-recall": 0.6499316041741732, |
|
"eval_runtime": 3.6097, |
|
"eval_samples_per_second": 256.806, |
|
"eval_steps_per_second": 8.034, |
|
"eval_weighted-f1": 0.6791315210465927, |
|
"eval_weighted-precision": 0.6961390172701661, |
|
"eval_weighted-recall": 0.6763754045307443, |
|
"step": 6032 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6882416396979504, |
|
"eval_loss": 2.1051621437072754, |
|
"eval_macro-f1": 0.6563104764431434, |
|
"eval_macro-precision": 0.6584303947936838, |
|
"eval_macro-recall": 0.6627288799113591, |
|
"eval_runtime": 3.5999, |
|
"eval_samples_per_second": 257.511, |
|
"eval_steps_per_second": 8.056, |
|
"eval_weighted-f1": 0.6864978936047862, |
|
"eval_weighted-precision": 0.6922661220105522, |
|
"eval_weighted-recall": 0.6882416396979504, |
|
"step": 6264 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6947141316073355, |
|
"eval_loss": 2.096506357192993, |
|
"eval_macro-f1": 0.6701111894500894, |
|
"eval_macro-precision": 0.6973353037360721, |
|
"eval_macro-recall": 0.6569800023874061, |
|
"eval_runtime": 3.6151, |
|
"eval_samples_per_second": 256.427, |
|
"eval_steps_per_second": 8.022, |
|
"eval_weighted-f1": 0.6888264314204101, |
|
"eval_weighted-precision": 0.6996849509212928, |
|
"eval_weighted-recall": 0.6947141316073355, |
|
"step": 6496 |
|
}, |
|
{ |
|
"epoch": 28.02, |
|
"learning_rate": 1.664272030651341e-05, |
|
"loss": 0.0357, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.6947141316073355, |
|
"eval_loss": 2.094637632369995, |
|
"eval_macro-f1": 0.6685294866541375, |
|
"eval_macro-precision": 0.6813823548362715, |
|
"eval_macro-recall": 0.6641734635067332, |
|
"eval_runtime": 3.6038, |
|
"eval_samples_per_second": 257.226, |
|
"eval_steps_per_second": 8.047, |
|
"eval_weighted-f1": 0.6943390563150669, |
|
"eval_weighted-precision": 0.7001493288391278, |
|
"eval_weighted-recall": 0.6947141316073355, |
|
"step": 6728 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.6893203883495146, |
|
"eval_loss": 2.0883684158325195, |
|
"eval_macro-f1": 0.6602023917903279, |
|
"eval_macro-precision": 0.6628650635074098, |
|
"eval_macro-recall": 0.6659307948401801, |
|
"eval_runtime": 3.6134, |
|
"eval_samples_per_second": 256.543, |
|
"eval_steps_per_second": 8.026, |
|
"eval_weighted-f1": 0.6902134268034101, |
|
"eval_weighted-precision": 0.6951233806637598, |
|
"eval_weighted-recall": 0.6893203883495146, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 30.17, |
|
"learning_rate": 1.3649425287356324e-05, |
|
"loss": 0.0286, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.703344120819849, |
|
"eval_loss": 2.126500368118286, |
|
"eval_macro-f1": 0.6756885174044472, |
|
"eval_macro-precision": 0.6887882836127188, |
|
"eval_macro-recall": 0.6701991060577401, |
|
"eval_runtime": 3.6102, |
|
"eval_samples_per_second": 256.771, |
|
"eval_steps_per_second": 8.033, |
|
"eval_weighted-f1": 0.7025418148884912, |
|
"eval_weighted-precision": 0.7070891495572698, |
|
"eval_weighted-recall": 0.703344120819849, |
|
"step": 7192 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7022653721682848, |
|
"eval_loss": 2.110332489013672, |
|
"eval_macro-f1": 0.6838614548013121, |
|
"eval_macro-precision": 0.6918280242510296, |
|
"eval_macro-recall": 0.6819433903345183, |
|
"eval_runtime": 3.6092, |
|
"eval_samples_per_second": 256.84, |
|
"eval_steps_per_second": 8.035, |
|
"eval_weighted-f1": 0.7015731601193966, |
|
"eval_weighted-precision": 0.7068600524270214, |
|
"eval_weighted-recall": 0.7022653721682848, |
|
"step": 7424 |
|
}, |
|
{ |
|
"epoch": 32.33, |
|
"learning_rate": 1.0656130268199234e-05, |
|
"loss": 0.0248, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.6925566343042071, |
|
"eval_loss": 2.076742649078369, |
|
"eval_macro-f1": 0.6677185111141611, |
|
"eval_macro-precision": 0.6682997232361648, |
|
"eval_macro-recall": 0.6726620472560384, |
|
"eval_runtime": 3.6011, |
|
"eval_samples_per_second": 257.423, |
|
"eval_steps_per_second": 8.053, |
|
"eval_weighted-f1": 0.693162535561584, |
|
"eval_weighted-precision": 0.6976019284855784, |
|
"eval_weighted-recall": 0.6925566343042071, |
|
"step": 7656 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.686084142394822, |
|
"eval_loss": 2.1223480701446533, |
|
"eval_macro-f1": 0.661874148838316, |
|
"eval_macro-precision": 0.6610753547813631, |
|
"eval_macro-recall": 0.6713446322584083, |
|
"eval_runtime": 3.6215, |
|
"eval_samples_per_second": 255.972, |
|
"eval_steps_per_second": 8.008, |
|
"eval_weighted-f1": 0.6841907777006906, |
|
"eval_weighted-precision": 0.6894138997369361, |
|
"eval_weighted-recall": 0.686084142394822, |
|
"step": 7888 |
|
}, |
|
{ |
|
"epoch": 34.48, |
|
"learning_rate": 7.662835249042145e-06, |
|
"loss": 0.0235, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.6903991370010788, |
|
"eval_loss": 2.133087635040283, |
|
"eval_macro-f1": 0.662335562798904, |
|
"eval_macro-precision": 0.6736920728738612, |
|
"eval_macro-recall": 0.6597960660569285, |
|
"eval_runtime": 3.6194, |
|
"eval_samples_per_second": 256.117, |
|
"eval_steps_per_second": 8.012, |
|
"eval_weighted-f1": 0.6874362655396534, |
|
"eval_weighted-precision": 0.6919778724812079, |
|
"eval_weighted-recall": 0.6903991370010788, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.697950377562028, |
|
"eval_loss": 2.122838020324707, |
|
"eval_macro-f1": 0.670156602236844, |
|
"eval_macro-precision": 0.6690594327579752, |
|
"eval_macro-recall": 0.6751260562968397, |
|
"eval_runtime": 3.6045, |
|
"eval_samples_per_second": 257.182, |
|
"eval_steps_per_second": 8.046, |
|
"eval_weighted-f1": 0.6980890949772001, |
|
"eval_weighted-precision": 0.7007470226449659, |
|
"eval_weighted-recall": 0.697950377562028, |
|
"step": 8352 |
|
}, |
|
{ |
|
"epoch": 36.64, |
|
"learning_rate": 4.669540229885057e-06, |
|
"loss": 0.0211, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.6990291262135923, |
|
"eval_loss": 2.1283230781555176, |
|
"eval_macro-f1": 0.6734553818830601, |
|
"eval_macro-precision": 0.6869153083999092, |
|
"eval_macro-recall": 0.6662304826991132, |
|
"eval_runtime": 3.6053, |
|
"eval_samples_per_second": 257.124, |
|
"eval_steps_per_second": 8.044, |
|
"eval_weighted-f1": 0.6984888091028846, |
|
"eval_weighted-precision": 0.7035914434375581, |
|
"eval_weighted-recall": 0.6990291262135923, |
|
"step": 8584 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.6957928802588996, |
|
"eval_loss": 2.121021270751953, |
|
"eval_macro-f1": 0.6693193516472609, |
|
"eval_macro-precision": 0.6852915803739035, |
|
"eval_macro-recall": 0.6603953493083169, |
|
"eval_runtime": 3.5959, |
|
"eval_samples_per_second": 257.795, |
|
"eval_steps_per_second": 8.065, |
|
"eval_weighted-f1": 0.6937517174106657, |
|
"eval_weighted-precision": 0.6980351944602388, |
|
"eval_weighted-recall": 0.6957928802588996, |
|
"step": 8816 |
|
}, |
|
{ |
|
"epoch": 38.79, |
|
"learning_rate": 1.6762452107279694e-06, |
|
"loss": 0.0175, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.6957928802588996, |
|
"eval_loss": 2.125298023223877, |
|
"eval_macro-f1": 0.670175716053461, |
|
"eval_macro-precision": 0.6823954080156959, |
|
"eval_macro-recall": 0.6637507266661784, |
|
"eval_runtime": 3.6051, |
|
"eval_samples_per_second": 257.138, |
|
"eval_steps_per_second": 8.044, |
|
"eval_weighted-f1": 0.6946838990403312, |
|
"eval_weighted-precision": 0.6981818094755908, |
|
"eval_weighted-recall": 0.6957928802588996, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.697950377562028, |
|
"eval_loss": 2.123837471008301, |
|
"eval_macro-f1": 0.672888533432289, |
|
"eval_macro-precision": 0.6846457775029204, |
|
"eval_macro-recall": 0.6663735016267468, |
|
"eval_runtime": 3.6082, |
|
"eval_samples_per_second": 256.916, |
|
"eval_steps_per_second": 8.037, |
|
"eval_weighted-f1": 0.6972181365180552, |
|
"eval_weighted-precision": 0.700475447211827, |
|
"eval_weighted-recall": 0.697950377562028, |
|
"step": 9280 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 9280, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"total_flos": 3.933393471111168e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|