{ "best_metric": 0.6838614548013121, "best_model_checkpoint": "../saved_model/cino-small-v2_tncc-title_v3/checkpoint-7424", "epoch": 40.0, "eval_steps": 500, "global_step": 9280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.49838187702265374, "eval_loss": 1.6120452880859375, "eval_macro-f1": 0.3080200150906938, "eval_macro-precision": 0.4470989896292958, "eval_macro-recall": 0.33407156196923765, "eval_runtime": 3.5999, "eval_samples_per_second": 257.505, "eval_steps_per_second": 8.056, "eval_weighted-f1": 0.423917385683886, "eval_weighted-precision": 0.5166350613267663, "eval_weighted-recall": 0.49838187702265374, "step": 232 }, { "epoch": 2.0, "eval_accuracy": 0.6472491909385113, "eval_loss": 1.1052128076553345, "eval_macro-f1": 0.5457350988248497, "eval_macro-precision": 0.6404982007168077, "eval_macro-recall": 0.559508539878825, "eval_runtime": 3.6454, "eval_samples_per_second": 254.294, "eval_steps_per_second": 7.955, "eval_weighted-f1": 0.6324317601702024, "eval_weighted-precision": 0.6658002868686018, "eval_weighted-recall": 0.6472491909385113, "step": 464 }, { "epoch": 2.16, "learning_rate": 2.6939655172413796e-05, "loss": 1.7073, "step": 500 }, { "epoch": 3.0, "eval_accuracy": 0.6709816612729234, "eval_loss": 1.0001471042633057, "eval_macro-f1": 0.6065301675158895, "eval_macro-precision": 0.654418913280353, "eval_macro-recall": 0.6099139180853289, "eval_runtime": 3.6176, "eval_samples_per_second": 256.245, "eval_steps_per_second": 8.016, "eval_weighted-f1": 0.6605809984207812, "eval_weighted-precision": 0.6778557170715117, "eval_weighted-recall": 0.6709816612729234, "step": 696 }, { "epoch": 4.0, "eval_accuracy": 0.686084142394822, "eval_loss": 0.9795448780059814, "eval_macro-f1": 0.645770505361501, "eval_macro-precision": 0.6624511940296568, "eval_macro-recall": 0.6454397105147797, "eval_runtime": 3.6312, "eval_samples_per_second": 255.29, "eval_steps_per_second": 7.986, "eval_weighted-f1": 0.680474769885252, "eval_weighted-precision": 0.6882611664755186, "eval_weighted-recall": 0.686084142394822, "step": 928 }, { "epoch": 4.31, "learning_rate": 4.9568965517241384e-05, "loss": 0.8684, "step": 1000 }, { "epoch": 5.0, "eval_accuracy": 0.6947141316073355, "eval_loss": 0.9905449748039246, "eval_macro-f1": 0.6674507510628628, "eval_macro-precision": 0.6625099174020581, "eval_macro-recall": 0.6829958649269056, "eval_runtime": 3.6247, "eval_samples_per_second": 255.745, "eval_steps_per_second": 8.001, "eval_weighted-f1": 0.6952993996868073, "eval_weighted-precision": 0.7036333325841285, "eval_weighted-recall": 0.6947141316073355, "step": 1160 }, { "epoch": 6.0, "eval_accuracy": 0.7044228694714132, "eval_loss": 1.0819956064224243, "eval_macro-f1": 0.6835424949893771, "eval_macro-precision": 0.6831322585201048, "eval_macro-recall": 0.6897387284921295, "eval_runtime": 3.6099, "eval_samples_per_second": 256.793, "eval_steps_per_second": 8.033, "eval_weighted-f1": 0.7063455583024486, "eval_weighted-precision": 0.7133818379200453, "eval_weighted-recall": 0.7044228694714132, "step": 1392 }, { "epoch": 6.47, "learning_rate": 4.6575670498084294e-05, "loss": 0.4904, "step": 1500 }, { "epoch": 7.0, "eval_accuracy": 0.6850053937432579, "eval_loss": 1.2797751426696777, "eval_macro-f1": 0.6529772810537373, "eval_macro-precision": 0.6632776375522283, "eval_macro-recall": 0.6621596382156031, "eval_runtime": 3.621, "eval_samples_per_second": 256.009, "eval_steps_per_second": 8.009, "eval_weighted-f1": 0.6864725766536225, "eval_weighted-precision": 0.7003185776965812, "eval_weighted-recall": 0.6850053937432579, "step": 1624 }, { "epoch": 8.0, "eval_accuracy": 0.6796116504854369, "eval_loss": 1.3903510570526123, "eval_macro-f1": 0.6510499118775824, "eval_macro-precision": 0.6651601590963526, "eval_macro-recall": 0.6479225440423547, "eval_runtime": 3.6152, "eval_samples_per_second": 256.415, "eval_steps_per_second": 8.022, "eval_weighted-f1": 0.6783723538205079, "eval_weighted-precision": 0.6881154476659207, "eval_weighted-recall": 0.6796116504854369, "step": 1856 }, { "epoch": 8.62, "learning_rate": 4.3582375478927204e-05, "loss": 0.2376, "step": 2000 }, { "epoch": 9.0, "eval_accuracy": 0.674217907227616, "eval_loss": 1.4255810976028442, "eval_macro-f1": 0.6424901861691795, "eval_macro-precision": 0.6705435579848175, "eval_macro-recall": 0.6378243354162482, "eval_runtime": 3.6179, "eval_samples_per_second": 256.225, "eval_steps_per_second": 8.016, "eval_weighted-f1": 0.6736702968697759, "eval_weighted-precision": 0.6879912323928888, "eval_weighted-recall": 0.674217907227616, "step": 2088 }, { "epoch": 10.0, "eval_accuracy": 0.6882416396979504, "eval_loss": 1.5364353656768799, "eval_macro-f1": 0.6654116450501577, "eval_macro-precision": 0.6574262835666708, "eval_macro-recall": 0.6801476025508416, "eval_runtime": 3.6024, "eval_samples_per_second": 257.33, "eval_steps_per_second": 8.05, "eval_weighted-f1": 0.6895751725299344, "eval_weighted-precision": 0.6945151299816443, "eval_weighted-recall": 0.6882416396979504, "step": 2320 }, { "epoch": 10.78, "learning_rate": 4.058908045977012e-05, "loss": 0.1495, "step": 2500 }, { "epoch": 11.0, "eval_accuracy": 0.6828478964401294, "eval_loss": 1.6653459072113037, "eval_macro-f1": 0.6512014541822767, "eval_macro-precision": 0.6627229410666052, "eval_macro-recall": 0.6485507026145255, "eval_runtime": 3.5928, "eval_samples_per_second": 258.014, "eval_steps_per_second": 8.072, "eval_weighted-f1": 0.6800769417795847, "eval_weighted-precision": 0.6882902408612807, "eval_weighted-recall": 0.6828478964401294, "step": 2552 }, { "epoch": 12.0, "eval_accuracy": 0.6839266450916937, "eval_loss": 1.6987706422805786, "eval_macro-f1": 0.6479532709170456, "eval_macro-precision": 0.6580973534101414, "eval_macro-recall": 0.6474043097812547, "eval_runtime": 3.6004, "eval_samples_per_second": 257.471, "eval_steps_per_second": 8.055, "eval_weighted-f1": 0.6841454877516878, "eval_weighted-precision": 0.6902044355228595, "eval_weighted-recall": 0.6839266450916937, "step": 2784 }, { "epoch": 12.93, "learning_rate": 3.759578544061303e-05, "loss": 0.1058, "step": 3000 }, { "epoch": 13.0, "eval_accuracy": 0.6817691477885652, "eval_loss": 1.747767448425293, "eval_macro-f1": 0.6556363951964088, "eval_macro-precision": 0.6700908993068831, "eval_macro-recall": 0.6561971975750561, "eval_runtime": 3.6016, "eval_samples_per_second": 257.383, "eval_steps_per_second": 8.052, "eval_weighted-f1": 0.6814628702483411, "eval_weighted-precision": 0.6867012225910729, "eval_weighted-recall": 0.6817691477885652, "step": 3016 }, { "epoch": 14.0, "eval_accuracy": 0.674217907227616, "eval_loss": 1.918135166168213, "eval_macro-f1": 0.6397376565535934, "eval_macro-precision": 0.6549903289689117, "eval_macro-recall": 0.6473498155004918, "eval_runtime": 3.5879, "eval_samples_per_second": 258.368, "eval_steps_per_second": 8.083, "eval_weighted-f1": 0.6765985742439555, "eval_weighted-precision": 0.6901672277432537, "eval_weighted-recall": 0.674217907227616, "step": 3248 }, { "epoch": 15.0, "eval_accuracy": 0.6957928802588996, "eval_loss": 1.9204330444335938, "eval_macro-f1": 0.6658200325646808, "eval_macro-precision": 0.6663228409772431, "eval_macro-recall": 0.6692964824595501, "eval_runtime": 3.5954, "eval_samples_per_second": 257.831, "eval_steps_per_second": 8.066, "eval_weighted-f1": 0.6957701345209284, "eval_weighted-precision": 0.69903863020904, "eval_weighted-recall": 0.6957928802588996, "step": 3480 }, { "epoch": 15.09, "learning_rate": 3.460249042145594e-05, "loss": 0.0775, "step": 3500 }, { "epoch": 16.0, "eval_accuracy": 0.6752966558791802, "eval_loss": 1.9924219846725464, "eval_macro-f1": 0.6392465953749347, "eval_macro-precision": 0.6561467817801693, "eval_macro-recall": 0.6402901277480958, "eval_runtime": 3.6181, "eval_samples_per_second": 256.214, "eval_steps_per_second": 8.015, "eval_weighted-f1": 0.673631619187917, "eval_weighted-precision": 0.6858049526730299, "eval_weighted-recall": 0.6752966558791802, "step": 3712 }, { "epoch": 17.0, "eval_accuracy": 0.686084142394822, "eval_loss": 2.0478932857513428, "eval_macro-f1": 0.6463214194781717, "eval_macro-precision": 0.6570967483791249, "eval_macro-recall": 0.6503589081998865, "eval_runtime": 3.5962, "eval_samples_per_second": 257.771, "eval_steps_per_second": 8.064, "eval_weighted-f1": 0.6852865958011676, "eval_weighted-precision": 0.6939600166349821, "eval_weighted-recall": 0.686084142394822, "step": 3944 }, { "epoch": 17.24, "learning_rate": 3.160919540229885e-05, "loss": 0.0687, "step": 4000 }, { "epoch": 18.0, "eval_accuracy": 0.674217907227616, "eval_loss": 2.0522632598876953, "eval_macro-f1": 0.6405085398516451, "eval_macro-precision": 0.6662302886679464, "eval_macro-recall": 0.6393535717017708, "eval_runtime": 3.6131, "eval_samples_per_second": 256.565, "eval_steps_per_second": 8.026, "eval_weighted-f1": 0.674060546264426, "eval_weighted-precision": 0.6894424888472697, "eval_weighted-recall": 0.674217907227616, "step": 4176 }, { "epoch": 19.0, "eval_accuracy": 0.6828478964401294, "eval_loss": 1.9823521375656128, "eval_macro-f1": 0.651754402283885, "eval_macro-precision": 0.6440801367882046, "eval_macro-recall": 0.6683888090630882, "eval_runtime": 3.6037, "eval_samples_per_second": 257.233, "eval_steps_per_second": 8.047, "eval_weighted-f1": 0.6821812966643793, "eval_weighted-precision": 0.68760298706152, "eval_weighted-recall": 0.6828478964401294, "step": 4408 }, { "epoch": 19.4, "learning_rate": 2.8615900383141765e-05, "loss": 0.0577, "step": 4500 }, { "epoch": 20.0, "eval_accuracy": 0.6914778856526429, "eval_loss": 2.0154926776885986, "eval_macro-f1": 0.6659900934162544, "eval_macro-precision": 0.6712869574383351, "eval_macro-recall": 0.6703643770947892, "eval_runtime": 3.6164, "eval_samples_per_second": 256.332, "eval_steps_per_second": 8.019, "eval_weighted-f1": 0.6906611756556518, "eval_weighted-precision": 0.6985916032921785, "eval_weighted-recall": 0.6914778856526429, "step": 4640 }, { "epoch": 21.0, "eval_accuracy": 0.6828478964401294, "eval_loss": 2.05501389503479, "eval_macro-f1": 0.6486634410090307, "eval_macro-precision": 0.6780446089713759, "eval_macro-recall": 0.6376704510538888, "eval_runtime": 3.5904, "eval_samples_per_second": 258.186, "eval_steps_per_second": 8.077, "eval_weighted-f1": 0.6755183997397308, "eval_weighted-precision": 0.6900556731555942, "eval_weighted-recall": 0.6828478964401294, "step": 4872 }, { "epoch": 21.55, "learning_rate": 2.5622605363984675e-05, "loss": 0.0489, "step": 5000 }, { "epoch": 22.0, "eval_accuracy": 0.6796116504854369, "eval_loss": 2.0277795791625977, "eval_macro-f1": 0.6505710558420628, "eval_macro-precision": 0.6513644147664868, "eval_macro-recall": 0.6562986108294033, "eval_runtime": 3.5971, "eval_samples_per_second": 257.71, "eval_steps_per_second": 8.062, "eval_weighted-f1": 0.6787275977253558, "eval_weighted-precision": 0.6831300629390828, "eval_weighted-recall": 0.6796116504854369, "step": 5104 }, { "epoch": 23.0, "eval_accuracy": 0.6828478964401294, "eval_loss": 2.033602237701416, "eval_macro-f1": 0.6518094813918546, "eval_macro-precision": 0.6546940340503233, "eval_macro-recall": 0.6544133192355402, "eval_runtime": 3.6042, "eval_samples_per_second": 257.199, "eval_steps_per_second": 8.046, "eval_weighted-f1": 0.6812281754499014, "eval_weighted-precision": 0.6846668946206854, "eval_weighted-recall": 0.6828478964401294, "step": 5336 }, { "epoch": 23.71, "learning_rate": 2.2629310344827588e-05, "loss": 0.0431, "step": 5500 }, { "epoch": 24.0, "eval_accuracy": 0.697950377562028, "eval_loss": 2.0058302879333496, "eval_macro-f1": 0.672432519382824, "eval_macro-precision": 0.7031870372780317, "eval_macro-recall": 0.6570710914948313, "eval_runtime": 3.6053, "eval_samples_per_second": 257.123, "eval_steps_per_second": 8.044, "eval_weighted-f1": 0.6956222916424519, "eval_weighted-precision": 0.7050444644388131, "eval_weighted-recall": 0.697950377562028, "step": 5568 }, { "epoch": 25.0, "eval_accuracy": 0.6903991370010788, "eval_loss": 2.0136592388153076, "eval_macro-f1": 0.6593408320415061, "eval_macro-precision": 0.6670076069784594, "eval_macro-recall": 0.6569151453566795, "eval_runtime": 3.6053, "eval_samples_per_second": 257.124, "eval_steps_per_second": 8.044, "eval_weighted-f1": 0.6888694520013063, "eval_weighted-precision": 0.6914605561085521, "eval_weighted-recall": 0.6903991370010788, "step": 5800 }, { "epoch": 25.86, "learning_rate": 1.9636015325670498e-05, "loss": 0.0372, "step": 6000 }, { "epoch": 26.0, "eval_accuracy": 0.6763754045307443, "eval_loss": 2.21449875831604, "eval_macro-f1": 0.6400140077032914, "eval_macro-precision": 0.649984379772116, "eval_macro-recall": 0.6499316041741732, "eval_runtime": 3.6097, "eval_samples_per_second": 256.806, "eval_steps_per_second": 8.034, "eval_weighted-f1": 0.6791315210465927, "eval_weighted-precision": 0.6961390172701661, "eval_weighted-recall": 0.6763754045307443, "step": 6032 }, { "epoch": 27.0, "eval_accuracy": 0.6882416396979504, "eval_loss": 2.1051621437072754, "eval_macro-f1": 0.6563104764431434, "eval_macro-precision": 0.6584303947936838, "eval_macro-recall": 0.6627288799113591, "eval_runtime": 3.5999, "eval_samples_per_second": 257.511, "eval_steps_per_second": 8.056, "eval_weighted-f1": 0.6864978936047862, "eval_weighted-precision": 0.6922661220105522, "eval_weighted-recall": 0.6882416396979504, "step": 6264 }, { "epoch": 28.0, "eval_accuracy": 0.6947141316073355, "eval_loss": 2.096506357192993, "eval_macro-f1": 0.6701111894500894, "eval_macro-precision": 0.6973353037360721, "eval_macro-recall": 0.6569800023874061, "eval_runtime": 3.6151, "eval_samples_per_second": 256.427, "eval_steps_per_second": 8.022, "eval_weighted-f1": 0.6888264314204101, "eval_weighted-precision": 0.6996849509212928, "eval_weighted-recall": 0.6947141316073355, "step": 6496 }, { "epoch": 28.02, "learning_rate": 1.664272030651341e-05, "loss": 0.0357, "step": 6500 }, { "epoch": 29.0, "eval_accuracy": 0.6947141316073355, "eval_loss": 2.094637632369995, "eval_macro-f1": 0.6685294866541375, "eval_macro-precision": 0.6813823548362715, "eval_macro-recall": 0.6641734635067332, "eval_runtime": 3.6038, "eval_samples_per_second": 257.226, "eval_steps_per_second": 8.047, "eval_weighted-f1": 0.6943390563150669, "eval_weighted-precision": 0.7001493288391278, "eval_weighted-recall": 0.6947141316073355, "step": 6728 }, { "epoch": 30.0, "eval_accuracy": 0.6893203883495146, "eval_loss": 2.0883684158325195, "eval_macro-f1": 0.6602023917903279, "eval_macro-precision": 0.6628650635074098, "eval_macro-recall": 0.6659307948401801, "eval_runtime": 3.6134, "eval_samples_per_second": 256.543, "eval_steps_per_second": 8.026, "eval_weighted-f1": 0.6902134268034101, "eval_weighted-precision": 0.6951233806637598, "eval_weighted-recall": 0.6893203883495146, "step": 6960 }, { "epoch": 30.17, "learning_rate": 1.3649425287356324e-05, "loss": 0.0286, "step": 7000 }, { "epoch": 31.0, "eval_accuracy": 0.703344120819849, "eval_loss": 2.126500368118286, "eval_macro-f1": 0.6756885174044472, "eval_macro-precision": 0.6887882836127188, "eval_macro-recall": 0.6701991060577401, "eval_runtime": 3.6102, "eval_samples_per_second": 256.771, "eval_steps_per_second": 8.033, "eval_weighted-f1": 0.7025418148884912, "eval_weighted-precision": 0.7070891495572698, "eval_weighted-recall": 0.703344120819849, "step": 7192 }, { "epoch": 32.0, "eval_accuracy": 0.7022653721682848, "eval_loss": 2.110332489013672, "eval_macro-f1": 0.6838614548013121, "eval_macro-precision": 0.6918280242510296, "eval_macro-recall": 0.6819433903345183, "eval_runtime": 3.6092, "eval_samples_per_second": 256.84, "eval_steps_per_second": 8.035, "eval_weighted-f1": 0.7015731601193966, "eval_weighted-precision": 0.7068600524270214, "eval_weighted-recall": 0.7022653721682848, "step": 7424 }, { "epoch": 32.33, "learning_rate": 1.0656130268199234e-05, "loss": 0.0248, "step": 7500 }, { "epoch": 33.0, "eval_accuracy": 0.6925566343042071, "eval_loss": 2.076742649078369, "eval_macro-f1": 0.6677185111141611, "eval_macro-precision": 0.6682997232361648, "eval_macro-recall": 0.6726620472560384, "eval_runtime": 3.6011, "eval_samples_per_second": 257.423, "eval_steps_per_second": 8.053, "eval_weighted-f1": 0.693162535561584, "eval_weighted-precision": 0.6976019284855784, "eval_weighted-recall": 0.6925566343042071, "step": 7656 }, { "epoch": 34.0, "eval_accuracy": 0.686084142394822, "eval_loss": 2.1223480701446533, "eval_macro-f1": 0.661874148838316, "eval_macro-precision": 0.6610753547813631, "eval_macro-recall": 0.6713446322584083, "eval_runtime": 3.6215, "eval_samples_per_second": 255.972, "eval_steps_per_second": 8.008, "eval_weighted-f1": 0.6841907777006906, "eval_weighted-precision": 0.6894138997369361, "eval_weighted-recall": 0.686084142394822, "step": 7888 }, { "epoch": 34.48, "learning_rate": 7.662835249042145e-06, "loss": 0.0235, "step": 8000 }, { "epoch": 35.0, "eval_accuracy": 0.6903991370010788, "eval_loss": 2.133087635040283, "eval_macro-f1": 0.662335562798904, "eval_macro-precision": 0.6736920728738612, "eval_macro-recall": 0.6597960660569285, "eval_runtime": 3.6194, "eval_samples_per_second": 256.117, "eval_steps_per_second": 8.012, "eval_weighted-f1": 0.6874362655396534, "eval_weighted-precision": 0.6919778724812079, "eval_weighted-recall": 0.6903991370010788, "step": 8120 }, { "epoch": 36.0, "eval_accuracy": 0.697950377562028, "eval_loss": 2.122838020324707, "eval_macro-f1": 0.670156602236844, "eval_macro-precision": 0.6690594327579752, "eval_macro-recall": 0.6751260562968397, "eval_runtime": 3.6045, "eval_samples_per_second": 257.182, "eval_steps_per_second": 8.046, "eval_weighted-f1": 0.6980890949772001, "eval_weighted-precision": 0.7007470226449659, "eval_weighted-recall": 0.697950377562028, "step": 8352 }, { "epoch": 36.64, "learning_rate": 4.669540229885057e-06, "loss": 0.0211, "step": 8500 }, { "epoch": 37.0, "eval_accuracy": 0.6990291262135923, "eval_loss": 2.1283230781555176, "eval_macro-f1": 0.6734553818830601, "eval_macro-precision": 0.6869153083999092, "eval_macro-recall": 0.6662304826991132, "eval_runtime": 3.6053, "eval_samples_per_second": 257.124, "eval_steps_per_second": 8.044, "eval_weighted-f1": 0.6984888091028846, "eval_weighted-precision": 0.7035914434375581, "eval_weighted-recall": 0.6990291262135923, "step": 8584 }, { "epoch": 38.0, "eval_accuracy": 0.6957928802588996, "eval_loss": 2.121021270751953, "eval_macro-f1": 0.6693193516472609, "eval_macro-precision": 0.6852915803739035, "eval_macro-recall": 0.6603953493083169, "eval_runtime": 3.5959, "eval_samples_per_second": 257.795, "eval_steps_per_second": 8.065, "eval_weighted-f1": 0.6937517174106657, "eval_weighted-precision": 0.6980351944602388, "eval_weighted-recall": 0.6957928802588996, "step": 8816 }, { "epoch": 38.79, "learning_rate": 1.6762452107279694e-06, "loss": 0.0175, "step": 9000 }, { "epoch": 39.0, "eval_accuracy": 0.6957928802588996, "eval_loss": 2.125298023223877, "eval_macro-f1": 0.670175716053461, "eval_macro-precision": 0.6823954080156959, "eval_macro-recall": 0.6637507266661784, "eval_runtime": 3.6051, "eval_samples_per_second": 257.138, "eval_steps_per_second": 8.044, "eval_weighted-f1": 0.6946838990403312, "eval_weighted-precision": 0.6981818094755908, "eval_weighted-recall": 0.6957928802588996, "step": 9048 }, { "epoch": 40.0, "eval_accuracy": 0.697950377562028, "eval_loss": 2.123837471008301, "eval_macro-f1": 0.672888533432289, "eval_macro-precision": 0.6846457775029204, "eval_macro-recall": 0.6663735016267468, "eval_runtime": 3.6082, "eval_samples_per_second": 256.916, "eval_steps_per_second": 8.037, "eval_weighted-f1": 0.6972181365180552, "eval_weighted-precision": 0.700475447211827, "eval_weighted-recall": 0.697950377562028, "step": 9280 } ], "logging_steps": 500, "max_steps": 9280, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 3.933393471111168e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }