{ "best_metric": 0.13405902683734894, "best_model_checkpoint": "/home1/datawork/mcontini/models/multilabel/huggingface/DinoVdeau_Aina-large-2024_06_12-batch-size32_epochs150_freeze/checkpoint-7564", "epoch": 71.0, "eval_steps": 500, "global_step": 8804, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.5342987804878049, "eval_f1_macro": 0.42604577042963465, "eval_f1_micro": 0.7703523693803159, "eval_loss": 0.25564512610435486, "eval_roc_auc": 0.8456742309995622, "eval_runtime": 83.2883, "eval_samples_per_second": 15.753, "eval_steps_per_second": 0.492, "learning_rate": 0.001, "step": 124 }, { "epoch": 2.0, "eval_accuracy": 0.5449695121951219, "eval_f1_macro": 0.40573815087342935, "eval_f1_micro": 0.7607875994972768, "eval_loss": 0.1855568140745163, "eval_roc_auc": 0.832142389843843, "eval_runtime": 79.7287, "eval_samples_per_second": 16.456, "eval_steps_per_second": 0.514, "learning_rate": 0.001, "step": 248 }, { "epoch": 3.0, "eval_accuracy": 0.5335365853658537, "eval_f1_macro": 0.47696929852236075, "eval_f1_micro": 0.7772908366533864, "eval_loss": 0.15800759196281433, "eval_roc_auc": 0.8537812997478742, "eval_runtime": 80.1553, "eval_samples_per_second": 16.368, "eval_steps_per_second": 0.512, "learning_rate": 0.001, "step": 372 }, { "epoch": 4.0, "eval_accuracy": 0.5274390243902439, "eval_f1_macro": 0.39442465091480916, "eval_f1_micro": 0.7700247729149464, "eval_loss": 0.15843382477760315, "eval_roc_auc": 0.8406658939058698, "eval_runtime": 80.1541, "eval_samples_per_second": 16.368, "eval_steps_per_second": 0.512, "learning_rate": 0.001, "step": 496 }, { "epoch": 4.032258064516129, "grad_norm": 0.4048711955547333, "learning_rate": 0.001, "loss": 0.2548, "step": 500 }, { "epoch": 5.0, "eval_accuracy": 0.5442073170731707, "eval_f1_macro": 0.3747774830573479, "eval_f1_micro": 0.761375774407178, "eval_loss": 0.15615205466747284, "eval_roc_auc": 0.8277699238925672, "eval_runtime": 80.3203, "eval_samples_per_second": 16.335, "eval_steps_per_second": 0.51, "learning_rate": 0.001, "step": 620 }, { "epoch": 6.0, "eval_accuracy": 0.5548780487804879, "eval_f1_macro": 0.4447304719957427, "eval_f1_micro": 0.7822349570200573, "eval_loss": 0.1484147012233734, "eval_roc_auc": 0.8494905062813375, "eval_runtime": 80.7491, "eval_samples_per_second": 16.248, "eval_steps_per_second": 0.508, "learning_rate": 0.001, "step": 744 }, { "epoch": 7.0, "eval_accuracy": 0.5647865853658537, "eval_f1_macro": 0.5284590192832462, "eval_f1_micro": 0.7904456041750301, "eval_loss": 0.1444726288318634, "eval_roc_auc": 0.8591234560859945, "eval_runtime": 80.5725, "eval_samples_per_second": 16.283, "eval_steps_per_second": 0.509, "learning_rate": 0.001, "step": 868 }, { "epoch": 8.0, "eval_accuracy": 0.5487804878048781, "eval_f1_macro": 0.48048557941722936, "eval_f1_micro": 0.7778469197261979, "eval_loss": 0.14579781889915466, "eval_roc_auc": 0.8436993085412435, "eval_runtime": 80.707, "eval_samples_per_second": 16.256, "eval_steps_per_second": 0.508, "learning_rate": 0.001, "step": 992 }, { "epoch": 8.064516129032258, "grad_norm": 0.3004395663738251, "learning_rate": 0.001, "loss": 0.1582, "step": 1000 }, { "epoch": 9.0, "eval_accuracy": 0.5655487804878049, "eval_f1_macro": 0.4775190765596756, "eval_f1_micro": 0.7989700930877401, "eval_loss": 0.1419043242931366, "eval_roc_auc": 0.8675275442813744, "eval_runtime": 80.638, "eval_samples_per_second": 16.27, "eval_steps_per_second": 0.508, "learning_rate": 0.001, "step": 1116 }, { "epoch": 10.0, "eval_accuracy": 0.5663109756097561, "eval_f1_macro": 0.5507171237721333, "eval_f1_micro": 0.7916750858759345, "eval_loss": 0.14263293147087097, "eval_roc_auc": 0.8580334062479557, "eval_runtime": 80.2636, "eval_samples_per_second": 16.346, "eval_steps_per_second": 0.511, "learning_rate": 0.001, "step": 1240 }, { "epoch": 11.0, "eval_accuracy": 0.555640243902439, "eval_f1_macro": 0.43142510371678144, "eval_f1_micro": 0.7765845441145505, "eval_loss": 0.1459268480539322, "eval_roc_auc": 0.8392686548761934, "eval_runtime": 80.6868, "eval_samples_per_second": 16.26, "eval_steps_per_second": 0.508, "learning_rate": 0.001, "step": 1364 }, { "epoch": 12.0, "eval_accuracy": 0.5625, "eval_f1_macro": 0.47044459599035204, "eval_f1_micro": 0.7899022801302932, "eval_loss": 0.14237171411514282, "eval_roc_auc": 0.8550704459254762, "eval_runtime": 81.9396, "eval_samples_per_second": 16.012, "eval_steps_per_second": 0.5, "learning_rate": 0.001, "step": 1488 }, { "epoch": 12.096774193548388, "grad_norm": 0.25983572006225586, "learning_rate": 0.001, "loss": 0.149, "step": 1500 }, { "epoch": 13.0, "eval_accuracy": 0.5586890243902439, "eval_f1_macro": 0.5304167061533165, "eval_f1_micro": 0.7957159857199523, "eval_loss": 0.14195148646831512, "eval_roc_auc": 0.8653116932352117, "eval_runtime": 80.549, "eval_samples_per_second": 16.288, "eval_steps_per_second": 0.509, "learning_rate": 0.001, "step": 1612 }, { "epoch": 14.0, "eval_accuracy": 0.5685975609756098, "eval_f1_macro": 0.5302959882181708, "eval_f1_micro": 0.795869737887212, "eval_loss": 0.14145776629447937, "eval_roc_auc": 0.8650728615019182, "eval_runtime": 80.4626, "eval_samples_per_second": 16.306, "eval_steps_per_second": 0.51, "learning_rate": 0.001, "step": 1736 }, { "epoch": 15.0, "eval_accuracy": 0.5746951219512195, "eval_f1_macro": 0.5020556893881151, "eval_f1_micro": 0.7903159622486664, "eval_loss": 0.14466659724712372, "eval_roc_auc": 0.8532598120211833, "eval_runtime": 80.906, "eval_samples_per_second": 16.216, "eval_steps_per_second": 0.507, "learning_rate": 0.001, "step": 1860 }, { "epoch": 16.0, "eval_accuracy": 0.5548780487804879, "eval_f1_macro": 0.44075758480309546, "eval_f1_micro": 0.7779618889809444, "eval_loss": 0.15050330758094788, "eval_roc_auc": 0.8441269620268028, "eval_runtime": 80.6226, "eval_samples_per_second": 16.273, "eval_steps_per_second": 0.509, "learning_rate": 0.001, "step": 1984 }, { "epoch": 16.129032258064516, "grad_norm": 0.3009089231491089, "learning_rate": 0.001, "loss": 0.1425, "step": 2000 }, { "epoch": 17.0, "eval_accuracy": 0.5625, "eval_f1_macro": 0.5071297581342885, "eval_f1_micro": 0.7849117174959872, "eval_loss": 0.15037894248962402, "eval_roc_auc": 0.8561482291643951, "eval_runtime": 81.6976, "eval_samples_per_second": 16.059, "eval_steps_per_second": 0.502, "learning_rate": 0.001, "step": 2108 }, { "epoch": 18.0, "eval_accuracy": 0.5632621951219512, "eval_f1_macro": 0.4937774793717716, "eval_f1_micro": 0.7868521879411171, "eval_loss": 0.15835434198379517, "eval_roc_auc": 0.8558981792592568, "eval_runtime": 80.6141, "eval_samples_per_second": 16.275, "eval_steps_per_second": 0.509, "learning_rate": 0.001, "step": 2232 }, { "epoch": 19.0, "eval_accuracy": 0.5678353658536586, "eval_f1_macro": 0.52511737039556, "eval_f1_micro": 0.795441147573197, "eval_loss": 0.13946771621704102, "eval_roc_auc": 0.8677040994345202, "eval_runtime": 80.1025, "eval_samples_per_second": 16.379, "eval_steps_per_second": 0.512, "learning_rate": 0.001, "step": 2356 }, { "epoch": 20.0, "eval_accuracy": 0.5678353658536586, "eval_f1_macro": 0.5902607630639873, "eval_f1_micro": 0.8031007751937984, "eval_loss": 0.1404852569103241, "eval_roc_auc": 0.875969375858662, "eval_runtime": 80.4024, "eval_samples_per_second": 16.318, "eval_steps_per_second": 0.51, "learning_rate": 0.001, "step": 2480 }, { "epoch": 20.161290322580644, "grad_norm": 0.2623468041419983, "learning_rate": 0.001, "loss": 0.1405, "step": 2500 }, { "epoch": 21.0, "eval_accuracy": 0.5640243902439024, "eval_f1_macro": 0.4816903721736541, "eval_f1_micro": 0.7965933848286789, "eval_loss": 0.14341644942760468, "eval_roc_auc": 0.866186396110922, "eval_runtime": 80.893, "eval_samples_per_second": 16.219, "eval_steps_per_second": 0.507, "learning_rate": 0.001, "step": 2604 }, { "epoch": 22.0, "eval_accuracy": 0.5510670731707317, "eval_f1_macro": 0.5288404087153705, "eval_f1_micro": 0.7923046721633294, "eval_loss": 0.14662735164165497, "eval_roc_auc": 0.86618943655002, "eval_runtime": 81.0949, "eval_samples_per_second": 16.179, "eval_steps_per_second": 0.506, "learning_rate": 0.001, "step": 2728 }, { "epoch": 23.0, "eval_accuracy": 0.5746951219512195, "eval_f1_macro": 0.4974177137762122, "eval_f1_micro": 0.7918968692449356, "eval_loss": 0.14562036097049713, "eval_roc_auc": 0.8548203960203378, "eval_runtime": 80.619, "eval_samples_per_second": 16.274, "eval_steps_per_second": 0.509, "learning_rate": 0.001, "step": 2852 }, { "epoch": 24.0, "eval_accuracy": 0.5586890243902439, "eval_f1_macro": 0.5008214078450646, "eval_f1_micro": 0.7888934258881176, "eval_loss": 0.13980671763420105, "eval_roc_auc": 0.8537680895642065, "eval_runtime": 80.4871, "eval_samples_per_second": 16.301, "eval_steps_per_second": 0.509, "learning_rate": 0.001, "step": 2976 }, { "epoch": 24.193548387096776, "grad_norm": 0.23391689360141754, "learning_rate": 0.001, "loss": 0.1376, "step": 3000 }, { "epoch": 25.0, "eval_accuracy": 0.5807926829268293, "eval_f1_macro": 0.5881062115996991, "eval_f1_micro": 0.8018232263178755, "eval_loss": 0.13920389115810394, "eval_roc_auc": 0.8689728012802556, "eval_runtime": 80.5799, "eval_samples_per_second": 16.282, "eval_steps_per_second": 0.509, "learning_rate": 0.001, "step": 3100 }, { "epoch": 26.0, "eval_accuracy": 0.5846036585365854, "eval_f1_macro": 0.5378532463138629, "eval_f1_micro": 0.8024120603015076, "eval_loss": 0.14581459760665894, "eval_roc_auc": 0.8654015434526975, "eval_runtime": 80.464, "eval_samples_per_second": 16.305, "eval_steps_per_second": 0.51, "learning_rate": 0.001, "step": 3224 }, { "epoch": 27.0, "eval_accuracy": 0.5716463414634146, "eval_f1_macro": 0.530975959272302, "eval_f1_micro": 0.7989535117729925, "eval_loss": 0.1388114094734192, "eval_roc_auc": 0.8631509894637253, "eval_runtime": 80.2786, "eval_samples_per_second": 16.343, "eval_steps_per_second": 0.511, "learning_rate": 0.001, "step": 3348 }, { "epoch": 28.0, "eval_accuracy": 0.5647865853658537, "eval_f1_macro": 0.4926175534546719, "eval_f1_micro": 0.7952771662997797, "eval_loss": 0.14750176668167114, "eval_roc_auc": 0.8626263564552087, "eval_runtime": 82.3178, "eval_samples_per_second": 15.938, "eval_steps_per_second": 0.498, "learning_rate": 0.001, "step": 3472 }, { "epoch": 28.225806451612904, "grad_norm": 0.22590430080890656, "learning_rate": 0.001, "loss": 0.1361, "step": 3500 }, { "epoch": 29.0, "eval_accuracy": 0.5754573170731707, "eval_f1_macro": 0.4730643714031951, "eval_f1_micro": 0.7916238965304866, "eval_loss": 0.14280082285404205, "eval_roc_auc": 0.8538109702397628, "eval_runtime": 80.6442, "eval_samples_per_second": 16.269, "eval_steps_per_second": 0.508, "learning_rate": 0.001, "step": 3596 }, { "epoch": 30.0, "eval_accuracy": 0.5647865853658537, "eval_f1_macro": 0.5269721557360777, "eval_f1_micro": 0.7960474308300396, "eval_loss": 0.14457112550735474, "eval_roc_auc": 0.866475237825243, "eval_runtime": 81.7071, "eval_samples_per_second": 16.057, "eval_steps_per_second": 0.502, "learning_rate": 0.001, "step": 3720 }, { "epoch": 31.0, "eval_accuracy": 0.555640243902439, "eval_f1_macro": 0.5163333467519122, "eval_f1_micro": 0.7859719438877755, "eval_loss": 0.2518298327922821, "eval_roc_auc": 0.8570576349829149, "eval_runtime": 80.7326, "eval_samples_per_second": 16.251, "eval_steps_per_second": 0.508, "learning_rate": 0.001, "step": 3844 }, { "epoch": 32.0, "eval_accuracy": 0.573170731707317, "eval_f1_macro": 0.5223572225800281, "eval_f1_micro": 0.7984836392657622, "eval_loss": 0.13625293970108032, "eval_roc_auc": 0.8652351580441212, "eval_runtime": 80.8064, "eval_samples_per_second": 16.236, "eval_steps_per_second": 0.507, "learning_rate": 0.001, "step": 3968 }, { "epoch": 32.25806451612903, "grad_norm": 0.28910037875175476, "learning_rate": 0.001, "loss": 0.1362, "step": 4000 }, { "epoch": 33.0, "eval_accuracy": 0.583079268292683, "eval_f1_macro": 0.5094906036573911, "eval_f1_micro": 0.797995991983968, "eval_loss": 0.14134813845157623, "eval_roc_auc": 0.8637633758351773, "eval_runtime": 82.4025, "eval_samples_per_second": 15.922, "eval_steps_per_second": 0.498, "learning_rate": 0.001, "step": 4092 }, { "epoch": 34.0, "eval_accuracy": 0.5617378048780488, "eval_f1_macro": 0.539855430008089, "eval_f1_micro": 0.7939271255060729, "eval_loss": 0.13918223977088928, "eval_roc_auc": 0.8587927821233924, "eval_runtime": 80.9915, "eval_samples_per_second": 16.199, "eval_steps_per_second": 0.506, "learning_rate": 0.001, "step": 4216 }, { "epoch": 35.0, "eval_accuracy": 0.5876524390243902, "eval_f1_macro": 0.48732799827507844, "eval_f1_micro": 0.8020833333333333, "eval_loss": 0.14003774523735046, "eval_roc_auc": 0.8661527415953876, "eval_runtime": 81.3186, "eval_samples_per_second": 16.134, "eval_steps_per_second": 0.504, "learning_rate": 0.001, "step": 4340 }, { "epoch": 36.0, "eval_accuracy": 0.5853658536585366, "eval_f1_macro": 0.514951340134722, "eval_f1_micro": 0.8016096579476861, "eval_loss": 0.15099692344665527, "eval_roc_auc": 0.8646809593864436, "eval_runtime": 80.5701, "eval_samples_per_second": 16.284, "eval_steps_per_second": 0.509, "learning_rate": 0.001, "step": 4464 }, { "epoch": 36.29032258064516, "grad_norm": 0.24523891508579254, "learning_rate": 0.001, "loss": 0.1331, "step": 4500 }, { "epoch": 37.0, "eval_accuracy": 0.5571646341463414, "eval_f1_macro": 0.48501044595638915, "eval_f1_micro": 0.7900541407659917, "eval_loss": 0.14428909122943878, "eval_roc_auc": 0.8591734660670223, "eval_runtime": 80.6859, "eval_samples_per_second": 16.261, "eval_steps_per_second": 0.508, "learning_rate": 0.001, "step": 4588 }, { "epoch": 38.0, "eval_accuracy": 0.5708841463414634, "eval_f1_macro": 0.5062759605333714, "eval_f1_micro": 0.7945869521308826, "eval_loss": 0.14405055344104767, "eval_roc_auc": 0.8597521979229398, "eval_runtime": 83.2275, "eval_samples_per_second": 15.764, "eval_steps_per_second": 0.493, "learning_rate": 0.001, "step": 4712 }, { "epoch": 39.0, "eval_accuracy": 0.5891768292682927, "eval_f1_macro": 0.535367946456459, "eval_f1_micro": 0.8024193548387097, "eval_loss": 0.13543353974819183, "eval_roc_auc": 0.8645809394243883, "eval_runtime": 80.2873, "eval_samples_per_second": 16.341, "eval_steps_per_second": 0.511, "learning_rate": 0.0001, "step": 4836 }, { "epoch": 40.0, "eval_accuracy": 0.59375, "eval_f1_macro": 0.5341458086147945, "eval_f1_micro": 0.8035498184751916, "eval_loss": 0.1358059197664261, "eval_roc_auc": 0.8650973947001582, "eval_runtime": 80.5322, "eval_samples_per_second": 16.292, "eval_steps_per_second": 0.509, "learning_rate": 0.0001, "step": 4960 }, { "epoch": 40.32258064516129, "grad_norm": 0.21013255417346954, "learning_rate": 0.0001, "loss": 0.1273, "step": 5000 }, { "epoch": 41.0, "eval_accuracy": 0.5945121951219512, "eval_f1_macro": 0.5376190537551451, "eval_f1_micro": 0.8055001992825829, "eval_loss": 0.13504844903945923, "eval_roc_auc": 0.8694974342887721, "eval_runtime": 80.151, "eval_samples_per_second": 16.369, "eval_steps_per_second": 0.512, "learning_rate": 0.0001, "step": 5084 }, { "epoch": 42.0, "eval_accuracy": 0.5891768292682927, "eval_f1_macro": 0.5479834256786992, "eval_f1_micro": 0.8035073734555599, "eval_loss": 0.13691695034503937, "eval_roc_auc": 0.8683798108133951, "eval_runtime": 80.2889, "eval_samples_per_second": 16.341, "eval_steps_per_second": 0.511, "learning_rate": 0.0001, "step": 5208 }, { "epoch": 43.0, "eval_accuracy": 0.586890243902439, "eval_f1_macro": 0.5561858306297028, "eval_f1_micro": 0.8031840796019901, "eval_loss": 0.13574542105197906, "eval_roc_auc": 0.8685839396038789, "eval_runtime": 80.354, "eval_samples_per_second": 16.328, "eval_steps_per_second": 0.51, "learning_rate": 0.0001, "step": 5332 }, { "epoch": 44.0, "eval_accuracy": 0.5891768292682927, "eval_f1_macro": 0.5433240630212841, "eval_f1_micro": 0.8039920159680638, "eval_loss": 0.13488240540027618, "eval_roc_auc": 0.8682103849657206, "eval_runtime": 81.5739, "eval_samples_per_second": 16.084, "eval_steps_per_second": 0.503, "learning_rate": 0.0001, "step": 5456 }, { "epoch": 44.354838709677416, "grad_norm": 0.20432598888874054, "learning_rate": 0.0001, "loss": 0.1173, "step": 5500 }, { "epoch": 45.0, "eval_accuracy": 0.5815548780487805, "eval_f1_macro": 0.5564064426334538, "eval_f1_micro": 0.8024593415311385, "eval_loss": 0.1361350268125534, "eval_roc_auc": 0.8691116130514936, "eval_runtime": 80.2107, "eval_samples_per_second": 16.357, "eval_steps_per_second": 0.511, "learning_rate": 0.0001, "step": 5580 }, { "epoch": 46.0, "eval_accuracy": 0.59375, "eval_f1_macro": 0.5738450359977211, "eval_f1_micro": 0.8072669826224328, "eval_loss": 0.1349516659975052, "eval_roc_auc": 0.8730421669062672, "eval_runtime": 79.7909, "eval_samples_per_second": 16.443, "eval_steps_per_second": 0.514, "learning_rate": 0.0001, "step": 5704 }, { "epoch": 47.0, "eval_accuracy": 0.586890243902439, "eval_f1_macro": 0.5398873319319681, "eval_f1_micro": 0.8035892323030908, "eval_loss": 0.13875022530555725, "eval_roc_auc": 0.8682603949467482, "eval_runtime": 80.964, "eval_samples_per_second": 16.205, "eval_steps_per_second": 0.506, "learning_rate": 0.0001, "step": 5828 }, { "epoch": 48.0, "eval_accuracy": 0.586890243902439, "eval_f1_macro": 0.5475297928828294, "eval_f1_micro": 0.8029458598726115, "eval_loss": 0.1370573788881302, "eval_roc_auc": 0.8683951178516132, "eval_runtime": 81.4744, "eval_samples_per_second": 16.103, "eval_steps_per_second": 0.503, "learning_rate": 0.0001, "step": 5952 }, { "epoch": 48.38709677419355, "grad_norm": 0.27053794264793396, "learning_rate": 0.0001, "loss": 0.113, "step": 6000 }, { "epoch": 49.0, "eval_accuracy": 0.5769817073170732, "eval_f1_macro": 0.5558247567089973, "eval_f1_micro": 0.800396432111001, "eval_loss": 0.13690504431724548, "eval_roc_auc": 0.8681134054427634, "eval_runtime": 81.5829, "eval_samples_per_second": 16.082, "eval_steps_per_second": 0.503, "learning_rate": 0.0001, "step": 6076 }, { "epoch": 50.0, "eval_accuracy": 0.5853658536585366, "eval_f1_macro": 0.5774903390899776, "eval_f1_micro": 0.8055390702274975, "eval_loss": 0.13561294972896576, "eval_roc_auc": 0.87156629583095, "eval_runtime": 81.4442, "eval_samples_per_second": 16.109, "eval_steps_per_second": 0.503, "learning_rate": 0.0001, "step": 6200 }, { "epoch": 51.0, "eval_accuracy": 0.5899390243902439, "eval_f1_macro": 0.5639229458996943, "eval_f1_micro": 0.8056984566679858, "eval_loss": 0.13570135831832886, "eval_roc_auc": 0.8716009987737595, "eval_runtime": 82.1135, "eval_samples_per_second": 15.978, "eval_steps_per_second": 0.499, "learning_rate": 1e-05, "step": 6324 }, { "epoch": 52.0, "eval_accuracy": 0.5876524390243902, "eval_f1_macro": 0.5523965140127979, "eval_f1_micro": 0.803262383131092, "eval_loss": 0.13534915447235107, "eval_roc_auc": 0.8687380584133353, "eval_runtime": 81.0519, "eval_samples_per_second": 16.187, "eval_steps_per_second": 0.506, "learning_rate": 1e-05, "step": 6448 }, { "epoch": 52.41935483870968, "grad_norm": 0.23621758818626404, "learning_rate": 1e-05, "loss": 0.1109, "step": 6500 }, { "epoch": 53.0, "eval_accuracy": 0.5891768292682927, "eval_f1_macro": 0.5553165219596603, "eval_f1_micro": 0.8047619047619048, "eval_loss": 0.13460643589496613, "eval_roc_auc": 0.8702986424124899, "eval_runtime": 80.6286, "eval_samples_per_second": 16.272, "eval_steps_per_second": 0.509, "learning_rate": 1e-05, "step": 6572 }, { "epoch": 54.0, "eval_accuracy": 0.586890243902439, "eval_f1_macro": 0.5513512052592927, "eval_f1_micro": 0.80398406374502, "eval_loss": 0.13505160808563232, "eval_roc_auc": 0.8687574543179267, "eval_runtime": 81.4945, "eval_samples_per_second": 16.099, "eval_steps_per_second": 0.503, "learning_rate": 1e-05, "step": 6696 }, { "epoch": 55.0, "eval_accuracy": 0.5899390243902439, "eval_f1_macro": 0.5586243767362515, "eval_f1_micro": 0.8044295036582955, "eval_loss": 0.13664333522319794, "eval_roc_auc": 0.8710498405551801, "eval_runtime": 81.1963, "eval_samples_per_second": 16.158, "eval_steps_per_second": 0.505, "learning_rate": 1e-05, "step": 6820 }, { "epoch": 56.0, "eval_accuracy": 0.5876524390243902, "eval_f1_macro": 0.559616045636613, "eval_f1_micro": 0.8056215360253365, "eval_loss": 0.13582760095596313, "eval_roc_auc": 0.8714468799643031, "eval_runtime": 80.4045, "eval_samples_per_second": 16.318, "eval_steps_per_second": 0.51, "learning_rate": 1e-05, "step": 6944 }, { "epoch": 56.45161290322581, "grad_norm": 0.25219833850860596, "learning_rate": 1e-05, "loss": 0.1091, "step": 7000 }, { "epoch": 57.0, "eval_accuracy": 0.5891768292682927, "eval_f1_macro": 0.5592413950447728, "eval_f1_micro": 0.8046205935072694, "eval_loss": 0.1356770098209381, "eval_roc_auc": 0.869169800765268, "eval_runtime": 81.011, "eval_samples_per_second": 16.195, "eval_steps_per_second": 0.506, "learning_rate": 1e-05, "step": 7068 }, { "epoch": 58.0, "eval_accuracy": 0.586890243902439, "eval_f1_macro": 0.5652648109787017, "eval_f1_micro": 0.804201347602061, "eval_loss": 0.13557715713977814, "eval_roc_auc": 0.8703139494507081, "eval_runtime": 80.24, "eval_samples_per_second": 16.351, "eval_steps_per_second": 0.511, "learning_rate": 1e-05, "step": 7192 }, { "epoch": 59.0, "eval_accuracy": 0.5884146341463414, "eval_f1_macro": 0.564174198827338, "eval_f1_micro": 0.806522171405846, "eval_loss": 0.13481777906417847, "eval_roc_auc": 0.8706803747833949, "eval_runtime": 81.6324, "eval_samples_per_second": 16.072, "eval_steps_per_second": 0.502, "learning_rate": 1e-05, "step": 7316 }, { "epoch": 60.0, "eval_accuracy": 0.5899390243902439, "eval_f1_macro": 0.5543815900756193, "eval_f1_micro": 0.8047030689517736, "eval_loss": 0.13674791157245636, "eval_roc_auc": 0.8690503848986212, "eval_runtime": 80.1415, "eval_samples_per_second": 16.371, "eval_steps_per_second": 0.512, "learning_rate": 1.0000000000000002e-06, "step": 7440 }, { "epoch": 60.483870967741936, "grad_norm": 0.2383713722229004, "learning_rate": 1.0000000000000002e-06, "loss": 0.109, "step": 7500 }, { "epoch": 61.0, "eval_accuracy": 0.5876524390243902, "eval_f1_macro": 0.5561036196553861, "eval_f1_micro": 0.8045563549160671, "eval_loss": 0.13405902683734894, "eval_roc_auc": 0.8681950779275025, "eval_runtime": 80.1504, "eval_samples_per_second": 16.369, "eval_steps_per_second": 0.512, "learning_rate": 1.0000000000000002e-06, "step": 7564 }, { "epoch": 62.0, "eval_accuracy": 0.5861280487804879, "eval_f1_macro": 0.5646122700508593, "eval_f1_micro": 0.8052360174533915, "eval_loss": 0.134480819106102, "eval_roc_auc": 0.8706762859170216, "eval_runtime": 81.4224, "eval_samples_per_second": 16.114, "eval_steps_per_second": 0.504, "learning_rate": 1.0000000000000002e-06, "step": 7688 }, { "epoch": 63.0, "eval_accuracy": 0.5899390243902439, "eval_f1_macro": 0.5660740924208384, "eval_f1_micro": 0.805958291956306, "eval_loss": 0.13709656894207, "eval_roc_auc": 0.870695681821613, "eval_runtime": 81.6749, "eval_samples_per_second": 16.064, "eval_steps_per_second": 0.502, "learning_rate": 1.0000000000000002e-06, "step": 7812 }, { "epoch": 64.0, "eval_accuracy": 0.5929878048780488, "eval_f1_macro": 0.5521602551489271, "eval_f1_micro": 0.8056551174830745, "eval_loss": 0.13475064933300018, "eval_roc_auc": 0.8698056719076847, "eval_runtime": 81.2325, "eval_samples_per_second": 16.151, "eval_steps_per_second": 0.505, "learning_rate": 1.0000000000000002e-06, "step": 7936 }, { "epoch": 64.51612903225806, "grad_norm": 0.19244849681854248, "learning_rate": 1.0000000000000002e-06, "loss": 0.1082, "step": 8000 }, { "epoch": 65.0, "eval_accuracy": 0.5899390243902439, "eval_f1_macro": 0.5594424812390786, "eval_f1_micro": 0.8048345551812959, "eval_loss": 0.13584908843040466, "eval_roc_auc": 0.8707262958980493, "eval_runtime": 82.3082, "eval_samples_per_second": 15.94, "eval_steps_per_second": 0.498, "learning_rate": 1.0000000000000002e-06, "step": 8060 }, { "epoch": 66.0, "eval_accuracy": 0.5853658536585366, "eval_f1_macro": 0.5680592850157375, "eval_f1_micro": 0.8040500297796307, "eval_loss": 0.13522611558437347, "eval_roc_auc": 0.8697321771556923, "eval_runtime": 80.639, "eval_samples_per_second": 16.27, "eval_steps_per_second": 0.508, "learning_rate": 1.0000000000000002e-06, "step": 8184 }, { "epoch": 67.0, "eval_accuracy": 0.5876524390243902, "eval_f1_macro": 0.5689511664868826, "eval_f1_micro": 0.8059288537549407, "eval_loss": 0.1348220705986023, "eval_roc_auc": 0.8720633552021285, "eval_runtime": 81.4283, "eval_samples_per_second": 16.112, "eval_steps_per_second": 0.504, "learning_rate": 1.0000000000000002e-06, "step": 8308 }, { "epoch": 68.0, "eval_accuracy": 0.5853658536585366, "eval_f1_macro": 0.5680973012829017, "eval_f1_micro": 0.8046843985708614, "eval_loss": 0.13475003838539124, "eval_roc_auc": 0.8701445236030336, "eval_runtime": 81.5625, "eval_samples_per_second": 16.086, "eval_steps_per_second": 0.503, "learning_rate": 1.0000000000000002e-07, "step": 8432 }, { "epoch": 68.54838709677419, "grad_norm": 0.25118520855903625, "learning_rate": 1.0000000000000002e-07, "loss": 0.1091, "step": 8500 }, { "epoch": 69.0, "eval_accuracy": 0.5884146341463414, "eval_f1_macro": 0.5523522321966495, "eval_f1_micro": 0.8036036036036036, "eval_loss": 0.13508079946041107, "eval_roc_auc": 0.867166256242336, "eval_runtime": 82.3919, "eval_samples_per_second": 15.924, "eval_steps_per_second": 0.498, "learning_rate": 1.0000000000000002e-07, "step": 8556 }, { "epoch": 70.0, "eval_accuracy": 0.5853658536585366, "eval_f1_macro": 0.567977027563922, "eval_f1_micro": 0.8053744319304486, "eval_loss": 0.13654960691928864, "eval_roc_auc": 0.8718051275642436, "eval_runtime": 81.6414, "eval_samples_per_second": 16.07, "eval_steps_per_second": 0.502, "learning_rate": 1.0000000000000002e-07, "step": 8680 }, { "epoch": 71.0, "eval_accuracy": 0.5907012195121951, "eval_f1_macro": 0.5700988754371263, "eval_f1_micro": 0.8072885719944544, "eval_loss": 0.13474920392036438, "eval_roc_auc": 0.872221562877958, "eval_runtime": 82.9116, "eval_samples_per_second": 15.824, "eval_steps_per_second": 0.495, "learning_rate": 1.0000000000000002e-07, "step": 8804 }, { "epoch": 71.0, "learning_rate": 1.0000000000000002e-07, "step": 8804, "total_flos": 4.157361617896582e+20, "train_loss": 0.13389379075850644, "train_runtime": 24026.1554, "train_samples_per_second": 24.654, "train_steps_per_second": 0.774 } ], "logging_steps": 500, "max_steps": 18600, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.157361617896582e+20, "train_batch_size": 32, "trial_name": null, "trial_params": null }