{ "best_metric": 0.8538806684223458, "best_model_checkpoint": "output/ipzs-sg-bert_xxl-bs-16/checkpoint-114021", "epoch": 9.0, "global_step": 114021, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 3.505111768884679e-05, "loss": 0.0964, "step": 500 }, { "epoch": 0.08, "learning_rate": 3.491223537769359e-05, "loss": 0.0133, "step": 1000 }, { "epoch": 0.12, "learning_rate": 3.4773353066540374e-05, "loss": 0.0109, "step": 1500 }, { "epoch": 0.16, "learning_rate": 3.463447075538717e-05, "loss": 0.0102, "step": 2000 }, { "epoch": 0.2, "learning_rate": 3.4495588444233954e-05, "loss": 0.0099, "step": 2500 }, { "epoch": 0.24, "learning_rate": 3.435670613308075e-05, "loss": 0.0098, "step": 3000 }, { "epoch": 0.28, "learning_rate": 3.421782382192754e-05, "loss": 0.0096, "step": 3500 }, { "epoch": 0.32, "learning_rate": 3.407894151077433e-05, "loss": 0.0088, "step": 4000 }, { "epoch": 0.36, "learning_rate": 3.394005919962113e-05, "loss": 0.0074, "step": 4500 }, { "epoch": 0.39, "learning_rate": 3.3801176888467914e-05, "loss": 0.0064, "step": 5000 }, { "epoch": 0.43, "learning_rate": 3.36622945773147e-05, "loss": 0.0057, "step": 5500 }, { "epoch": 0.47, "learning_rate": 3.35234122661615e-05, "loss": 0.005, "step": 6000 }, { "epoch": 0.51, "learning_rate": 3.338452995500829e-05, "loss": 0.0048, "step": 6500 }, { "epoch": 0.55, "learning_rate": 3.324564764385508e-05, "loss": 0.0042, "step": 7000 }, { "epoch": 0.59, "learning_rate": 3.310676533270187e-05, "loss": 0.0039, "step": 7500 }, { "epoch": 0.63, "learning_rate": 3.296788302154867e-05, "loss": 0.0038, "step": 8000 }, { "epoch": 0.67, "learning_rate": 3.2829000710395454e-05, "loss": 0.0036, "step": 8500 }, { "epoch": 0.71, "learning_rate": 3.269011839924224e-05, "loss": 0.0036, "step": 9000 }, { "epoch": 0.75, "learning_rate": 3.255123608808904e-05, "loss": 0.0033, "step": 9500 }, { "epoch": 0.79, "learning_rate": 3.241235377693583e-05, "loss": 0.0033, "step": 10000 }, { "epoch": 0.83, "learning_rate": 3.227347146578262e-05, "loss": 0.0031, "step": 10500 }, { "epoch": 0.87, "learning_rate": 3.2134589154629414e-05, "loss": 0.003, "step": 11000 }, { "epoch": 0.91, "learning_rate": 3.19957068434762e-05, "loss": 0.0029, "step": 11500 }, { "epoch": 0.95, "learning_rate": 3.1856824532322994e-05, "loss": 0.0029, "step": 12000 }, { "epoch": 0.99, "learning_rate": 3.171794222116978e-05, "loss": 0.0028, "step": 12500 }, { "epoch": 1.0, "eval_accuracy": 0.6213869893484506, "eval_f1": 0.7871180162568165, "eval_f1_macro": 0.09915671829069983, "eval_f1_weighted": 0.7187367346051451, "eval_loss": 0.00270162639208138, "eval_p": 0.8552003089493689, "eval_r": 0.7290764165655865, "eval_roc_auc": 0.8644347845537721, "eval_runtime": 154.8297, "eval_samples_per_second": 508.133, "eval_steps_per_second": 31.764, "step": 12669 }, { "epoch": 1.03, "learning_rate": 3.157905991001658e-05, "loss": 0.0027, "step": 13000 }, { "epoch": 1.07, "learning_rate": 3.144017759886337e-05, "loss": 0.0026, "step": 13500 }, { "epoch": 1.11, "learning_rate": 3.1301295287710154e-05, "loss": 0.0026, "step": 14000 }, { "epoch": 1.14, "learning_rate": 3.1162412976556954e-05, "loss": 0.0026, "step": 14500 }, { "epoch": 1.18, "learning_rate": 3.102353066540374e-05, "loss": 0.0025, "step": 15000 }, { "epoch": 1.22, "learning_rate": 3.0884648354250534e-05, "loss": 0.0025, "step": 15500 }, { "epoch": 1.26, "learning_rate": 3.074576604309732e-05, "loss": 0.0025, "step": 16000 }, { "epoch": 1.3, "learning_rate": 3.0606883731944114e-05, "loss": 0.0025, "step": 16500 }, { "epoch": 1.34, "learning_rate": 3.0468001420790908e-05, "loss": 0.0024, "step": 17000 }, { "epoch": 1.38, "learning_rate": 3.0329119109637698e-05, "loss": 0.0023, "step": 17500 }, { "epoch": 1.42, "learning_rate": 3.019023679848449e-05, "loss": 0.0023, "step": 18000 }, { "epoch": 1.46, "learning_rate": 3.005135448733128e-05, "loss": 0.0022, "step": 18500 }, { "epoch": 1.5, "learning_rate": 2.991247217617807e-05, "loss": 0.0024, "step": 19000 }, { "epoch": 1.54, "learning_rate": 2.9773589865024864e-05, "loss": 0.0023, "step": 19500 }, { "epoch": 1.58, "learning_rate": 2.9634707553871654e-05, "loss": 0.0023, "step": 20000 }, { "epoch": 1.62, "learning_rate": 2.9495825242718448e-05, "loss": 0.0022, "step": 20500 }, { "epoch": 1.66, "learning_rate": 2.9356942931565238e-05, "loss": 0.0022, "step": 21000 }, { "epoch": 1.7, "learning_rate": 2.921806062041203e-05, "loss": 0.0022, "step": 21500 }, { "epoch": 1.74, "learning_rate": 2.907917830925882e-05, "loss": 0.0021, "step": 22000 }, { "epoch": 1.78, "learning_rate": 2.894029599810561e-05, "loss": 0.0021, "step": 22500 }, { "epoch": 1.82, "learning_rate": 2.8801413686952405e-05, "loss": 0.0021, "step": 23000 }, { "epoch": 1.85, "learning_rate": 2.8662531375799195e-05, "loss": 0.0021, "step": 23500 }, { "epoch": 1.89, "learning_rate": 2.8523649064645988e-05, "loss": 0.0021, "step": 24000 }, { "epoch": 1.93, "learning_rate": 2.8384766753492778e-05, "loss": 0.0021, "step": 24500 }, { "epoch": 1.97, "learning_rate": 2.8245884442339568e-05, "loss": 0.0021, "step": 25000 }, { "epoch": 2.0, "eval_accuracy": 0.6822711442153697, "eval_f1": 0.8278927653585252, "eval_f1_macro": 0.17721735370294905, "eval_f1_weighted": 0.7911965688289363, "eval_loss": 0.002050888491794467, "eval_p": 0.8518841031375748, "eval_r": 0.8052157338416219, "eval_roc_auc": 0.9024905715784995, "eval_runtime": 150.3062, "eval_samples_per_second": 523.425, "eval_steps_per_second": 32.72, "step": 25338 }, { "epoch": 2.01, "learning_rate": 2.810700213118636e-05, "loss": 0.002, "step": 25500 }, { "epoch": 2.05, "learning_rate": 2.796811982003315e-05, "loss": 0.002, "step": 26000 }, { "epoch": 2.09, "learning_rate": 2.7829237508879945e-05, "loss": 0.0019, "step": 26500 }, { "epoch": 2.13, "learning_rate": 2.7690355197726735e-05, "loss": 0.0019, "step": 27000 }, { "epoch": 2.17, "learning_rate": 2.7551472886573525e-05, "loss": 0.0018, "step": 27500 }, { "epoch": 2.21, "learning_rate": 2.7412590575420318e-05, "loss": 0.0019, "step": 28000 }, { "epoch": 2.25, "learning_rate": 2.7273708264267108e-05, "loss": 0.0019, "step": 28500 }, { "epoch": 2.29, "learning_rate": 2.71348259531139e-05, "loss": 0.0018, "step": 29000 }, { "epoch": 2.33, "learning_rate": 2.699594364196069e-05, "loss": 0.0019, "step": 29500 }, { "epoch": 2.37, "learning_rate": 2.685706133080748e-05, "loss": 0.0018, "step": 30000 }, { "epoch": 2.41, "learning_rate": 2.6718179019654275e-05, "loss": 0.0017, "step": 30500 }, { "epoch": 2.45, "learning_rate": 2.6579296708501065e-05, "loss": 0.0018, "step": 31000 }, { "epoch": 2.49, "learning_rate": 2.644041439734786e-05, "loss": 0.0018, "step": 31500 }, { "epoch": 2.53, "learning_rate": 2.630153208619465e-05, "loss": 0.0018, "step": 32000 }, { "epoch": 2.57, "learning_rate": 2.616264977504144e-05, "loss": 0.0018, "step": 32500 }, { "epoch": 2.6, "learning_rate": 2.6023767463888232e-05, "loss": 0.0018, "step": 33000 }, { "epoch": 2.64, "learning_rate": 2.5884885152735022e-05, "loss": 0.0017, "step": 33500 }, { "epoch": 2.68, "learning_rate": 2.5746002841581815e-05, "loss": 0.0017, "step": 34000 }, { "epoch": 2.72, "learning_rate": 2.5607120530428605e-05, "loss": 0.0017, "step": 34500 }, { "epoch": 2.76, "learning_rate": 2.5468238219275395e-05, "loss": 0.0017, "step": 35000 }, { "epoch": 2.8, "learning_rate": 2.532935590812219e-05, "loss": 0.0018, "step": 35500 }, { "epoch": 2.84, "learning_rate": 2.519047359696898e-05, "loss": 0.0018, "step": 36000 }, { "epoch": 2.88, "learning_rate": 2.5051591285815772e-05, "loss": 0.0017, "step": 36500 }, { "epoch": 2.92, "learning_rate": 2.4912708974662562e-05, "loss": 0.0018, "step": 37000 }, { "epoch": 2.96, "learning_rate": 2.4773826663509355e-05, "loss": 0.0018, "step": 37500 }, { "epoch": 3.0, "learning_rate": 2.4634944352356145e-05, "loss": 0.0018, "step": 38000 }, { "epoch": 3.0, "eval_accuracy": 0.6974095635152655, "eval_f1": 0.8369747009165567, "eval_f1_macro": 0.21887278841705338, "eval_f1_weighted": 0.8081134454474668, "eval_loss": 0.001851799781434238, "eval_p": 0.8528991177184794, "eval_r": 0.8216340322301161, "eval_roc_auc": 0.910698290784354, "eval_runtime": 150.2603, "eval_samples_per_second": 523.585, "eval_steps_per_second": 32.73, "step": 38007 }, { "epoch": 3.04, "learning_rate": 2.4496062041202935e-05, "loss": 0.0016, "step": 38500 }, { "epoch": 3.08, "learning_rate": 2.435717973004973e-05, "loss": 0.0015, "step": 39000 }, { "epoch": 3.12, "learning_rate": 2.421829741889652e-05, "loss": 0.0016, "step": 39500 }, { "epoch": 3.16, "learning_rate": 2.4079415107743312e-05, "loss": 0.0016, "step": 40000 }, { "epoch": 3.2, "learning_rate": 2.3940532796590102e-05, "loss": 0.0015, "step": 40500 }, { "epoch": 3.24, "learning_rate": 2.3801650485436892e-05, "loss": 0.0016, "step": 41000 }, { "epoch": 3.28, "learning_rate": 2.3662768174283686e-05, "loss": 0.0015, "step": 41500 }, { "epoch": 3.32, "learning_rate": 2.3523885863130476e-05, "loss": 0.0015, "step": 42000 }, { "epoch": 3.35, "learning_rate": 2.338500355197727e-05, "loss": 0.0015, "step": 42500 }, { "epoch": 3.39, "learning_rate": 2.324612124082406e-05, "loss": 0.0016, "step": 43000 }, { "epoch": 3.43, "learning_rate": 2.310723892967085e-05, "loss": 0.0016, "step": 43500 }, { "epoch": 3.47, "learning_rate": 2.2968356618517642e-05, "loss": 0.0016, "step": 44000 }, { "epoch": 3.51, "learning_rate": 2.2829474307364432e-05, "loss": 0.0015, "step": 44500 }, { "epoch": 3.55, "learning_rate": 2.2690591996211226e-05, "loss": 0.0015, "step": 45000 }, { "epoch": 3.59, "learning_rate": 2.2551709685058016e-05, "loss": 0.0016, "step": 45500 }, { "epoch": 3.63, "learning_rate": 2.2412827373904806e-05, "loss": 0.0015, "step": 46000 }, { "epoch": 3.67, "learning_rate": 2.22739450627516e-05, "loss": 0.0015, "step": 46500 }, { "epoch": 3.71, "learning_rate": 2.213506275159839e-05, "loss": 0.0015, "step": 47000 }, { "epoch": 3.75, "learning_rate": 2.1996180440445182e-05, "loss": 0.0015, "step": 47500 }, { "epoch": 3.79, "learning_rate": 2.1857298129291972e-05, "loss": 0.0015, "step": 48000 }, { "epoch": 3.83, "learning_rate": 2.1718415818138762e-05, "loss": 0.0015, "step": 48500 }, { "epoch": 3.87, "learning_rate": 2.1579533506985556e-05, "loss": 0.0015, "step": 49000 }, { "epoch": 3.91, "learning_rate": 2.1440651195832346e-05, "loss": 0.0015, "step": 49500 }, { "epoch": 3.95, "learning_rate": 2.130176888467914e-05, "loss": 0.0015, "step": 50000 }, { "epoch": 3.99, "learning_rate": 2.116288657352593e-05, "loss": 0.0015, "step": 50500 }, { "epoch": 4.0, "eval_accuracy": 0.710870173119455, "eval_f1": 0.8427627441351563, "eval_f1_macro": 0.276326750740236, "eval_f1_weighted": 0.8243063205078164, "eval_loss": 0.0018125491915270686, "eval_p": 0.8380480784058393, "eval_r": 0.8475307572344482, "eval_roc_auc": 0.923628157802133, "eval_runtime": 150.1179, "eval_samples_per_second": 524.082, "eval_steps_per_second": 32.761, "step": 50676 }, { "epoch": 4.03, "learning_rate": 2.1024004262372723e-05, "loss": 0.0014, "step": 51000 }, { "epoch": 4.07, "learning_rate": 2.0885121951219513e-05, "loss": 0.0014, "step": 51500 }, { "epoch": 4.1, "learning_rate": 2.0746239640066303e-05, "loss": 0.0013, "step": 52000 }, { "epoch": 4.14, "learning_rate": 2.0607357328913096e-05, "loss": 0.0013, "step": 52500 }, { "epoch": 4.18, "learning_rate": 2.0468475017759886e-05, "loss": 0.0013, "step": 53000 }, { "epoch": 4.22, "learning_rate": 2.032959270660668e-05, "loss": 0.0013, "step": 53500 }, { "epoch": 4.26, "learning_rate": 2.019071039545347e-05, "loss": 0.0013, "step": 54000 }, { "epoch": 4.3, "learning_rate": 2.005182808430026e-05, "loss": 0.0013, "step": 54500 }, { "epoch": 4.34, "learning_rate": 1.9912945773147053e-05, "loss": 0.0013, "step": 55000 }, { "epoch": 4.38, "learning_rate": 1.9774063461993843e-05, "loss": 0.0013, "step": 55500 }, { "epoch": 4.42, "learning_rate": 1.9635181150840636e-05, "loss": 0.0014, "step": 56000 }, { "epoch": 4.46, "learning_rate": 1.9496298839687426e-05, "loss": 0.0013, "step": 56500 }, { "epoch": 4.5, "learning_rate": 1.9357416528534216e-05, "loss": 0.0014, "step": 57000 }, { "epoch": 4.54, "learning_rate": 1.921853421738101e-05, "loss": 0.0014, "step": 57500 }, { "epoch": 4.58, "learning_rate": 1.90796519062278e-05, "loss": 0.0013, "step": 58000 }, { "epoch": 4.62, "learning_rate": 1.8940769595074593e-05, "loss": 0.0013, "step": 58500 }, { "epoch": 4.66, "learning_rate": 1.8801887283921383e-05, "loss": 0.0013, "step": 59000 }, { "epoch": 4.7, "learning_rate": 1.8663004972768173e-05, "loss": 0.0013, "step": 59500 }, { "epoch": 4.74, "learning_rate": 1.8524122661614966e-05, "loss": 0.0014, "step": 60000 }, { "epoch": 4.78, "learning_rate": 1.8385240350461756e-05, "loss": 0.0013, "step": 60500 }, { "epoch": 4.81, "learning_rate": 1.824635803930855e-05, "loss": 0.0014, "step": 61000 }, { "epoch": 4.85, "learning_rate": 1.810747572815534e-05, "loss": 0.0013, "step": 61500 }, { "epoch": 4.89, "learning_rate": 1.796859341700213e-05, "loss": 0.0014, "step": 62000 }, { "epoch": 4.93, "learning_rate": 1.7829711105848923e-05, "loss": 0.0013, "step": 62500 }, { "epoch": 4.97, "learning_rate": 1.7690828794695713e-05, "loss": 0.0013, "step": 63000 }, { "epoch": 5.0, "eval_accuracy": 0.71883976917406, "eval_f1": 0.8474025833437978, "eval_f1_macro": 0.28796797014277603, "eval_f1_weighted": 0.8288548463559504, "eval_loss": 0.0016964372480288148, "eval_p": 0.8470321413422668, "eval_r": 0.8477733495061515, "eval_roc_auc": 0.9237584040683807, "eval_runtime": 151.5541, "eval_samples_per_second": 519.115, "eval_steps_per_second": 32.45, "step": 63345 }, { "epoch": 5.01, "learning_rate": 1.7551946483542503e-05, "loss": 0.0013, "step": 63500 }, { "epoch": 5.05, "learning_rate": 1.7413064172389297e-05, "loss": 0.0012, "step": 64000 }, { "epoch": 5.09, "learning_rate": 1.7274181861236087e-05, "loss": 0.0012, "step": 64500 }, { "epoch": 5.13, "learning_rate": 1.713529955008288e-05, "loss": 0.0011, "step": 65000 }, { "epoch": 5.17, "learning_rate": 1.699641723892967e-05, "loss": 0.0012, "step": 65500 }, { "epoch": 5.21, "learning_rate": 1.685753492777646e-05, "loss": 0.0012, "step": 66000 }, { "epoch": 5.25, "learning_rate": 1.6718652616623253e-05, "loss": 0.0012, "step": 66500 }, { "epoch": 5.29, "learning_rate": 1.6579770305470043e-05, "loss": 0.0012, "step": 67000 }, { "epoch": 5.33, "learning_rate": 1.6440887994316837e-05, "loss": 0.0012, "step": 67500 }, { "epoch": 5.37, "learning_rate": 1.6302005683163627e-05, "loss": 0.0012, "step": 68000 }, { "epoch": 5.41, "learning_rate": 1.616312337201042e-05, "loss": 0.0012, "step": 68500 }, { "epoch": 5.45, "learning_rate": 1.602424106085721e-05, "loss": 0.0012, "step": 69000 }, { "epoch": 5.49, "learning_rate": 1.5885358749704e-05, "loss": 0.0012, "step": 69500 }, { "epoch": 5.53, "learning_rate": 1.5746476438550794e-05, "loss": 0.0011, "step": 70000 }, { "epoch": 5.56, "learning_rate": 1.5607594127397584e-05, "loss": 0.0012, "step": 70500 }, { "epoch": 5.6, "learning_rate": 1.5468711816244377e-05, "loss": 0.0012, "step": 71000 }, { "epoch": 5.64, "learning_rate": 1.5329829505091167e-05, "loss": 0.0012, "step": 71500 }, { "epoch": 5.68, "learning_rate": 1.5190947193937959e-05, "loss": 0.0012, "step": 72000 }, { "epoch": 5.72, "learning_rate": 1.505206488278475e-05, "loss": 0.0012, "step": 72500 }, { "epoch": 5.76, "learning_rate": 1.4913182571631542e-05, "loss": 0.0012, "step": 73000 }, { "epoch": 5.8, "learning_rate": 1.4774300260478334e-05, "loss": 0.0012, "step": 73500 }, { "epoch": 5.84, "learning_rate": 1.4635417949325124e-05, "loss": 0.0012, "step": 74000 }, { "epoch": 5.88, "learning_rate": 1.4496535638171915e-05, "loss": 0.0012, "step": 74500 }, { "epoch": 5.92, "learning_rate": 1.4357653327018707e-05, "loss": 0.0012, "step": 75000 }, { "epoch": 5.96, "learning_rate": 1.4218771015865499e-05, "loss": 0.0011, "step": 75500 }, { "epoch": 6.0, "learning_rate": 1.407988870471229e-05, "loss": 0.0012, "step": 76000 }, { "epoch": 6.0, "eval_accuracy": 0.7229326079772225, "eval_f1": 0.8492761531099651, "eval_f1_macro": 0.3068715201994391, "eval_f1_weighted": 0.8317541964664328, "eval_loss": 0.0016757699195295572, "eval_p": 0.8489857055039438, "eval_r": 0.8495667995148155, "eval_roc_auc": 0.9246567913434838, "eval_runtime": 151.532, "eval_samples_per_second": 519.191, "eval_steps_per_second": 32.455, "step": 76014 }, { "epoch": 6.04, "learning_rate": 1.3941006393559082e-05, "loss": 0.001, "step": 76500 }, { "epoch": 6.08, "learning_rate": 1.3802124082405872e-05, "loss": 0.001, "step": 77000 }, { "epoch": 6.12, "learning_rate": 1.3663241771252664e-05, "loss": 0.001, "step": 77500 }, { "epoch": 6.16, "learning_rate": 1.3524359460099456e-05, "loss": 0.001, "step": 78000 }, { "epoch": 6.2, "learning_rate": 1.3385477148946247e-05, "loss": 0.0011, "step": 78500 }, { "epoch": 6.24, "learning_rate": 1.3246594837793039e-05, "loss": 0.0011, "step": 79000 }, { "epoch": 6.28, "learning_rate": 1.3107712526639829e-05, "loss": 0.0011, "step": 79500 }, { "epoch": 6.31, "learning_rate": 1.296883021548662e-05, "loss": 0.001, "step": 80000 }, { "epoch": 6.35, "learning_rate": 1.2829947904333412e-05, "loss": 0.0011, "step": 80500 }, { "epoch": 6.39, "learning_rate": 1.2691065593180204e-05, "loss": 0.001, "step": 81000 }, { "epoch": 6.43, "learning_rate": 1.2552183282026996e-05, "loss": 0.001, "step": 81500 }, { "epoch": 6.47, "learning_rate": 1.2413300970873786e-05, "loss": 0.0011, "step": 82000 }, { "epoch": 6.51, "learning_rate": 1.2274418659720577e-05, "loss": 0.001, "step": 82500 }, { "epoch": 6.55, "learning_rate": 1.213553634856737e-05, "loss": 0.0011, "step": 83000 }, { "epoch": 6.59, "learning_rate": 1.1996654037414161e-05, "loss": 0.001, "step": 83500 }, { "epoch": 6.63, "learning_rate": 1.1857771726260953e-05, "loss": 0.001, "step": 84000 }, { "epoch": 6.67, "learning_rate": 1.1718889415107744e-05, "loss": 0.001, "step": 84500 }, { "epoch": 6.71, "learning_rate": 1.1580007103954534e-05, "loss": 0.0011, "step": 85000 }, { "epoch": 6.75, "learning_rate": 1.1441124792801326e-05, "loss": 0.0011, "step": 85500 }, { "epoch": 6.79, "learning_rate": 1.1302242481648118e-05, "loss": 0.001, "step": 86000 }, { "epoch": 6.83, "learning_rate": 1.116336017049491e-05, "loss": 0.001, "step": 86500 }, { "epoch": 6.87, "learning_rate": 1.1024477859341701e-05, "loss": 0.0011, "step": 87000 }, { "epoch": 6.91, "learning_rate": 1.0885595548188491e-05, "loss": 0.001, "step": 87500 }, { "epoch": 6.95, "learning_rate": 1.0746713237035283e-05, "loss": 0.0011, "step": 88000 }, { "epoch": 6.99, "learning_rate": 1.0607830925882074e-05, "loss": 0.001, "step": 88500 }, { "epoch": 7.0, "eval_accuracy": 0.7276609807560313, "eval_f1": 0.851625034501794, "eval_f1_macro": 0.33216946866780483, "eval_f1_weighted": 0.8360047793004518, "eval_loss": 0.0016766807530075312, "eval_p": 0.8478514752859547, "eval_r": 0.8554323340842142, "eval_roc_auc": 0.9275875551926686, "eval_runtime": 149.9874, "eval_samples_per_second": 524.538, "eval_steps_per_second": 32.789, "step": 88683 }, { "epoch": 7.03, "learning_rate": 1.0468948614728866e-05, "loss": 0.001, "step": 89000 }, { "epoch": 7.06, "learning_rate": 1.0330066303575658e-05, "loss": 0.0009, "step": 89500 }, { "epoch": 7.1, "learning_rate": 1.019118399242245e-05, "loss": 0.0009, "step": 90000 }, { "epoch": 7.14, "learning_rate": 1.005230168126924e-05, "loss": 0.0009, "step": 90500 }, { "epoch": 7.18, "learning_rate": 9.913419370116031e-06, "loss": 0.0009, "step": 91000 }, { "epoch": 7.22, "learning_rate": 9.774537058962823e-06, "loss": 0.0009, "step": 91500 }, { "epoch": 7.26, "learning_rate": 9.635654747809615e-06, "loss": 0.0009, "step": 92000 }, { "epoch": 7.3, "learning_rate": 9.496772436656406e-06, "loss": 0.0009, "step": 92500 }, { "epoch": 7.34, "learning_rate": 9.357890125503196e-06, "loss": 0.0009, "step": 93000 }, { "epoch": 7.38, "learning_rate": 9.219007814349988e-06, "loss": 0.0009, "step": 93500 }, { "epoch": 7.42, "learning_rate": 9.08012550319678e-06, "loss": 0.0009, "step": 94000 }, { "epoch": 7.46, "learning_rate": 8.941243192043571e-06, "loss": 0.0009, "step": 94500 }, { "epoch": 7.5, "learning_rate": 8.802360880890363e-06, "loss": 0.0009, "step": 95000 }, { "epoch": 7.54, "learning_rate": 8.663478569737153e-06, "loss": 0.0009, "step": 95500 }, { "epoch": 7.58, "learning_rate": 8.524596258583945e-06, "loss": 0.0009, "step": 96000 }, { "epoch": 7.62, "learning_rate": 8.385713947430736e-06, "loss": 0.0009, "step": 96500 }, { "epoch": 7.66, "learning_rate": 8.246831636277528e-06, "loss": 0.0009, "step": 97000 }, { "epoch": 7.7, "learning_rate": 8.107949325124318e-06, "loss": 0.0009, "step": 97500 }, { "epoch": 7.74, "learning_rate": 7.96906701397111e-06, "loss": 0.0009, "step": 98000 }, { "epoch": 7.77, "learning_rate": 7.830184702817902e-06, "loss": 0.0009, "step": 98500 }, { "epoch": 7.81, "learning_rate": 7.691302391664693e-06, "loss": 0.001, "step": 99000 }, { "epoch": 7.85, "learning_rate": 7.552420080511485e-06, "loss": 0.001, "step": 99500 }, { "epoch": 7.89, "learning_rate": 7.413537769358277e-06, "loss": 0.0009, "step": 100000 }, { "epoch": 7.93, "learning_rate": 7.2746554582050675e-06, "loss": 0.001, "step": 100500 }, { "epoch": 7.97, "learning_rate": 7.135773147051859e-06, "loss": 0.0009, "step": 101000 }, { "epoch": 8.0, "eval_accuracy": 0.7294913186058927, "eval_f1": 0.8529003679860734, "eval_f1_macro": 0.34351140192299934, "eval_f1_weighted": 0.8387988374835376, "eval_loss": 0.001688135787844658, "eval_p": 0.8483892536904006, "eval_r": 0.8574597123548778, "eval_roc_auc": 0.928601476610379, "eval_runtime": 152.9938, "eval_samples_per_second": 514.23, "eval_steps_per_second": 32.145, "step": 101352 }, { "epoch": 8.01, "learning_rate": 6.99689083589865e-06, "loss": 0.0009, "step": 101500 }, { "epoch": 8.05, "learning_rate": 6.858008524745442e-06, "loss": 0.0008, "step": 102000 }, { "epoch": 8.09, "learning_rate": 6.7191262135922334e-06, "loss": 0.0008, "step": 102500 }, { "epoch": 8.13, "learning_rate": 6.580243902439024e-06, "loss": 0.0008, "step": 103000 }, { "epoch": 8.17, "learning_rate": 6.441361591285816e-06, "loss": 0.0008, "step": 103500 }, { "epoch": 8.21, "learning_rate": 6.302479280132608e-06, "loss": 0.0009, "step": 104000 }, { "epoch": 8.25, "learning_rate": 6.1635969689793985e-06, "loss": 0.0008, "step": 104500 }, { "epoch": 8.29, "learning_rate": 6.02471465782619e-06, "loss": 0.0009, "step": 105000 }, { "epoch": 8.33, "learning_rate": 5.885832346672981e-06, "loss": 0.0008, "step": 105500 }, { "epoch": 8.37, "learning_rate": 5.746950035519773e-06, "loss": 0.0008, "step": 106000 }, { "epoch": 8.41, "learning_rate": 5.6080677243665645e-06, "loss": 0.0008, "step": 106500 }, { "epoch": 8.45, "learning_rate": 5.469185413213355e-06, "loss": 0.0008, "step": 107000 }, { "epoch": 8.49, "learning_rate": 5.330303102060147e-06, "loss": 0.0008, "step": 107500 }, { "epoch": 8.52, "learning_rate": 5.191420790906939e-06, "loss": 0.0009, "step": 108000 }, { "epoch": 8.56, "learning_rate": 5.0525384797537296e-06, "loss": 0.0008, "step": 108500 }, { "epoch": 8.6, "learning_rate": 4.913656168600521e-06, "loss": 0.0009, "step": 109000 }, { "epoch": 8.64, "learning_rate": 4.774773857447313e-06, "loss": 0.0008, "step": 109500 }, { "epoch": 8.68, "learning_rate": 4.635891546294104e-06, "loss": 0.0008, "step": 110000 }, { "epoch": 8.72, "learning_rate": 4.4970092351408955e-06, "loss": 0.0009, "step": 110500 }, { "epoch": 8.76, "learning_rate": 4.358126923987686e-06, "loss": 0.0008, "step": 111000 }, { "epoch": 8.8, "learning_rate": 4.219244612834478e-06, "loss": 0.0008, "step": 111500 }, { "epoch": 8.84, "learning_rate": 4.080362301681269e-06, "loss": 0.0008, "step": 112000 }, { "epoch": 8.88, "learning_rate": 3.941479990528061e-06, "loss": 0.0009, "step": 112500 }, { "epoch": 8.92, "learning_rate": 3.8025976793748523e-06, "loss": 0.0008, "step": 113000 }, { "epoch": 8.96, "learning_rate": 3.6637153682216436e-06, "loss": 0.0008, "step": 113500 }, { "epoch": 9.0, "learning_rate": 3.524833057068435e-06, "loss": 0.0008, "step": 114000 }, { "epoch": 9.0, "eval_accuracy": 0.7333426544983095, "eval_f1": 0.8538806684223458, "eval_f1_macro": 0.35458058741266574, "eval_f1_weighted": 0.8401736902228668, "eval_loss": 0.0016993152676150203, "eval_p": 0.8495998764892057, "eval_r": 0.8582048171893952, "eval_roc_auc": 0.9289751251101112, "eval_runtime": 150.295, "eval_samples_per_second": 523.464, "eval_steps_per_second": 32.722, "step": 114021 } ], "max_steps": 126690, "num_train_epochs": 10, "total_flos": 7.245026294480845e+16, "trial_name": null, "trial_params": null }