{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 81700, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.119951040391677e-09, "loss": 1.9236, "step": 1 }, { "epoch": 0.31, "learning_rate": 1.5299877600979192e-06, "loss": 1.7998, "step": 250 }, { "epoch": 0.61, "learning_rate": 3.0599755201958383e-06, "loss": 1.4687, "step": 500 }, { "epoch": 0.92, "learning_rate": 4.589963280293758e-06, "loss": 1.3699, "step": 750 }, { "epoch": 1.0, "eval_accuracy": 0.5730027548209367, "eval_f1": 0.10407805854390793, "eval_loss": 1.3314192295074463, "eval_precision": 0.08185753640299095, "eval_recall": 0.14285714285714285, "eval_runtime": 0.5773, "eval_samples_per_second": 628.824, "eval_steps_per_second": 79.686, "step": 817 }, { "epoch": 1.22, "learning_rate": 6.119951040391677e-06, "loss": 1.3287, "step": 1000 }, { "epoch": 1.53, "learning_rate": 7.649938800489596e-06, "loss": 1.3002, "step": 1250 }, { "epoch": 1.84, "learning_rate": 9.179926560587515e-06, "loss": 1.258, "step": 1500 }, { "epoch": 2.0, "eval_accuracy": 0.6060606060606061, "eval_f1": 0.183279210886012, "eval_loss": 1.1635808944702148, "eval_precision": 0.15974025974025974, "eval_recall": 0.21892762410003788, "eval_runtime": 0.5777, "eval_samples_per_second": 628.372, "eval_steps_per_second": 79.628, "step": 1634 }, { "epoch": 2.14, "learning_rate": 1.0709914320685436e-05, "loss": 1.1884, "step": 1750 }, { "epoch": 2.45, "learning_rate": 1.2239902080783353e-05, "loss": 1.1126, "step": 2000 }, { "epoch": 2.75, "learning_rate": 1.3769889840881272e-05, "loss": 1.1123, "step": 2250 }, { "epoch": 3.0, "eval_accuracy": 0.6143250688705234, "eval_f1": 0.342984622186972, "eval_loss": 1.053414225578308, "eval_precision": 0.31294195637816785, "eval_recall": 0.3835992619859615, "eval_runtime": 0.5781, "eval_samples_per_second": 627.939, "eval_steps_per_second": 79.574, "step": 2451 }, { "epoch": 3.06, "learning_rate": 1.5299877600979193e-05, "loss": 1.0996, "step": 2500 }, { "epoch": 3.37, "learning_rate": 1.6829865361077112e-05, "loss": 0.9999, "step": 2750 }, { "epoch": 3.67, "learning_rate": 1.835985312117503e-05, "loss": 1.0141, "step": 3000 }, { "epoch": 3.98, "learning_rate": 1.988984088127295e-05, "loss": 0.993, "step": 3250 }, { "epoch": 4.0, "eval_accuracy": 0.6528925619834711, "eval_f1": 0.377005484318265, "eval_loss": 1.0341299772262573, "eval_precision": 0.4038621706705789, "eval_recall": 0.4236673117522871, "eval_runtime": 0.5774, "eval_samples_per_second": 628.627, "eval_steps_per_second": 79.661, "step": 3268 }, { "epoch": 4.28, "learning_rate": 2.1419828641370872e-05, "loss": 0.8827, "step": 3500 }, { "epoch": 4.59, "learning_rate": 2.294981640146879e-05, "loss": 0.9059, "step": 3750 }, { "epoch": 4.9, "learning_rate": 2.4479804161566707e-05, "loss": 0.8739, "step": 4000 }, { "epoch": 5.0, "eval_accuracy": 0.6611570247933884, "eval_f1": 0.3730793016507302, "eval_loss": 0.9818496704101562, "eval_precision": 0.48520306297883803, "eval_recall": 0.39812740967297616, "eval_runtime": 0.5781, "eval_samples_per_second": 627.881, "eval_steps_per_second": 79.566, "step": 4085 }, { "epoch": 5.2, "learning_rate": 2.600979192166463e-05, "loss": 0.8024, "step": 4250 }, { "epoch": 5.51, "learning_rate": 2.7539779681762544e-05, "loss": 0.8021, "step": 4500 }, { "epoch": 5.81, "learning_rate": 2.9069767441860467e-05, "loss": 0.8587, "step": 4750 }, { "epoch": 6.0, "eval_accuracy": 0.6446280991735537, "eval_f1": 0.372084738159454, "eval_loss": 1.1154053211212158, "eval_precision": 0.4065005040497793, "eval_recall": 0.38243464936814686, "eval_runtime": 0.5773, "eval_samples_per_second": 628.792, "eval_steps_per_second": 79.682, "step": 4902 }, { "epoch": 6.12, "learning_rate": 3.0599755201958386e-05, "loss": 0.7567, "step": 5000 }, { "epoch": 6.43, "learning_rate": 3.2123623011015916e-05, "loss": 0.6544, "step": 5250 }, { "epoch": 6.73, "learning_rate": 3.365361077111383e-05, "loss": 0.7251, "step": 5500 }, { "epoch": 7.0, "eval_accuracy": 0.650137741046832, "eval_f1": 0.47516667580958355, "eval_loss": 1.0870000123977661, "eval_precision": 0.4639813382108336, "eval_recall": 0.4947985272800543, "eval_runtime": 0.5771, "eval_samples_per_second": 628.974, "eval_steps_per_second": 79.705, "step": 5719 }, { "epoch": 7.04, "learning_rate": 3.5183598531211754e-05, "loss": 0.6829, "step": 5750 }, { "epoch": 7.34, "learning_rate": 3.6707466340269284e-05, "loss": 0.5838, "step": 6000 }, { "epoch": 7.65, "learning_rate": 3.82374541003672e-05, "loss": 0.6713, "step": 6250 }, { "epoch": 7.96, "learning_rate": 3.9767441860465115e-05, "loss": 0.6269, "step": 6500 }, { "epoch": 8.0, "eval_accuracy": 0.5950413223140496, "eval_f1": 0.44966469663834185, "eval_loss": 1.2386085987091064, "eval_precision": 0.4427425690783476, "eval_recall": 0.5007950793049315, "eval_runtime": 0.5812, "eval_samples_per_second": 624.61, "eval_steps_per_second": 79.152, "step": 6536 }, { "epoch": 8.26, "learning_rate": 4.129742962056304e-05, "loss": 0.5744, "step": 6750 }, { "epoch": 8.57, "learning_rate": 4.282741738066095e-05, "loss": 0.5549, "step": 7000 }, { "epoch": 8.87, "learning_rate": 4.435740514075888e-05, "loss": 0.6049, "step": 7250 }, { "epoch": 9.0, "eval_accuracy": 0.6198347107438017, "eval_f1": 0.32356296980065924, "eval_loss": 1.368962287902832, "eval_precision": 0.47826317360800125, "eval_recall": 0.30602226522793025, "eval_runtime": 0.5769, "eval_samples_per_second": 629.173, "eval_steps_per_second": 79.73, "step": 7353 }, { "epoch": 9.18, "learning_rate": 4.58873929008568e-05, "loss": 0.5767, "step": 7500 }, { "epoch": 9.49, "learning_rate": 4.741738066095471e-05, "loss": 0.5394, "step": 7750 }, { "epoch": 9.79, "learning_rate": 4.894124847001224e-05, "loss": 0.5507, "step": 8000 }, { "epoch": 10.0, "eval_accuracy": 0.6446280991735537, "eval_f1": 0.4573642555682361, "eval_loss": 1.2706860303878784, "eval_precision": 0.4887882409493839, "eval_recall": 0.4411244714508261, "eval_runtime": 0.5758, "eval_samples_per_second": 630.429, "eval_steps_per_second": 79.889, "step": 8170 }, { "epoch": 10.1, "learning_rate": 4.99998647112497e-05, "loss": 0.609, "step": 8250 }, { "epoch": 10.4, "learning_rate": 4.999756011662697e-05, "loss": 0.5261, "step": 8500 }, { "epoch": 10.71, "learning_rate": 4.9992429864131085e-05, "loss": 0.5329, "step": 8750 }, { "epoch": 11.0, "eval_accuracy": 0.6611570247933884, "eval_f1": 0.44773949967842547, "eval_loss": 1.4297670125961304, "eval_precision": 0.4707209760487202, "eval_recall": 0.4446715025532759, "eval_runtime": 0.5769, "eval_samples_per_second": 629.193, "eval_steps_per_second": 79.732, "step": 8987 }, { "epoch": 11.02, "learning_rate": 4.998443333261708e-05, "loss": 0.5434, "step": 9000 }, { "epoch": 11.32, "learning_rate": 4.997358633418236e-05, "loss": 0.4591, "step": 9250 }, { "epoch": 11.63, "learning_rate": 4.9959890106357945e-05, "loss": 0.5308, "step": 9500 }, { "epoch": 11.93, "learning_rate": 4.9943346211742556e-05, "loss": 0.5027, "step": 9750 }, { "epoch": 12.0, "eval_accuracy": 0.6776859504132231, "eval_f1": 0.4263136100758106, "eval_loss": 1.5769587755203247, "eval_precision": 0.4877654234638867, "eval_recall": 0.42436502673571636, "eval_runtime": 0.5768, "eval_samples_per_second": 629.365, "eval_steps_per_second": 79.754, "step": 9804 }, { "epoch": 12.24, "learning_rate": 4.9923956537824456e-05, "loss": 0.4841, "step": 10000 }, { "epoch": 12.55, "learning_rate": 4.990172329676604e-05, "loss": 0.4372, "step": 10250 }, { "epoch": 12.85, "learning_rate": 4.987664902515149e-05, "loss": 0.4622, "step": 10500 }, { "epoch": 13.0, "eval_accuracy": 0.7052341597796143, "eval_f1": 0.4098788238632856, "eval_loss": 1.5736510753631592, "eval_precision": 0.4891951861291893, "eval_recall": 0.38998470741081576, "eval_runtime": 0.5769, "eval_samples_per_second": 629.25, "eval_steps_per_second": 79.74, "step": 10621 }, { "epoch": 13.16, "learning_rate": 4.984873658369735e-05, "loss": 0.4393, "step": 10750 }, { "epoch": 13.46, "learning_rate": 4.981798915692615e-05, "loss": 0.4071, "step": 11000 }, { "epoch": 13.77, "learning_rate": 4.978441025280311e-05, "loss": 0.4388, "step": 11250 }, { "epoch": 14.0, "eval_accuracy": 0.6942148760330579, "eval_f1": 0.4961367511874214, "eval_loss": 1.7623515129089355, "eval_precision": 0.5284679001386023, "eval_recall": 0.4817265964741334, "eval_runtime": 0.5819, "eval_samples_per_second": 623.821, "eval_steps_per_second": 79.052, "step": 11438 }, { "epoch": 14.08, "learning_rate": 4.9748003702335874e-05, "loss": 0.361, "step": 11500 }, { "epoch": 14.38, "learning_rate": 4.970893619788296e-05, "loss": 0.3574, "step": 11750 }, { "epoch": 14.69, "learning_rate": 4.966689840442375e-05, "loss": 0.3927, "step": 12000 }, { "epoch": 14.99, "learning_rate": 4.9622046371513854e-05, "loss": 0.3973, "step": 12250 }, { "epoch": 15.0, "eval_accuracy": 0.650137741046832, "eval_f1": 0.45953933518622214, "eval_loss": 1.8296639919281006, "eval_precision": 0.547006976703725, "eval_recall": 0.4533570015097109, "eval_runtime": 0.5782, "eval_samples_per_second": 627.762, "eval_steps_per_second": 79.551, "step": 12255 }, { "epoch": 15.3, "learning_rate": 4.957438521630898e-05, "loss": 0.3236, "step": 12500 }, { "epoch": 15.61, "learning_rate": 4.952392037645683e-05, "loss": 0.3474, "step": 12750 }, { "epoch": 15.91, "learning_rate": 4.94706576094767e-05, "loss": 0.3578, "step": 13000 }, { "epoch": 16.0, "eval_accuracy": 0.7079889807162535, "eval_f1": 0.49300722979484873, "eval_loss": 1.4506721496582031, "eval_precision": 0.5450297500169623, "eval_recall": 0.4661049985865257, "eval_runtime": 0.5767, "eval_samples_per_second": 629.403, "eval_steps_per_second": 79.759, "step": 13072 }, { "epoch": 16.22, "learning_rate": 4.9414602992102564e-05, "loss": 0.3665, "step": 13250 }, { "epoch": 16.52, "learning_rate": 4.9355762919589846e-05, "loss": 0.3197, "step": 13500 }, { "epoch": 16.83, "learning_rate": 4.929414410498574e-05, "loss": 0.3632, "step": 13750 }, { "epoch": 17.0, "eval_accuracy": 0.6336088154269972, "eval_f1": 0.44717986010161176, "eval_loss": 1.922100305557251, "eval_precision": 0.45028944394750214, "eval_recall": 0.46335067846767364, "eval_runtime": 0.5779, "eval_samples_per_second": 628.173, "eval_steps_per_second": 79.603, "step": 13889 }, { "epoch": 17.14, "learning_rate": 4.922975357836337e-05, "loss": 0.3885, "step": 14000 }, { "epoch": 17.44, "learning_rate": 4.916259868601966e-05, "loss": 0.3569, "step": 14250 }, { "epoch": 17.75, "learning_rate": 4.909268708963725e-05, "loss": 0.3409, "step": 14500 }, { "epoch": 18.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.50463296007239, "eval_loss": 1.6839758157730103, "eval_precision": 0.532328893998914, "eval_recall": 0.4880845242606326, "eval_runtime": 0.5777, "eval_samples_per_second": 628.346, "eval_steps_per_second": 79.625, "step": 14706 }, { "epoch": 18.05, "learning_rate": 4.902002676541038e-05, "loss": 0.3504, "step": 14750 }, { "epoch": 18.36, "learning_rate": 4.894462600313483e-05, "loss": 0.3144, "step": 15000 }, { "epoch": 18.67, "learning_rate": 4.8866493405262234e-05, "loss": 0.3445, "step": 15250 }, { "epoch": 18.97, "learning_rate": 4.878563788591853e-05, "loss": 0.2723, "step": 15500 }, { "epoch": 19.0, "eval_accuracy": 0.696969696969697, "eval_f1": 0.5104610371804872, "eval_loss": 1.8228646516799927, "eval_precision": 0.5547209922405293, "eval_recall": 0.49795298092711887, "eval_runtime": 0.5785, "eval_samples_per_second": 627.463, "eval_steps_per_second": 79.513, "step": 15523 }, { "epoch": 19.28, "learning_rate": 4.8702068669887026e-05, "loss": 0.2524, "step": 15750 }, { "epoch": 19.58, "learning_rate": 4.8615795291555885e-05, "loss": 0.2412, "step": 16000 }, { "epoch": 19.89, "learning_rate": 4.852682759383042e-05, "loss": 0.3177, "step": 16250 }, { "epoch": 20.0, "eval_accuracy": 0.6914600550964187, "eval_f1": 0.4887299967298263, "eval_loss": 1.9973198175430298, "eval_precision": 0.6312488850962071, "eval_recall": 0.4400535390991056, "eval_runtime": 0.5767, "eval_samples_per_second": 629.418, "eval_steps_per_second": 79.761, "step": 16340 }, { "epoch": 20.2, "learning_rate": 4.84355476675853e-05, "loss": 0.2544, "step": 16500 }, { "epoch": 20.5, "learning_rate": 4.8341232761821675e-05, "loss": 0.2752, "step": 16750 }, { "epoch": 20.81, "learning_rate": 4.824425486142494e-05, "loss": 0.3368, "step": 17000 }, { "epoch": 21.0, "eval_accuracy": 0.6831955922865014, "eval_f1": 0.45711959899817944, "eval_loss": 1.6511964797973633, "eval_precision": 0.5667705375640166, "eval_recall": 0.4405123543670341, "eval_runtime": 0.5771, "eval_samples_per_second": 629.043, "eval_steps_per_second": 79.713, "step": 17157 }, { "epoch": 21.11, "learning_rate": 4.81446250305763e-05, "loss": 0.2763, "step": 17250 }, { "epoch": 21.42, "learning_rate": 4.804235463601491e-05, "loss": 0.2232, "step": 17500 }, { "epoch": 21.73, "learning_rate": 4.793745534574111e-05, "loss": 0.2307, "step": 17750 }, { "epoch": 22.0, "eval_accuracy": 0.696969696969697, "eval_f1": 0.4706521650995364, "eval_loss": 1.9451159238815308, "eval_precision": 0.6334310394975756, "eval_recall": 0.4397407703134304, "eval_runtime": 0.5762, "eval_samples_per_second": 630.015, "eval_steps_per_second": 79.837, "step": 17974 }, { "epoch": 22.03, "learning_rate": 4.782993912768523e-05, "loss": 0.2376, "step": 18000 }, { "epoch": 22.34, "learning_rate": 4.7719818248342104e-05, "loss": 0.2096, "step": 18250 }, { "epoch": 22.64, "learning_rate": 4.760756126979301e-05, "loss": 0.225, "step": 18500 }, { "epoch": 22.95, "learning_rate": 4.7492279345533425e-05, "loss": 0.259, "step": 18750 }, { "epoch": 23.0, "eval_accuracy": 0.6887052341597796, "eval_f1": 0.4849675763556479, "eval_loss": 2.131509304046631, "eval_precision": 0.6931105898377018, "eval_recall": 0.424335591884853, "eval_runtime": 0.5766, "eval_samples_per_second": 629.576, "eval_steps_per_second": 79.781, "step": 18791 }, { "epoch": 23.26, "learning_rate": 4.737443128349549e-05, "loss": 0.2214, "step": 19000 }, { "epoch": 23.56, "learning_rate": 4.7254030528931215e-05, "loss": 0.2338, "step": 19250 }, { "epoch": 23.87, "learning_rate": 4.7131090818328586e-05, "loss": 0.2387, "step": 19500 }, { "epoch": 24.0, "eval_accuracy": 0.7024793388429752, "eval_f1": 0.5396161727004175, "eval_loss": 1.6646941900253296, "eval_precision": 0.5457638718508284, "eval_recall": 0.5445803409781244, "eval_runtime": 0.577, "eval_samples_per_second": 629.121, "eval_steps_per_second": 79.723, "step": 19608 }, { "epoch": 24.17, "learning_rate": 4.700562617784434e-05, "loss": 0.2034, "step": 19750 }, { "epoch": 24.48, "learning_rate": 4.687765092170378e-05, "loss": 0.1752, "step": 20000 }, { "epoch": 24.79, "learning_rate": 4.6747179650567614e-05, "loss": 0.2089, "step": 20250 }, { "epoch": 25.0, "eval_accuracy": 0.6721763085399449, "eval_f1": 0.4391025799309439, "eval_loss": 2.24601674079895, "eval_precision": 0.4972889051140895, "eval_recall": 0.428331664230679, "eval_runtime": 0.5771, "eval_samples_per_second": 629.0, "eval_steps_per_second": 79.708, "step": 20425 }, { "epoch": 25.09, "learning_rate": 4.661422724986621e-05, "loss": 0.2451, "step": 20500 }, { "epoch": 25.4, "learning_rate": 4.647880888810129e-05, "loss": 0.1822, "step": 20750 }, { "epoch": 25.7, "learning_rate": 4.634094001511539e-05, "loss": 0.2117, "step": 21000 }, { "epoch": 26.0, "eval_accuracy": 0.6721763085399449, "eval_f1": 0.5210259674763756, "eval_loss": 2.145235300064087, "eval_precision": 0.5857309337602864, "eval_recall": 0.48917402725900266, "eval_runtime": 0.5764, "eval_samples_per_second": 629.763, "eval_steps_per_second": 79.805, "step": 21242 }, { "epoch": 26.01, "learning_rate": 4.620063636032916e-05, "loss": 0.2111, "step": 21250 }, { "epoch": 26.32, "learning_rate": 4.605791393094685e-05, "loss": 0.1778, "step": 21500 }, { "epoch": 26.62, "learning_rate": 4.591278901012997e-05, "loss": 0.2428, "step": 21750 }, { "epoch": 26.93, "learning_rate": 4.576587292912579e-05, "loss": 0.2081, "step": 22000 }, { "epoch": 27.0, "eval_accuracy": 0.6749311294765841, "eval_f1": 0.43598115798812087, "eval_loss": 2.0688838958740234, "eval_precision": 0.470814340268371, "eval_recall": 0.4225478100170218, "eval_runtime": 0.5781, "eval_samples_per_second": 627.962, "eval_steps_per_second": 79.576, "step": 22059 }, { "epoch": 27.23, "learning_rate": 4.561600241196927e-05, "loss": 0.1615, "step": 22250 }, { "epoch": 27.54, "learning_rate": 4.546377982093819e-05, "loss": 0.2104, "step": 22500 }, { "epoch": 27.85, "learning_rate": 4.530922252306483e-05, "loss": 0.1858, "step": 22750 }, { "epoch": 28.0, "eval_accuracy": 0.6584022038567493, "eval_f1": 0.44101738523719697, "eval_loss": 2.2427115440368652, "eval_precision": 0.5339727994900408, "eval_recall": 0.41865492881502736, "eval_runtime": 0.5762, "eval_samples_per_second": 630.011, "eval_steps_per_second": 79.836, "step": 22876 }, { "epoch": 28.15, "learning_rate": 4.5152348151747534e-05, "loss": 0.1984, "step": 23000 }, { "epoch": 28.46, "learning_rate": 4.499317460473887e-05, "loss": 0.2388, "step": 23250 }, { "epoch": 28.76, "learning_rate": 4.483172004210372e-05, "loss": 0.1924, "step": 23500 }, { "epoch": 29.0, "eval_accuracy": 0.6831955922865014, "eval_f1": 0.4384895896523804, "eval_loss": 2.0502805709838867, "eval_precision": 0.6932407169452727, "eval_recall": 0.39043344791497503, "eval_runtime": 0.5764, "eval_samples_per_second": 629.797, "eval_steps_per_second": 79.809, "step": 23693 }, { "epoch": 29.07, "learning_rate": 4.46680028841474e-05, "loss": 0.1952, "step": 23750 }, { "epoch": 29.38, "learning_rate": 4.450204180931408e-05, "loss": 0.1551, "step": 24000 }, { "epoch": 29.68, "learning_rate": 4.433453290310271e-05, "loss": 0.1758, "step": 24250 }, { "epoch": 29.99, "learning_rate": 4.4164835737020996e-05, "loss": 0.151, "step": 24500 }, { "epoch": 30.0, "eval_accuracy": 0.6804407713498623, "eval_f1": 0.4790753635178762, "eval_loss": 2.1431803703308105, "eval_precision": 0.5843463803065593, "eval_recall": 0.43816301719626843, "eval_runtime": 0.5767, "eval_samples_per_second": 629.492, "eval_steps_per_second": 79.77, "step": 24510 }, { "epoch": 30.29, "learning_rate": 4.399227494450688e-05, "loss": 0.1475, "step": 24750 }, { "epoch": 30.6, "learning_rate": 4.381754732872496e-05, "loss": 0.1696, "step": 25000 }, { "epoch": 30.91, "learning_rate": 4.3640672824299424e-05, "loss": 0.1978, "step": 25250 }, { "epoch": 31.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.42594599441215986, "eval_loss": 2.2966179847717285, "eval_precision": 0.5307515065579581, "eval_recall": 0.41329913928436096, "eval_runtime": 0.5775, "eval_samples_per_second": 628.614, "eval_steps_per_second": 79.659, "step": 25327 }, { "epoch": 31.21, "learning_rate": 4.346167161079236e-05, "loss": 0.167, "step": 25500 }, { "epoch": 31.52, "learning_rate": 4.328056411040151e-05, "loss": 0.1383, "step": 25750 }, { "epoch": 31.82, "learning_rate": 4.309737098563029e-05, "loss": 0.1922, "step": 26000 }, { "epoch": 32.0, "eval_accuracy": 0.6831955922865014, "eval_f1": 0.485788264860417, "eval_loss": 2.0524113178253174, "eval_precision": 0.5313209433300384, "eval_recall": 0.4710779846863592, "eval_runtime": 0.577, "eval_samples_per_second": 629.091, "eval_steps_per_second": 79.719, "step": 26144 }, { "epoch": 32.13, "learning_rate": 4.29121131369304e-05, "loss": 0.199, "step": 26250 }, { "epoch": 32.44, "learning_rate": 4.272481170031731e-05, "loss": 0.1964, "step": 26500 }, { "epoch": 32.74, "learning_rate": 4.253548804495887e-05, "loss": 0.1876, "step": 26750 }, { "epoch": 33.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.45277059579119083, "eval_loss": 2.3500843048095703, "eval_precision": 0.6191480424964332, "eval_recall": 0.4147965198457809, "eval_runtime": 0.5777, "eval_samples_per_second": 628.4, "eval_steps_per_second": 79.632, "step": 26961 }, { "epoch": 33.05, "learning_rate": 4.234416377073727e-05, "loss": 0.1475, "step": 27000 }, { "epoch": 33.35, "learning_rate": 4.215086070578477e-05, "loss": 0.143, "step": 27250 }, { "epoch": 33.66, "learning_rate": 4.195560090399327e-05, "loss": 0.2021, "step": 27500 }, { "epoch": 33.97, "learning_rate": 4.175840664249825e-05, "loss": 0.1649, "step": 27750 }, { "epoch": 34.0, "eval_accuracy": 0.6584022038567493, "eval_f1": 0.4327576346908582, "eval_loss": 2.3630762100219727, "eval_precision": 0.46295462482075056, "eval_recall": 0.4282663662281889, "eval_runtime": 0.5765, "eval_samples_per_second": 629.661, "eval_steps_per_second": 79.792, "step": 27778 }, { "epoch": 34.27, "learning_rate": 4.1559300419137124e-05, "loss": 0.1333, "step": 28000 }, { "epoch": 34.58, "learning_rate": 4.135830494988252e-05, "loss": 0.1386, "step": 28250 }, { "epoch": 34.88, "learning_rate": 4.1155443166250585e-05, "loss": 0.2109, "step": 28500 }, { "epoch": 35.0, "eval_accuracy": 0.6749311294765841, "eval_f1": 0.4902477696595343, "eval_loss": 2.1397335529327393, "eval_precision": 0.5198444762520393, "eval_recall": 0.49638105463228616, "eval_runtime": 0.578, "eval_samples_per_second": 628.013, "eval_steps_per_second": 79.583, "step": 28595 }, { "epoch": 35.19, "learning_rate": 4.095073821268473e-05, "loss": 0.1839, "step": 28750 }, { "epoch": 35.5, "learning_rate": 4.0744213443915125e-05, "loss": 0.1558, "step": 29000 }, { "epoch": 35.8, "learning_rate": 4.0536729253084094e-05, "loss": 0.127, "step": 29250 }, { "epoch": 36.0, "eval_accuracy": 0.6694214876033058, "eval_f1": 0.4276520300779697, "eval_loss": 2.290356159210205, "eval_precision": 0.5064721567763604, "eval_recall": 0.40090887150616217, "eval_runtime": 0.5778, "eval_samples_per_second": 628.209, "eval_steps_per_second": 79.608, "step": 29412 }, { "epoch": 36.11, "learning_rate": 4.032748663336774e-05, "loss": 0.2138, "step": 29500 }, { "epoch": 36.41, "learning_rate": 4.011565850261534e-05, "loss": 0.1587, "step": 29750 }, { "epoch": 36.72, "learning_rate": 3.990210583066674e-05, "loss": 0.1653, "step": 30000 }, { "epoch": 37.0, "eval_accuracy": 0.6831955922865014, "eval_f1": 0.5118761031802896, "eval_loss": 2.097479820251465, "eval_precision": 0.558650914911419, "eval_recall": 0.4861388167114768, "eval_runtime": 0.5771, "eval_samples_per_second": 629.03, "eval_steps_per_second": 79.712, "step": 30229 }, { "epoch": 37.03, "learning_rate": 3.968685298168575e-05, "loss": 0.1729, "step": 30250 }, { "epoch": 37.33, "learning_rate": 3.946992451380895e-05, "loss": 0.1092, "step": 30500 }, { "epoch": 37.64, "learning_rate": 3.9251345176343774e-05, "loss": 0.1322, "step": 30750 }, { "epoch": 37.94, "learning_rate": 3.9031139906944916e-05, "loss": 0.1626, "step": 31000 }, { "epoch": 38.0, "eval_accuracy": 0.6666666666666666, "eval_f1": 0.4688775335224489, "eval_loss": 2.4606480598449707, "eval_precision": 0.4988188909516061, "eval_recall": 0.45583700987026105, "eval_runtime": 0.5758, "eval_samples_per_second": 630.398, "eval_steps_per_second": 79.885, "step": 31046 }, { "epoch": 38.25, "learning_rate": 3.880933382876915e-05, "loss": 0.1369, "step": 31250 }, { "epoch": 38.56, "learning_rate": 3.8585952247609145e-05, "loss": 0.1002, "step": 31500 }, { "epoch": 38.86, "learning_rate": 3.836102064900617e-05, "loss": 0.1454, "step": 31750 }, { "epoch": 39.0, "eval_accuracy": 0.6914600550964187, "eval_f1": 0.5079144154287897, "eval_loss": 2.172344446182251, "eval_precision": 0.5805565750272889, "eval_recall": 0.4738757992144692, "eval_runtime": 0.5781, "eval_samples_per_second": 627.915, "eval_steps_per_second": 79.571, "step": 31863 }, { "epoch": 39.17, "learning_rate": 3.8134564695342595e-05, "loss": 0.158, "step": 32000 }, { "epoch": 39.47, "learning_rate": 3.790661022291403e-05, "loss": 0.1078, "step": 32250 }, { "epoch": 39.78, "learning_rate": 3.767718323898163e-05, "loss": 0.1206, "step": 32500 }, { "epoch": 40.0, "eval_accuracy": 0.6942148760330579, "eval_f1": 0.48536884689746623, "eval_loss": 2.1714024543762207, "eval_precision": 0.5719430108062781, "eval_recall": 0.4474171388272374, "eval_runtime": 0.5772, "eval_samples_per_second": 628.944, "eval_steps_per_second": 79.701, "step": 32680 }, { "epoch": 40.09, "learning_rate": 3.744630991880502e-05, "loss": 0.1158, "step": 32750 }, { "epoch": 40.39, "learning_rate": 3.7214016602655875e-05, "loss": 0.115, "step": 33000 }, { "epoch": 40.7, "learning_rate": 3.698032979281283e-05, "loss": 0.1092, "step": 33250 }, { "epoch": 41.0, "eval_accuracy": 0.6914600550964187, "eval_f1": 0.4980899463610581, "eval_loss": 2.2357773780822754, "eval_precision": 0.5844052283360602, "eval_recall": 0.46764869298736295, "eval_runtime": 0.5777, "eval_samples_per_second": 628.373, "eval_steps_per_second": 79.629, "step": 33497 }, { "epoch": 41.0, "learning_rate": 3.6745276150537816e-05, "loss": 0.1262, "step": 33500 }, { "epoch": 41.31, "learning_rate": 3.650983070128024e-05, "loss": 0.1034, "step": 33750 }, { "epoch": 41.62, "learning_rate": 3.6272129196890194e-05, "loss": 0.0885, "step": 34000 }, { "epoch": 41.92, "learning_rate": 3.603410013431832e-05, "loss": 0.1156, "step": 34250 }, { "epoch": 42.0, "eval_accuracy": 0.6584022038567493, "eval_f1": 0.4280488256791789, "eval_loss": 2.558166265487671, "eval_precision": 0.45057717568928324, "eval_recall": 0.416898799749785, "eval_runtime": 0.5768, "eval_samples_per_second": 629.389, "eval_steps_per_second": 79.757, "step": 34314 }, { "epoch": 42.23, "learning_rate": 3.579385880846232e-05, "loss": 0.1169, "step": 34500 }, { "epoch": 42.53, "learning_rate": 3.5552386014312834e-05, "loss": 0.099, "step": 34750 }, { "epoch": 42.84, "learning_rate": 3.530970930143252e-05, "loss": 0.093, "step": 35000 }, { "epoch": 43.0, "eval_accuracy": 0.628099173553719, "eval_f1": 0.4313278010079715, "eval_loss": 2.627509355545044, "eval_precision": 0.5279577964214083, "eval_recall": 0.40927325616365023, "eval_runtime": 0.576, "eval_samples_per_second": 630.259, "eval_steps_per_second": 79.868, "step": 35131 }, { "epoch": 43.15, "learning_rate": 3.5065856356738794e-05, "loss": 0.1584, "step": 35250 }, { "epoch": 43.45, "learning_rate": 3.482085500134504e-05, "loss": 0.0967, "step": 35500 }, { "epoch": 43.76, "learning_rate": 3.4574733187386486e-05, "loss": 0.1484, "step": 35750 }, { "epoch": 44.0, "eval_accuracy": 0.6694214876033058, "eval_f1": 0.43577912693695875, "eval_loss": 2.5017268657684326, "eval_precision": 0.5655394346579834, "eval_recall": 0.3971485787064605, "eval_runtime": 0.5768, "eval_samples_per_second": 629.305, "eval_steps_per_second": 79.747, "step": 35948 }, { "epoch": 44.06, "learning_rate": 3.432751899483116e-05, "loss": 0.1379, "step": 36000 }, { "epoch": 44.37, "learning_rate": 3.4079240628276246e-05, "loss": 0.0877, "step": 36250 }, { "epoch": 44.68, "learning_rate": 3.382992641373025e-05, "loss": 0.1466, "step": 36500 }, { "epoch": 44.98, "learning_rate": 3.357960479538127e-05, "loss": 0.1411, "step": 36750 }, { "epoch": 45.0, "eval_accuracy": 0.6914600550964187, "eval_f1": 0.5234198940288918, "eval_loss": 2.3302204608917236, "eval_precision": 0.5942265966479041, "eval_recall": 0.5061625600726586, "eval_runtime": 0.5778, "eval_samples_per_second": 628.231, "eval_steps_per_second": 79.611, "step": 36765 }, { "epoch": 45.29, "learning_rate": 3.332830433235184e-05, "loss": 0.0849, "step": 37000 }, { "epoch": 45.59, "learning_rate": 3.307605369544058e-05, "loss": 0.126, "step": 37250 }, { "epoch": 45.9, "learning_rate": 3.28228816638512e-05, "loss": 0.1003, "step": 37500 }, { "epoch": 46.0, "eval_accuracy": 0.696969696969697, "eval_f1": 0.5745871087976351, "eval_loss": 2.3890013694763184, "eval_precision": 0.6828113242342491, "eval_recall": 0.5128949833691213, "eval_runtime": 0.577, "eval_samples_per_second": 629.101, "eval_steps_per_second": 79.721, "step": 37582 }, { "epoch": 46.21, "learning_rate": 3.256881712190906e-05, "loss": 0.11, "step": 37750 }, { "epoch": 46.51, "learning_rate": 3.231388905576575e-05, "loss": 0.1225, "step": 38000 }, { "epoch": 46.82, "learning_rate": 3.205915122367602e-05, "loss": 0.1245, "step": 38250 }, { "epoch": 47.0, "eval_accuracy": 0.6804407713498623, "eval_f1": 0.5184040661055641, "eval_loss": 2.339553117752075, "eval_precision": 0.6566468598124048, "eval_recall": 0.48311657554268395, "eval_runtime": 0.5772, "eval_samples_per_second": 628.951, "eval_steps_per_second": 79.702, "step": 38399 }, { "epoch": 47.12, "learning_rate": 3.180258662113338e-05, "loss": 0.1008, "step": 38500 }, { "epoch": 47.43, "learning_rate": 3.154524591341034e-05, "loss": 0.121, "step": 38750 }, { "epoch": 47.74, "learning_rate": 3.128715846043534e-05, "loss": 0.0998, "step": 39000 }, { "epoch": 48.0, "eval_accuracy": 0.696969696969697, "eval_f1": 0.5459744525861538, "eval_loss": 2.4267241954803467, "eval_precision": 0.6807294155120241, "eval_recall": 0.4855936141034663, "eval_runtime": 0.5758, "eval_samples_per_second": 630.432, "eval_steps_per_second": 79.889, "step": 39216 }, { "epoch": 48.04, "learning_rate": 3.102835370733277e-05, "loss": 0.0962, "step": 39250 }, { "epoch": 48.35, "learning_rate": 3.07688611810636e-05, "loss": 0.0778, "step": 39500 }, { "epoch": 48.65, "learning_rate": 3.0508710487056635e-05, "loss": 0.1145, "step": 39750 }, { "epoch": 48.96, "learning_rate": 3.0247931305830845e-05, "loss": 0.1048, "step": 40000 }, { "epoch": 49.0, "eval_accuracy": 0.6749311294765841, "eval_f1": 0.536908729765805, "eval_loss": 2.3678715229034424, "eval_precision": 0.6530412239759585, "eval_recall": 0.48379632135789763, "eval_runtime": 0.578, "eval_samples_per_second": 628.037, "eval_steps_per_second": 79.586, "step": 40033 }, { "epoch": 49.27, "learning_rate": 2.998655338960914e-05, "loss": 0.0751, "step": 40250 }, { "epoch": 49.57, "learning_rate": 2.9724606558923933e-05, "loss": 0.107, "step": 40500 }, { "epoch": 49.88, "learning_rate": 2.9462120699214922e-05, "loss": 0.0605, "step": 40750 }, { "epoch": 50.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.5723258179466014, "eval_loss": 2.3588879108428955, "eval_precision": 0.6557361185869716, "eval_recall": 0.5364202334337802, "eval_runtime": 0.5756, "eval_samples_per_second": 630.622, "eval_steps_per_second": 79.913, "step": 40850 }, { "epoch": 50.18, "learning_rate": 2.9199125757419482e-05, "loss": 0.0718, "step": 41000 }, { "epoch": 50.49, "learning_rate": 2.8935651738556013e-05, "loss": 0.0661, "step": 41250 }, { "epoch": 50.8, "learning_rate": 2.8671728702300687e-05, "loss": 0.101, "step": 41500 }, { "epoch": 51.0, "eval_accuracy": 0.7024793388429752, "eval_f1": 0.5646342747625367, "eval_loss": 2.261863946914673, "eval_precision": 0.6370711351914359, "eval_recall": 0.5380352030290453, "eval_runtime": 0.5759, "eval_samples_per_second": 630.369, "eval_steps_per_second": 79.881, "step": 41667 }, { "epoch": 51.1, "learning_rate": 2.840738675955793e-05, "loss": 0.1039, "step": 41750 }, { "epoch": 51.41, "learning_rate": 2.814371572615747e-05, "loss": 0.0769, "step": 42000 }, { "epoch": 51.71, "learning_rate": 2.787862786484319e-05, "loss": 0.0858, "step": 42250 }, { "epoch": 52.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.5195773061626721, "eval_loss": 2.5068295001983643, "eval_precision": 0.6063341583445594, "eval_recall": 0.4876037550298634, "eval_runtime": 0.5762, "eval_samples_per_second": 629.983, "eval_steps_per_second": 79.833, "step": 42484 }, { "epoch": 52.02, "learning_rate": 2.761321158169134e-05, "loss": 0.0844, "step": 42500 }, { "epoch": 52.33, "learning_rate": 2.734749715797047e-05, "loss": 0.069, "step": 42750 }, { "epoch": 52.63, "learning_rate": 2.7081514908963913e-05, "loss": 0.0784, "step": 43000 }, { "epoch": 52.94, "learning_rate": 2.681529518051109e-05, "loss": 0.0795, "step": 43250 }, { "epoch": 53.0, "eval_accuracy": 0.6887052341597796, "eval_f1": 0.4958589687437866, "eval_loss": 2.4998273849487305, "eval_precision": 0.6786654073384627, "eval_recall": 0.45129423122033957, "eval_runtime": 0.5744, "eval_samples_per_second": 631.991, "eval_steps_per_second": 80.087, "step": 43301 }, { "epoch": 53.24, "learning_rate": 2.6548868345545402e-05, "loss": 0.072, "step": 43500 }, { "epoch": 53.55, "learning_rate": 2.628226480062897e-05, "loss": 0.0401, "step": 43750 }, { "epoch": 53.86, "learning_rate": 2.6015514962484717e-05, "loss": 0.0674, "step": 44000 }, { "epoch": 54.0, "eval_accuracy": 0.6639118457300276, "eval_f1": 0.5109508473836721, "eval_loss": 2.704511880874634, "eval_precision": 0.6101414588534028, "eval_recall": 0.47100482987182496, "eval_runtime": 0.5772, "eval_samples_per_second": 628.872, "eval_steps_per_second": 79.692, "step": 44118 }, { "epoch": 54.16, "learning_rate": 2.574971691776212e-05, "loss": 0.0601, "step": 44250 }, { "epoch": 54.47, "learning_rate": 2.5482766087609973e-05, "loss": 0.0502, "step": 44500 }, { "epoch": 54.77, "learning_rate": 2.5215760178811658e-05, "loss": 0.1174, "step": 44750 }, { "epoch": 55.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.516165985819526, "eval_loss": 2.3755171298980713, "eval_precision": 0.5969723691945914, "eval_recall": 0.4715294694358734, "eval_runtime": 0.5774, "eval_samples_per_second": 628.713, "eval_steps_per_second": 79.672, "step": 44935 }, { "epoch": 55.08, "learning_rate": 2.4948729653995663e-05, "loss": 0.0826, "step": 45000 }, { "epoch": 55.39, "learning_rate": 2.4681704978598928e-05, "loss": 0.0422, "step": 45250 }, { "epoch": 55.69, "learning_rate": 2.4414716617390998e-05, "loss": 0.059, "step": 45500 }, { "epoch": 56.0, "learning_rate": 2.414779503099838e-05, "loss": 0.0886, "step": 45750 }, { "epoch": 56.0, "eval_accuracy": 0.6804407713498623, "eval_f1": 0.47301725201698097, "eval_loss": 2.507392406463623, "eval_precision": 0.4981856952904839, "eval_recall": 0.4597725584486668, "eval_runtime": 0.5761, "eval_samples_per_second": 630.088, "eval_steps_per_second": 79.846, "step": 45752 }, { "epoch": 56.3, "learning_rate": 2.388097067242925e-05, "loss": 0.0528, "step": 46000 }, { "epoch": 56.61, "learning_rate": 2.3614273983599068e-05, "loss": 0.0643, "step": 46250 }, { "epoch": 56.92, "learning_rate": 2.334773539185752e-05, "loss": 0.058, "step": 46500 }, { "epoch": 57.0, "eval_accuracy": 0.6804407713498623, "eval_f1": 0.4807142977366666, "eval_loss": 2.5247209072113037, "eval_precision": 0.5200869236583522, "eval_recall": 0.45992668729737696, "eval_runtime": 0.5758, "eval_samples_per_second": 630.387, "eval_steps_per_second": 79.884, "step": 46569 }, { "epoch": 57.22, "learning_rate": 2.308138530651701e-05, "loss": 0.0823, "step": 46750 }, { "epoch": 57.53, "learning_rate": 2.2815254115383325e-05, "loss": 0.0428, "step": 47000 }, { "epoch": 57.83, "learning_rate": 2.254937218128868e-05, "loss": 0.0558, "step": 47250 }, { "epoch": 58.0, "eval_accuracy": 0.6914600550964187, "eval_f1": 0.4594613590535599, "eval_loss": 2.47005033493042, "eval_precision": 0.6139090044619999, "eval_recall": 0.4228677965439049, "eval_runtime": 0.5759, "eval_samples_per_second": 630.356, "eval_steps_per_second": 79.88, "step": 47386 }, { "epoch": 58.14, "learning_rate": 2.2283769838627582e-05, "loss": 0.0931, "step": 47500 }, { "epoch": 58.45, "learning_rate": 2.2018477389896086e-05, "loss": 0.0508, "step": 47750 }, { "epoch": 58.75, "learning_rate": 2.1753525102234484e-05, "loss": 0.0452, "step": 48000 }, { "epoch": 59.0, "eval_accuracy": 0.6942148760330579, "eval_f1": 0.46439631667557835, "eval_loss": 2.541719675064087, "eval_precision": 0.5814331265699046, "eval_recall": 0.43331322139819684, "eval_runtime": 0.5754, "eval_samples_per_second": 630.853, "eval_steps_per_second": 79.943, "step": 48203 }, { "epoch": 59.06, "learning_rate": 2.1488943203974225e-05, "loss": 0.0487, "step": 48250 }, { "epoch": 59.36, "learning_rate": 2.122581776857167e-05, "loss": 0.0777, "step": 48500 }, { "epoch": 59.67, "learning_rate": 2.0963119490709552e-05, "loss": 0.0656, "step": 48750 }, { "epoch": 59.98, "learning_rate": 2.0699825885128935e-05, "loss": 0.0589, "step": 49000 }, { "epoch": 60.0, "eval_accuracy": 0.6721763085399449, "eval_f1": 0.4951052024505615, "eval_loss": 2.631096363067627, "eval_precision": 0.5573207158969782, "eval_recall": 0.4686150056839712, "eval_runtime": 0.5753, "eval_samples_per_second": 630.965, "eval_steps_per_second": 79.957, "step": 49020 }, { "epoch": 60.28, "learning_rate": 2.0437022885194815e-05, "loss": 0.0345, "step": 49250 }, { "epoch": 60.59, "learning_rate": 2.017474047402702e-05, "loss": 0.0656, "step": 49500 }, { "epoch": 60.89, "learning_rate": 1.991300857535151e-05, "loss": 0.0993, "step": 49750 }, { "epoch": 61.0, "eval_accuracy": 0.6666666666666666, "eval_f1": 0.4534585450214669, "eval_loss": 2.720975160598755, "eval_precision": 0.5187203506875638, "eval_recall": 0.423817756545589, "eval_runtime": 0.5751, "eval_samples_per_second": 631.214, "eval_steps_per_second": 79.989, "step": 49837 }, { "epoch": 61.2, "learning_rate": 1.9651857050086467e-05, "loss": 0.0831, "step": 50000 }, { "epoch": 61.51, "learning_rate": 1.9391315692935428e-05, "loss": 0.0378, "step": 50250 }, { "epoch": 61.81, "learning_rate": 1.9131414228988008e-05, "loss": 0.0795, "step": 50500 }, { "epoch": 62.0, "eval_accuracy": 0.6942148760330579, "eval_f1": 0.5159346012569708, "eval_loss": 2.37697696685791, "eval_precision": 0.6010404846231928, "eval_recall": 0.4778067239274136, "eval_runtime": 0.5752, "eval_samples_per_second": 631.136, "eval_steps_per_second": 79.979, "step": 50654 }, { "epoch": 62.12, "learning_rate": 1.8872182310328605e-05, "loss": 0.0624, "step": 50750 }, { "epoch": 62.42, "learning_rate": 1.861364951265337e-05, "loss": 0.0685, "step": 51000 }, { "epoch": 62.73, "learning_rate": 1.835584533189595e-05, "loss": 0.0438, "step": 51250 }, { "epoch": 63.0, "eval_accuracy": 0.6942148760330579, "eval_f1": 0.5006627978013612, "eval_loss": 2.5155351161956787, "eval_precision": 0.5974857177798354, "eval_recall": 0.4612218207353675, "eval_runtime": 0.5756, "eval_samples_per_second": 630.635, "eval_steps_per_second": 79.915, "step": 51471 }, { "epoch": 63.04, "learning_rate": 1.8098799180862294e-05, "loss": 0.0512, "step": 51500 }, { "epoch": 63.34, "learning_rate": 1.784254038587494e-05, "loss": 0.0443, "step": 51750 }, { "epoch": 63.65, "learning_rate": 1.758709818342722e-05, "loss": 0.0331, "step": 52000 }, { "epoch": 63.95, "learning_rate": 1.7332501716847632e-05, "loss": 0.0432, "step": 52250 }, { "epoch": 64.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.4691190044905215, "eval_loss": 2.5460140705108643, "eval_precision": 0.533927553927554, "eval_recall": 0.44402096603451285, "eval_runtime": 0.5748, "eval_samples_per_second": 631.506, "eval_steps_per_second": 80.026, "step": 52288 }, { "epoch": 64.26, "learning_rate": 1.7078780032974923e-05, "loss": 0.0442, "step": 52500 }, { "epoch": 64.57, "learning_rate": 1.682596207884414e-05, "loss": 0.0281, "step": 52750 }, { "epoch": 64.87, "learning_rate": 1.657608800518716e-05, "loss": 0.0609, "step": 53000 }, { "epoch": 65.0, "eval_accuracy": 0.7052341597796143, "eval_f1": 0.5783901018472066, "eval_loss": 2.4328320026397705, "eval_precision": 0.6903590657995944, "eval_recall": 0.5364771859229988, "eval_runtime": 0.5756, "eval_samples_per_second": 630.632, "eval_steps_per_second": 79.915, "step": 53105 }, { "epoch": 65.18, "learning_rate": 1.6325156131696083e-05, "loss": 0.0475, "step": 53250 }, { "epoch": 65.48, "learning_rate": 1.6075213968684297e-05, "loss": 0.0447, "step": 53500 }, { "epoch": 65.79, "learning_rate": 1.5826290031982382e-05, "loss": 0.0426, "step": 53750 }, { "epoch": 66.0, "eval_accuracy": 0.7107438016528925, "eval_f1": 0.5337400275204507, "eval_loss": 2.376704216003418, "eval_precision": 0.6895915165326075, "eval_recall": 0.47468881009521396, "eval_runtime": 0.5754, "eval_samples_per_second": 630.914, "eval_steps_per_second": 79.95, "step": 53922 }, { "epoch": 66.1, "learning_rate": 1.5578412721251766e-05, "loss": 0.0541, "step": 54000 }, { "epoch": 66.4, "learning_rate": 1.533161031674459e-05, "loss": 0.0469, "step": 54250 }, { "epoch": 66.71, "learning_rate": 1.5085910976077283e-05, "loss": 0.0194, "step": 54500 }, { "epoch": 67.0, "eval_accuracy": 0.7079889807162535, "eval_f1": 0.5368473703383648, "eval_loss": 2.5384085178375244, "eval_precision": 0.6938002473716759, "eval_recall": 0.4738072682653963, "eval_runtime": 0.575, "eval_samples_per_second": 631.275, "eval_steps_per_second": 79.996, "step": 54739 }, { "epoch": 67.01, "learning_rate": 1.4841342731017988e-05, "loss": 0.0356, "step": 54750 }, { "epoch": 67.32, "learning_rate": 1.4598904775700689e-05, "loss": 0.0421, "step": 55000 }, { "epoch": 67.63, "learning_rate": 1.4356677495577313e-05, "loss": 0.0423, "step": 55250 }, { "epoch": 67.93, "learning_rate": 1.4115664509103169e-05, "loss": 0.0557, "step": 55500 }, { "epoch": 68.0, "eval_accuracy": 0.7162534435261708, "eval_f1": 0.5433935912807646, "eval_loss": 2.393681049346924, "eval_precision": 0.6931352645638361, "eval_recall": 0.4979390717383328, "eval_runtime": 0.575, "eval_samples_per_second": 631.299, "eval_steps_per_second": 79.999, "step": 55556 }, { "epoch": 68.24, "learning_rate": 1.3875893313381589e-05, "loss": 0.0432, "step": 55750 }, { "epoch": 68.54, "learning_rate": 1.3637391263840368e-05, "loss": 0.0429, "step": 56000 }, { "epoch": 68.85, "learning_rate": 1.3400185571110769e-05, "loss": 0.0466, "step": 56250 }, { "epoch": 69.0, "eval_accuracy": 0.7107438016528925, "eval_f1": 0.5718625572894164, "eval_loss": 2.403398275375366, "eval_precision": 0.6765347833670815, "eval_recall": 0.5215229283579037, "eval_runtime": 0.5744, "eval_samples_per_second": 631.944, "eval_steps_per_second": 80.081, "step": 56373 }, { "epoch": 69.16, "learning_rate": 1.316430329792307e-05, "loss": 0.0465, "step": 56500 }, { "epoch": 69.46, "learning_rate": 1.2929771356018988e-05, "loss": 0.0548, "step": 56750 }, { "epoch": 69.77, "learning_rate": 1.2696616503081343e-05, "loss": 0.0517, "step": 57000 }, { "epoch": 70.0, "eval_accuracy": 0.7052341597796143, "eval_f1": 0.5117271310290573, "eval_loss": 2.4453024864196777, "eval_precision": 0.7065175565175564, "eval_recall": 0.46373618855145954, "eval_runtime": 0.5763, "eval_samples_per_second": 629.826, "eval_steps_per_second": 79.813, "step": 57190 }, { "epoch": 70.07, "learning_rate": 1.2464865339681253e-05, "loss": 0.0226, "step": 57250 }, { "epoch": 70.38, "learning_rate": 1.2234544306243286e-05, "loss": 0.0292, "step": 57500 }, { "epoch": 70.69, "learning_rate": 1.2005679680028897e-05, "loss": 0.0368, "step": 57750 }, { "epoch": 70.99, "learning_rate": 1.177920411295463e-05, "loss": 0.0437, "step": 58000 }, { "epoch": 71.0, "eval_accuracy": 0.6887052341597796, "eval_f1": 0.4571751135533447, "eval_loss": 2.6550002098083496, "eval_precision": 0.6424798334634401, "eval_recall": 0.41016626367611586, "eval_runtime": 0.5744, "eval_samples_per_second": 632.012, "eval_steps_per_second": 80.09, "step": 58007 }, { "epoch": 71.3, "learning_rate": 1.1553324380057995e-05, "loss": 0.037, "step": 58250 }, { "epoch": 71.6, "learning_rate": 1.1328978774573311e-05, "loss": 0.0361, "step": 58500 }, { "epoch": 71.91, "learning_rate": 1.1106192892027184e-05, "loss": 0.0451, "step": 58750 }, { "epoch": 72.0, "eval_accuracy": 0.6942148760330579, "eval_f1": 0.476209725136688, "eval_loss": 2.6152384281158447, "eval_precision": 0.6680127132820342, "eval_recall": 0.4248004219371215, "eval_runtime": 0.5754, "eval_samples_per_second": 630.887, "eval_steps_per_second": 79.947, "step": 58824 }, { "epoch": 72.22, "learning_rate": 1.0884992149997797e-05, "loss": 0.0112, "step": 59000 }, { "epoch": 72.52, "learning_rate": 1.066540178521517e-05, "loss": 0.043, "step": 59250 }, { "epoch": 72.83, "learning_rate": 1.04474468506818e-05, "loss": 0.0294, "step": 59500 }, { "epoch": 73.0, "eval_accuracy": 0.6887052341597796, "eval_f1": 0.5392699243016044, "eval_loss": 2.5824713706970215, "eval_precision": 0.6700918129489558, "eval_recall": 0.4843369001004469, "eval_runtime": 0.5751, "eval_samples_per_second": 631.242, "eval_steps_per_second": 79.992, "step": 59641 }, { "epoch": 73.13, "learning_rate": 1.0231152212814419e-05, "loss": 0.0468, "step": 59750 }, { "epoch": 73.44, "learning_rate": 1.0016542548606983e-05, "loss": 0.0316, "step": 60000 }, { "epoch": 73.75, "learning_rate": 9.803642342815278e-06, "loss": 0.0429, "step": 60250 }, { "epoch": 74.0, "eval_accuracy": 0.696969696969697, "eval_f1": 0.48452204181281855, "eval_loss": 2.451073408126831, "eval_precision": 0.6746470741527906, "eval_recall": 0.4329222604161029, "eval_runtime": 0.5756, "eval_samples_per_second": 630.696, "eval_steps_per_second": 79.923, "step": 60458 }, { "epoch": 74.05, "learning_rate": 9.592475885163487e-06, "loss": 0.0236, "step": 60500 }, { "epoch": 74.36, "learning_rate": 9.383067267572908e-06, "loss": 0.0221, "step": 60750 }, { "epoch": 74.66, "learning_rate": 9.17544038141342e-06, "loss": 0.0313, "step": 61000 }, { "epoch": 74.97, "learning_rate": 8.969618914777637e-06, "loss": 0.0389, "step": 61250 }, { "epoch": 75.0, "eval_accuracy": 0.6749311294765841, "eval_f1": 0.47978279633622645, "eval_loss": 2.7327232360839844, "eval_precision": 0.5541808071284741, "eval_recall": 0.44874471450826137, "eval_runtime": 0.5752, "eval_samples_per_second": 631.071, "eval_steps_per_second": 79.97, "step": 61275 }, { "epoch": 75.28, "learning_rate": 8.765626349778362e-06, "loss": 0.0227, "step": 61500 }, { "epoch": 75.58, "learning_rate": 8.563485959869575e-06, "loss": 0.0303, "step": 61750 }, { "epoch": 75.89, "learning_rate": 8.363220807191058e-06, "loss": 0.0305, "step": 62000 }, { "epoch": 76.0, "eval_accuracy": 0.7024793388429752, "eval_f1": 0.5534820298880379, "eval_loss": 2.4534895420074463, "eval_precision": 0.6256500777723188, "eval_recall": 0.5284146306621677, "eval_runtime": 0.5743, "eval_samples_per_second": 632.097, "eval_steps_per_second": 80.1, "step": 62092 }, { "epoch": 76.19, "learning_rate": 8.164853739937368e-06, "loss": 0.034, "step": 62250 }, { "epoch": 76.5, "learning_rate": 7.968407389751003e-06, "loss": 0.034, "step": 62500 }, { "epoch": 76.81, "learning_rate": 7.773904169140392e-06, "loss": 0.023, "step": 62750 }, { "epoch": 77.0, "eval_accuracy": 0.7024793388429752, "eval_f1": 0.4687311706012172, "eval_loss": 2.6240921020507812, "eval_precision": 0.5393879300200679, "eval_recall": 0.4422954747770019, "eval_runtime": 0.5739, "eval_samples_per_second": 632.494, "eval_steps_per_second": 80.151, "step": 62909 }, { "epoch": 77.11, "learning_rate": 7.581366268922896e-06, "loss": 0.0132, "step": 63000 }, { "epoch": 77.42, "learning_rate": 7.3908156556929705e-06, "loss": 0.0201, "step": 63250 }, { "epoch": 77.72, "learning_rate": 7.20227406931612e-06, "loss": 0.0305, "step": 63500 }, { "epoch": 78.0, "eval_accuracy": 0.6942148760330579, "eval_f1": 0.4474972103847281, "eval_loss": 2.737638235092163, "eval_precision": 0.539260468004995, "eval_recall": 0.407899366643209, "eval_runtime": 0.5754, "eval_samples_per_second": 630.913, "eval_steps_per_second": 79.95, "step": 63726 }, { "epoch": 78.03, "learning_rate": 7.01576302044851e-06, "loss": 0.0258, "step": 63750 }, { "epoch": 78.34, "learning_rate": 6.831303788082866e-06, "loss": 0.0212, "step": 64000 }, { "epoch": 78.64, "learning_rate": 6.6489174171207504e-06, "loss": 0.0178, "step": 64250 }, { "epoch": 78.95, "learning_rate": 6.469341688844232e-06, "loss": 0.0278, "step": 64500 }, { "epoch": 79.0, "eval_accuracy": 0.7052341597796143, "eval_f1": 0.5369051843353192, "eval_loss": 2.641106605529785, "eval_precision": 0.6364117161569525, "eval_recall": 0.48457279242377765, "eval_runtime": 0.5738, "eval_samples_per_second": 632.582, "eval_steps_per_second": 80.162, "step": 64543 }, { "epoch": 79.25, "learning_rate": 6.291154729438112e-06, "loss": 0.0211, "step": 64750 }, { "epoch": 79.56, "learning_rate": 6.115102256888608e-06, "loss": 0.027, "step": 65000 }, { "epoch": 79.87, "learning_rate": 5.9412043569724075e-06, "loss": 0.0245, "step": 65250 }, { "epoch": 80.0, "eval_accuracy": 0.6887052341597796, "eval_f1": 0.5139284663519239, "eval_loss": 2.8429274559020996, "eval_precision": 0.6295462275834203, "eval_recall": 0.46902355991025946, "eval_runtime": 0.5746, "eval_samples_per_second": 631.731, "eval_steps_per_second": 80.054, "step": 65360 }, { "epoch": 80.17, "learning_rate": 5.769480869651678e-06, "loss": 0.0177, "step": 65500 }, { "epoch": 80.48, "learning_rate": 5.599951386810407e-06, "loss": 0.0229, "step": 65750 }, { "epoch": 80.78, "learning_rate": 5.432635250019285e-06, "loss": 0.0227, "step": 66000 }, { "epoch": 81.0, "eval_accuracy": 0.7245179063360881, "eval_f1": 0.5777058546571006, "eval_loss": 2.6588237285614014, "eval_precision": 0.6858633718409165, "eval_recall": 0.5245856565437846, "eval_runtime": 0.5738, "eval_samples_per_second": 632.57, "eval_steps_per_second": 80.16, "step": 66177 }, { "epoch": 81.09, "learning_rate": 5.26755154832895e-06, "loss": 0.0261, "step": 66250 }, { "epoch": 81.4, "learning_rate": 5.1047191160921495e-06, "loss": 0.0191, "step": 66500 }, { "epoch": 81.7, "learning_rate": 4.9441565308149724e-06, "loss": 0.0226, "step": 66750 }, { "epoch": 82.0, "eval_accuracy": 0.7024793388429752, "eval_f1": 0.5153196455189113, "eval_loss": 2.683469533920288, "eval_precision": 0.5957972186543615, "eval_recall": 0.47307357735313404, "eval_runtime": 0.5753, "eval_samples_per_second": 630.959, "eval_steps_per_second": 79.956, "step": 66994 }, { "epoch": 82.01, "learning_rate": 4.785882111037252e-06, "loss": 0.0164, "step": 67000 }, { "epoch": 82.31, "learning_rate": 4.629913914242723e-06, "loss": 0.0207, "step": 67250 }, { "epoch": 82.62, "learning_rate": 4.4762697347987634e-06, "loss": 0.0211, "step": 67500 }, { "epoch": 82.93, "learning_rate": 4.324967101926272e-06, "loss": 0.0224, "step": 67750 }, { "epoch": 83.0, "eval_accuracy": 0.699724517906336, "eval_f1": 0.5510071098243216, "eval_loss": 2.7483408451080322, "eval_precision": 0.6299525870954442, "eval_recall": 0.5086697101475426, "eval_runtime": 0.5745, "eval_samples_per_second": 631.809, "eval_steps_per_second": 80.064, "step": 67811 }, { "epoch": 83.23, "learning_rate": 4.176023277699789e-06, "loss": 0.027, "step": 68000 }, { "epoch": 83.54, "learning_rate": 4.029455255077999e-06, "loss": 0.0154, "step": 68250 }, { "epoch": 83.84, "learning_rate": 3.8852797559650935e-06, "loss": 0.0195, "step": 68500 }, { "epoch": 84.0, "eval_accuracy": 0.699724517906336, "eval_f1": 0.52634249226563, "eval_loss": 2.7812142372131348, "eval_precision": 0.5949473728885494, "eval_recall": 0.49608820981973695, "eval_runtime": 0.5756, "eval_samples_per_second": 630.699, "eval_steps_per_second": 79.923, "step": 68628 }, { "epoch": 84.15, "learning_rate": 3.7440754752084e-06, "loss": 0.012, "step": 68750 }, { "epoch": 84.46, "learning_rate": 3.604724362660877e-06, "loss": 0.0262, "step": 69000 }, { "epoch": 84.76, "learning_rate": 3.467814231049432e-06, "loss": 0.0106, "step": 69250 }, { "epoch": 85.0, "eval_accuracy": 0.6942148760330579, "eval_f1": 0.5085355239316488, "eval_loss": 2.8171160221099854, "eval_precision": 0.6049143980994077, "eval_recall": 0.4679249219581732, "eval_runtime": 0.5729, "eval_samples_per_second": 633.612, "eval_steps_per_second": 80.292, "step": 69445 }, { "epoch": 85.07, "learning_rate": 3.33336070041218e-06, "loss": 0.0258, "step": 69500 }, { "epoch": 85.37, "learning_rate": 3.2013791105143466e-06, "loss": 0.0118, "step": 69750 }, { "epoch": 85.68, "learning_rate": 3.071884519098131e-06, "loss": 0.0217, "step": 70000 }, { "epoch": 85.99, "learning_rate": 2.9448917001647703e-06, "loss": 0.0155, "step": 70250 }, { "epoch": 86.0, "eval_accuracy": 0.6887052341597796, "eval_f1": 0.4962240507440637, "eval_loss": 2.8894941806793213, "eval_precision": 0.5999622071050642, "eval_recall": 0.45233290929103737, "eval_runtime": 0.575, "eval_samples_per_second": 631.309, "eval_steps_per_second": 80.001, "step": 70262 }, { "epoch": 86.29, "learning_rate": 2.820415142289015e-06, "loss": 0.0196, "step": 70500 }, { "epoch": 86.6, "learning_rate": 2.698951772130373e-06, "loss": 0.0064, "step": 70750 }, { "epoch": 86.9, "learning_rate": 2.5795398473242555e-06, "loss": 0.0111, "step": 71000 }, { "epoch": 87.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.4869283447078967, "eval_loss": 2.887157678604126, "eval_precision": 0.596748772605851, "eval_recall": 0.44054806714905237, "eval_runtime": 0.575, "eval_samples_per_second": 631.314, "eval_steps_per_second": 80.001, "step": 71079 }, { "epoch": 87.21, "learning_rate": 2.462685866465117e-06, "loss": 0.0174, "step": 71250 }, { "epoch": 87.52, "learning_rate": 2.3484031613905387e-06, "loss": 0.0105, "step": 71500 }, { "epoch": 87.82, "learning_rate": 2.2367047705819572e-06, "loss": 0.0097, "step": 71750 }, { "epoch": 88.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.4890889302500872, "eval_loss": 2.9936180114746094, "eval_precision": 0.5928653178112443, "eval_recall": 0.44590630018585686, "eval_runtime": 0.5751, "eval_samples_per_second": 631.226, "eval_steps_per_second": 79.99, "step": 71896 }, { "epoch": 88.13, "learning_rate": 2.127603437677139e-06, "loss": 0.0083, "step": 72000 }, { "epoch": 88.43, "learning_rate": 2.0211116100162435e-06, "loss": 0.0124, "step": 72250 }, { "epoch": 88.74, "learning_rate": 1.9172414372217146e-06, "loss": 0.0086, "step": 72500 }, { "epoch": 89.0, "eval_accuracy": 0.6831955922865014, "eval_f1": 0.49100657070581877, "eval_loss": 3.0106709003448486, "eval_precision": 0.5958884973599873, "eval_recall": 0.44583543850785234, "eval_runtime": 0.5744, "eval_samples_per_second": 631.965, "eval_steps_per_second": 80.084, "step": 72713 }, { "epoch": 89.05, "learning_rate": 1.8160047698121518e-06, "loss": 0.0104, "step": 72750 }, { "epoch": 89.35, "learning_rate": 1.7178022403391475e-06, "loss": 0.0108, "step": 73000 }, { "epoch": 89.66, "learning_rate": 1.6218562848936775e-06, "loss": 0.0174, "step": 73250 }, { "epoch": 89.96, "learning_rate": 1.528577535241521e-06, "loss": 0.0072, "step": 73500 }, { "epoch": 90.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.4893894643187819, "eval_loss": 3.0227720737457275, "eval_precision": 0.5889153512253815, "eval_recall": 0.4449414911853336, "eval_runtime": 0.5765, "eval_samples_per_second": 629.693, "eval_steps_per_second": 79.796, "step": 73530 }, { "epoch": 90.27, "learning_rate": 1.4383336890610749e-06, "loss": 0.0151, "step": 73750 }, { "epoch": 90.58, "learning_rate": 1.3504101989962132e-06, "loss": 0.0109, "step": 74000 }, { "epoch": 90.88, "learning_rate": 1.2651848839316887e-06, "loss": 0.0107, "step": 74250 }, { "epoch": 91.0, "eval_accuracy": 0.6887052341597796, "eval_f1": 0.49676183815019276, "eval_loss": 2.977621078491211, "eval_precision": 0.5930682617874735, "eval_recall": 0.45364507208719035, "eval_runtime": 0.5751, "eval_samples_per_second": 631.186, "eval_steps_per_second": 79.985, "step": 74347 }, { "epoch": 91.19, "learning_rate": 1.182667467199558e-06, "loss": 0.0081, "step": 74500 }, { "epoch": 91.49, "learning_rate": 1.1028673631885173e-06, "loss": 0.0136, "step": 74750 }, { "epoch": 91.8, "learning_rate": 1.0257936762698288e-06, "loss": 0.0105, "step": 75000 }, { "epoch": 92.0, "eval_accuracy": 0.6887052341597796, "eval_f1": 0.4999633782778992, "eval_loss": 3.0019350051879883, "eval_precision": 0.5966416670279332, "eval_recall": 0.4551455728179866, "eval_runtime": 0.5756, "eval_samples_per_second": 630.598, "eval_steps_per_second": 79.91, "step": 75164 }, { "epoch": 92.11, "learning_rate": 9.514551997585913e-07, "loss": 0.0061, "step": 75250 }, { "epoch": 92.41, "learning_rate": 8.798604149105355e-07, "loss": 0.0094, "step": 75500 }, { "epoch": 92.72, "learning_rate": 8.110174899543743e-07, "loss": 0.0138, "step": 75750 }, { "epoch": 93.0, "eval_accuracy": 0.6887052341597796, "eval_f1": 0.4999633782778992, "eval_loss": 3.0192649364471436, "eval_precision": 0.5966416670279332, "eval_recall": 0.4551455728179866, "eval_runtime": 0.5738, "eval_samples_per_second": 632.575, "eval_steps_per_second": 80.161, "step": 75981 }, { "epoch": 93.02, "learning_rate": 7.449342791599201e-07, "loss": 0.0106, "step": 76000 }, { "epoch": 93.33, "learning_rate": 6.81618321941968e-07, "loss": 0.0107, "step": 76250 }, { "epoch": 93.64, "learning_rate": 6.210768420001373e-07, "loss": 0.0081, "step": 76500 }, { "epoch": 93.94, "learning_rate": 5.633167464947242e-07, "loss": 0.0123, "step": 76750 }, { "epoch": 94.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.48841087906946806, "eval_loss": 3.0272629261016846, "eval_precision": 0.5842731342731343, "eval_recall": 0.4449414911853336, "eval_runtime": 0.5754, "eval_samples_per_second": 630.882, "eval_steps_per_second": 79.946, "step": 76798 }, { "epoch": 94.25, "learning_rate": 5.08344625258661e-07, "loss": 0.0113, "step": 77000 }, { "epoch": 94.55, "learning_rate": 4.561667500456862e-07, "loss": 0.0064, "step": 77250 }, { "epoch": 94.86, "learning_rate": 4.067890738147978e-07, "loss": 0.0114, "step": 77500 }, { "epoch": 95.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.49834117213459417, "eval_loss": 3.0193073749542236, "eval_precision": 0.592981803003125, "eval_recall": 0.45445875963117344, "eval_runtime": 0.5755, "eval_samples_per_second": 630.739, "eval_steps_per_second": 79.928, "step": 77615 }, { "epoch": 95.17, "learning_rate": 3.6021723005109365e-07, "loss": 0.013, "step": 77750 }, { "epoch": 95.47, "learning_rate": 3.1645653212303004e-07, "loss": 0.0127, "step": 78000 }, { "epoch": 95.78, "learning_rate": 2.755119726762373e-07, "loss": 0.0087, "step": 78250 }, { "epoch": 96.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.4971705409799476, "eval_loss": 3.0252628326416016, "eval_precision": 0.5904316041796341, "eval_recall": 0.45445875963117344, "eval_runtime": 0.5748, "eval_samples_per_second": 631.48, "eval_steps_per_second": 80.022, "step": 78432 }, { "epoch": 96.08, "learning_rate": 2.3738822306390575e-07, "loss": 0.0076, "step": 78500 }, { "epoch": 96.39, "learning_rate": 2.0208963281382054e-07, "loss": 0.0059, "step": 78750 }, { "epoch": 96.7, "learning_rate": 1.6962022913215026e-07, "loss": 0.0112, "step": 79000 }, { "epoch": 97.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.4971705409799476, "eval_loss": 3.0260462760925293, "eval_precision": 0.5904316041796341, "eval_recall": 0.45445875963117344, "eval_runtime": 0.575, "eval_samples_per_second": 631.313, "eval_steps_per_second": 80.001, "step": 79249 }, { "epoch": 97.0, "learning_rate": 1.399837164439588e-07, "loss": 0.0198, "step": 79250 }, { "epoch": 97.31, "learning_rate": 1.131834759705852e-07, "loss": 0.0108, "step": 79500 }, { "epoch": 97.61, "learning_rate": 8.922256534386886e-08, "loss": 0.0096, "step": 79750 }, { "epoch": 97.92, "learning_rate": 6.818252895382516e-08, "loss": 0.0154, "step": 80000 }, { "epoch": 98.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.4971705409799476, "eval_loss": 3.032817840576172, "eval_precision": 0.5904316041796341, "eval_recall": 0.45445875963117344, "eval_runtime": 0.5745, "eval_samples_per_second": 631.834, "eval_steps_per_second": 80.067, "step": 80066 }, { "epoch": 98.23, "learning_rate": 4.989677258919745e-08, "loss": 0.0066, "step": 80250 }, { "epoch": 98.53, "learning_rate": 3.4457566433288015e-08, "loss": 0.0089, "step": 80500 }, { "epoch": 98.84, "learning_rate": 2.1866671940751205e-08, "loss": 0.0113, "step": 80750 }, { "epoch": 99.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.4971705409799476, "eval_loss": 3.0352275371551514, "eval_precision": 0.5904316041796341, "eval_recall": 0.45445875963117344, "eval_runtime": 0.5755, "eval_samples_per_second": 630.795, "eval_steps_per_second": 79.935, "step": 80883 }, { "epoch": 99.14, "learning_rate": 1.212552560317659e-08, "loss": 0.0052, "step": 81000 }, { "epoch": 99.45, "learning_rate": 5.235238785186725e-09, "loss": 0.0124, "step": 81250 }, { "epoch": 99.76, "learning_rate": 1.1965975976552247e-09, "loss": 0.0094, "step": 81500 }, { "epoch": 100.0, "eval_accuracy": 0.6859504132231405, "eval_f1": 0.4971705409799476, "eval_loss": 3.034996271133423, "eval_precision": 0.5904316041796341, "eval_recall": 0.45445875963117344, "eval_runtime": 0.5749, "eval_samples_per_second": 631.447, "eval_steps_per_second": 80.018, "step": 81700 }, { "epoch": 100.0, "step": 81700, "total_flos": 2.247320628668568e+16, "train_loss": 0.19591666076364248, "train_runtime": 5839.9501, "train_samples_per_second": 111.885, "train_steps_per_second": 13.99 } ], "max_steps": 81700, "num_train_epochs": 100, "total_flos": 2.247320628668568e+16, "trial_name": null, "trial_params": { "learning_rate": 5e-06 } }