{ "tests": { "22": { "id": 22, "task": 2, "model": "xlm-roberta-large", "languages": [ "cy" ], "augmentation": [ "" ], "data_percentage": 1, "use_token_type_ids": false, "tokenizer_config": { "strip_accent": false, "add_prefix_space": true }, "opimizer_config": { "adafactor": true, "num_train_epochs": 2 }, "result": [ { "loss": 1.882, "grad_norm": Infinity, "learning_rate": 0.0, "epoch": 0.0, "step": 1 }, { "loss": 0.5285, "grad_norm": 2.4953722953796387, "learning_rate": 3.99453087019932e-05, "epoch": 0.01, "step": 100 }, { "loss": 0.1702, "grad_norm": 1.8322360515594482, "learning_rate": 3.982377248420029e-05, "epoch": 0.01, "step": 200 }, { "loss": 0.1472, "grad_norm": 1.7121275663375854, "learning_rate": 3.970223626640739e-05, "epoch": 0.02, "step": 300 }, { "loss": 0.1342, "grad_norm": 1.7097556591033936, "learning_rate": 3.958070004861449e-05, "epoch": 0.02, "step": 400 }, { "loss": 0.1288, "grad_norm": 1.57424795627594, "learning_rate": 3.9459163830821586e-05, "epoch": 0.03, "step": 500 }, { "loss": 0.1247, "grad_norm": 1.7552311420440674, "learning_rate": 3.9337627613028686e-05, "epoch": 0.04, "step": 600 }, { "loss": 0.1218, "grad_norm": 1.6224812269210815, "learning_rate": 3.9216091395235786e-05, "epoch": 0.04, "step": 700 }, { "loss": 0.1176, "grad_norm": 1.8368713855743408, "learning_rate": 3.909455517744288e-05, "epoch": 0.05, "step": 800 }, { "loss": 0.1119, "grad_norm": 1.4631482362747192, "learning_rate": 3.897301895964998e-05, "epoch": 0.05, "step": 900 }, { "loss": 0.1098, "grad_norm": 1.2774118185043335, "learning_rate": 3.885148274185708e-05, "epoch": 0.06, "step": 1000 }, { "loss": 0.1083, "grad_norm": 1.187245488166809, "learning_rate": 3.872994652406417e-05, "epoch": 0.07, "step": 1100 }, { "loss": 0.1075, "grad_norm": 1.6492900848388672, "learning_rate": 3.860841030627127e-05, "epoch": 0.07, "step": 1200 }, { "loss": 0.107, "grad_norm": 1.4514034986495972, "learning_rate": 3.8486874088478366e-05, "epoch": 0.08, "step": 1300 }, { "loss": 0.1036, "grad_norm": 1.0488823652267456, "learning_rate": 3.8365337870685466e-05, "epoch": 0.08, "step": 1400 }, { "loss": 0.1021, "grad_norm": 1.5489355325698853, "learning_rate": 3.8243801652892566e-05, "epoch": 0.09, "step": 1500 }, { "loss": 0.1008, "grad_norm": 1.2730894088745117, "learning_rate": 3.812226543509966e-05, "epoch": 0.1, "step": 1600 }, { "loss": 0.1004, "grad_norm": 1.6920459270477295, "learning_rate": 3.800072921730676e-05, "epoch": 0.1, "step": 1700 }, { "loss": 0.1006, "grad_norm": 0.9863981008529663, "learning_rate": 3.787919299951386e-05, "epoch": 0.11, "step": 1800 }, { "loss": 0.0982, "grad_norm": 0.9981995820999146, "learning_rate": 3.775765678172095e-05, "epoch": 0.12, "step": 1900 }, { "loss": 0.0975, "grad_norm": 1.021620273590088, "learning_rate": 3.763612056392805e-05, "epoch": 0.12, "step": 2000 }, { "loss": 0.0989, "grad_norm": 1.2811397314071655, "learning_rate": 3.751458434613515e-05, "epoch": 0.13, "step": 2100 }, { "loss": 0.0959, "grad_norm": 1.5976190567016602, "learning_rate": 3.7393048128342246e-05, "epoch": 0.13, "step": 2200 }, { "loss": 0.0961, "grad_norm": 0.9754481911659241, "learning_rate": 3.7271511910549346e-05, "epoch": 0.14, "step": 2300 }, { "loss": 0.0956, "grad_norm": 0.9418678283691406, "learning_rate": 3.7149975692756447e-05, "epoch": 0.15, "step": 2400 }, { "loss": 0.0954, "grad_norm": 1.294745922088623, "learning_rate": 3.702843947496354e-05, "epoch": 0.15, "step": 2500 }, { "loss": 0.0943, "grad_norm": 1.3049461841583252, "learning_rate": 3.690690325717064e-05, "epoch": 0.16, "step": 2600 }, { "loss": 0.0936, "grad_norm": 1.1144427061080933, "learning_rate": 3.678536703937774e-05, "epoch": 0.16, "step": 2700 }, { "loss": 0.0939, "grad_norm": 1.3424856662750244, "learning_rate": 3.666383082158483e-05, "epoch": 0.17, "step": 2800 }, { "loss": 0.0947, "grad_norm": 1.123299241065979, "learning_rate": 3.6542294603791933e-05, "epoch": 0.18, "step": 2900 }, { "loss": 0.0932, "grad_norm": 1.456009864807129, "learning_rate": 3.642075838599903e-05, "epoch": 0.18, "step": 3000 }, { "loss": 0.0927, "grad_norm": 1.4363266229629517, "learning_rate": 3.629922216820613e-05, "epoch": 0.19, "step": 3100 }, { "loss": 0.0907, "grad_norm": 0.7776892185211182, "learning_rate": 3.617768595041323e-05, "epoch": 0.19, "step": 3200 }, { "loss": 0.092, "grad_norm": 25.731966018676758, "learning_rate": 3.605614973262032e-05, "epoch": 0.2, "step": 3300 }, { "loss": 0.091, "grad_norm": 0.9259088039398193, "learning_rate": 3.593461351482742e-05, "epoch": 0.21, "step": 3400 }, { "loss": 0.0915, "grad_norm": 0.851094663143158, "learning_rate": 3.581307729703452e-05, "epoch": 0.21, "step": 3500 }, { "loss": 0.0902, "grad_norm": 1.5700650215148926, "learning_rate": 3.5691541079241614e-05, "epoch": 0.22, "step": 3600 }, { "loss": 0.0888, "grad_norm": 1.13387930393219, "learning_rate": 3.5570004861448714e-05, "epoch": 0.22, "step": 3700 }, { "loss": 0.089, "grad_norm": 1.2357937097549438, "learning_rate": 3.5448468643655814e-05, "epoch": 0.23, "step": 3800 }, { "loss": 0.0898, "grad_norm": 0.9063655734062195, "learning_rate": 3.532693242586291e-05, "epoch": 0.24, "step": 3900 }, { "loss": 0.0893, "grad_norm": 1.1259723901748657, "learning_rate": 3.520539620807001e-05, "epoch": 0.24, "step": 4000 }, { "loss": 0.0889, "grad_norm": 0.8327601552009583, "learning_rate": 3.508385999027711e-05, "epoch": 0.25, "step": 4100 }, { "loss": 0.0862, "grad_norm": 1.2368316650390625, "learning_rate": 3.49623237724842e-05, "epoch": 0.25, "step": 4200 }, { "loss": 0.0867, "grad_norm": 1.1474043130874634, "learning_rate": 3.48407875546913e-05, "epoch": 0.26, "step": 4300 }, { "loss": 0.0858, "grad_norm": 0.6887868046760559, "learning_rate": 3.47192513368984e-05, "epoch": 0.27, "step": 4400 }, { "loss": 0.0877, "grad_norm": 0.8170347809791565, "learning_rate": 3.4597715119105494e-05, "epoch": 0.27, "step": 4500 }, { "loss": 0.0871, "grad_norm": 0.7361243367195129, "learning_rate": 3.4476178901312594e-05, "epoch": 0.28, "step": 4600 }, { "loss": 0.0878, "grad_norm": 1.0975162982940674, "learning_rate": 3.435464268351969e-05, "epoch": 0.29, "step": 4700 }, { "loss": 0.0863, "grad_norm": 0.931176483631134, "learning_rate": 3.4233106465726794e-05, "epoch": 0.29, "step": 4800 }, { "loss": 0.0853, "grad_norm": 1.0259523391723633, "learning_rate": 3.411157024793389e-05, "epoch": 0.3, "step": 4900 }, { "loss": 0.0876, "grad_norm": 1.1680504083633423, "learning_rate": 3.399003403014098e-05, "epoch": 0.3, "step": 5000 }, { "loss": 0.0855, "grad_norm": 1.2358198165893555, "learning_rate": 3.386849781234809e-05, "epoch": 0.31, "step": 5100 }, { "loss": 0.085, "grad_norm": 0.8484376668930054, "learning_rate": 3.374696159455518e-05, "epoch": 0.32, "step": 5200 }, { "loss": 0.085, "grad_norm": 1.5419291257858276, "learning_rate": 3.3625425376762274e-05, "epoch": 0.32, "step": 5300 }, { "loss": 0.0849, "grad_norm": 1.0334900617599487, "learning_rate": 3.3503889158969374e-05, "epoch": 0.33, "step": 5400 }, { "loss": 0.0854, "grad_norm": 1.0367408990859985, "learning_rate": 3.3382352941176474e-05, "epoch": 0.33, "step": 5500 }, { "loss": 0.0853, "grad_norm": 0.8429509401321411, "learning_rate": 3.326081672338357e-05, "epoch": 0.34, "step": 5600 }, { "loss": 0.086, "grad_norm": 0.9059005379676819, "learning_rate": 3.313928050559067e-05, "epoch": 0.35, "step": 5700 }, { "loss": 0.0846, "grad_norm": 1.1803362369537354, "learning_rate": 3.301774428779777e-05, "epoch": 0.35, "step": 5800 }, { "loss": 0.0817, "grad_norm": 0.7263641357421875, "learning_rate": 3.289620807000487e-05, "epoch": 0.36, "step": 5900 }, { "loss": 0.0831, "grad_norm": 0.8227238655090332, "learning_rate": 3.277467185221196e-05, "epoch": 0.36, "step": 6000 }, { "loss": 0.0839, "grad_norm": 1.0349544286727905, "learning_rate": 3.2653135634419055e-05, "epoch": 0.37, "step": 6100 }, { "loss": 0.0827, "grad_norm": 0.8446714282035828, "learning_rate": 3.253159941662616e-05, "epoch": 0.38, "step": 6200 }, { "loss": 0.082, "grad_norm": 1.1419836282730103, "learning_rate": 3.2410063198833255e-05, "epoch": 0.38, "step": 6300 }, { "loss": 0.0812, "grad_norm": 0.9505990147590637, "learning_rate": 3.228852698104035e-05, "epoch": 0.39, "step": 6400 }, { "loss": 0.0806, "grad_norm": 1.0036993026733398, "learning_rate": 3.2166990763247455e-05, "epoch": 0.39, "step": 6500 }, { "loss": 0.0819, "grad_norm": 0.7694116234779358, "learning_rate": 3.204545454545455e-05, "epoch": 0.4, "step": 6600 }, { "loss": 0.0818, "grad_norm": 0.7389699220657349, "learning_rate": 3.192391832766165e-05, "epoch": 0.41, "step": 6700 }, { "loss": 0.0829, "grad_norm": 0.8264873623847961, "learning_rate": 3.180238210986874e-05, "epoch": 0.41, "step": 6800 }, { "loss": 0.0849, "grad_norm": 0.8844084143638611, "learning_rate": 3.168084589207584e-05, "epoch": 0.42, "step": 6900 }, { "loss": 0.0816, "grad_norm": 0.8728023171424866, "learning_rate": 3.155930967428294e-05, "epoch": 0.42, "step": 7000 }, { "loss": 0.0799, "grad_norm": 1.218404769897461, "learning_rate": 3.1437773456490035e-05, "epoch": 0.43, "step": 7100 }, { "loss": 0.0797, "grad_norm": 0.7085688710212708, "learning_rate": 3.1316237238697135e-05, "epoch": 0.44, "step": 7200 }, { "loss": 0.0795, "grad_norm": 0.8446517586708069, "learning_rate": 3.1194701020904235e-05, "epoch": 0.44, "step": 7300 }, { "loss": 0.0817, "grad_norm": 1.3226453065872192, "learning_rate": 3.107316480311133e-05, "epoch": 0.45, "step": 7400 }, { "loss": 0.0816, "grad_norm": 0.7685155868530273, "learning_rate": 3.095162858531843e-05, "epoch": 0.46, "step": 7500 }, { "loss": 0.0806, "grad_norm": 0.7135798335075378, "learning_rate": 3.083009236752553e-05, "epoch": 0.46, "step": 7600 }, { "loss": 0.0795, "grad_norm": 1.0276037454605103, "learning_rate": 3.070855614973262e-05, "epoch": 0.47, "step": 7700 }, { "loss": 0.081, "grad_norm": 1.1788092851638794, "learning_rate": 3.058701993193972e-05, "epoch": 0.47, "step": 7800 }, { "loss": 0.0791, "grad_norm": 1.0305782556533813, "learning_rate": 3.046548371414682e-05, "epoch": 0.48, "step": 7900 }, { "loss": 0.0805, "grad_norm": 1.4414223432540894, "learning_rate": 3.0343947496353915e-05, "epoch": 0.49, "step": 8000 }, { "loss": 0.0799, "grad_norm": 0.8137165904045105, "learning_rate": 3.0222411278561012e-05, "epoch": 0.49, "step": 8100 }, { "loss": 0.08, "grad_norm": 1.1238079071044922, "learning_rate": 3.0100875060768112e-05, "epoch": 0.5, "step": 8200 }, { "loss": 0.0792, "grad_norm": 0.9724037647247314, "learning_rate": 2.997933884297521e-05, "epoch": 0.5, "step": 8300 }, { "loss": 0.0793, "grad_norm": 1.0247116088867188, "learning_rate": 2.9857802625182306e-05, "epoch": 0.51, "step": 8400 }, { "loss": 0.0783, "grad_norm": 1.454062581062317, "learning_rate": 2.9737481769567335e-05, "epoch": 0.52, "step": 8500 }, { "loss": 0.0788, "grad_norm": 0.7570217251777649, "learning_rate": 2.961594555177443e-05, "epoch": 0.52, "step": 8600 }, { "loss": 0.0768, "grad_norm": 1.1738083362579346, "learning_rate": 2.9494409333981528e-05, "epoch": 0.53, "step": 8700 }, { "loss": 0.0778, "grad_norm": 0.7776427268981934, "learning_rate": 2.9372873116188625e-05, "epoch": 0.53, "step": 8800 }, { "loss": 0.0763, "grad_norm": 1.226198673248291, "learning_rate": 2.9251336898395725e-05, "epoch": 0.54, "step": 8900 }, { "loss": 0.0761, "grad_norm": 0.8859773874282837, "learning_rate": 2.912980068060282e-05, "epoch": 0.55, "step": 9000 }, { "loss": 0.0765, "grad_norm": 1.0220259428024292, "learning_rate": 2.9008264462809918e-05, "epoch": 0.55, "step": 9100 }, { "loss": 0.0777, "grad_norm": 1.0430243015289307, "learning_rate": 2.888672824501702e-05, "epoch": 0.56, "step": 9200 }, { "loss": 0.0775, "grad_norm": 1.1380356550216675, "learning_rate": 2.8765192027224115e-05, "epoch": 0.56, "step": 9300 }, { "loss": 0.0775, "grad_norm": 0.6778531670570374, "learning_rate": 2.8643655809431212e-05, "epoch": 0.57, "step": 9400 }, { "loss": 0.0782, "grad_norm": 1.0413175821304321, "learning_rate": 2.852211959163831e-05, "epoch": 0.58, "step": 9500 }, { "loss": 0.0791, "grad_norm": 1.1399835348129272, "learning_rate": 2.840058337384541e-05, "epoch": 0.58, "step": 9600 }, { "loss": 0.0763, "grad_norm": 0.968399703502655, "learning_rate": 2.8279047156052505e-05, "epoch": 0.59, "step": 9700 }, { "loss": 0.0763, "grad_norm": 1.0254497528076172, "learning_rate": 2.8157510938259602e-05, "epoch": 0.59, "step": 9800 }, { "loss": 0.0771, "grad_norm": 0.8642473220825195, "learning_rate": 2.8035974720466702e-05, "epoch": 0.6, "step": 9900 }, { "loss": 0.0772, "grad_norm": 1.1130231618881226, "learning_rate": 2.79144385026738e-05, "epoch": 0.61, "step": 10000 }, { "loss": 0.0793, "grad_norm": 1.4455962181091309, "learning_rate": 2.7792902284880895e-05, "epoch": 0.61, "step": 10100 }, { "loss": 0.077, "grad_norm": 0.9273576736450195, "learning_rate": 2.7671366067087992e-05, "epoch": 0.62, "step": 10200 }, { "loss": 0.0766, "grad_norm": 0.8223456740379333, "learning_rate": 2.7549829849295092e-05, "epoch": 0.62, "step": 10300 }, { "loss": 0.0765, "grad_norm": 1.1068949699401855, "learning_rate": 2.742829363150219e-05, "epoch": 0.63, "step": 10400 }, { "loss": 0.0762, "grad_norm": 1.0787135362625122, "learning_rate": 2.7306757413709285e-05, "epoch": 0.64, "step": 10500 }, { "loss": 0.0765, "grad_norm": 0.6019480228424072, "learning_rate": 2.7185221195916386e-05, "epoch": 0.64, "step": 10600 }, { "loss": 0.0756, "grad_norm": 0.7752580046653748, "learning_rate": 2.7063684978123482e-05, "epoch": 0.65, "step": 10700 }, { "loss": 0.0762, "grad_norm": 0.9023341536521912, "learning_rate": 2.6943364122508508e-05, "epoch": 0.66, "step": 10800 }, { "loss": 0.0759, "grad_norm": 1.1154266595840454, "learning_rate": 2.6821827904715608e-05, "epoch": 0.66, "step": 10900 }, { "loss": 0.0752, "grad_norm": 1.5197564363479614, "learning_rate": 2.6700291686922705e-05, "epoch": 0.67, "step": 11000 }, { "loss": 0.0757, "grad_norm": 0.8111494183540344, "learning_rate": 2.65787554691298e-05, "epoch": 0.67, "step": 11100 }, { "loss": 0.0749, "grad_norm": 0.6413083076477051, "learning_rate": 2.6457219251336898e-05, "epoch": 0.68, "step": 11200 }, { "loss": 0.0754, "grad_norm": 0.8996323943138123, "learning_rate": 2.6335683033544e-05, "epoch": 0.69, "step": 11300 }, { "loss": 0.0744, "grad_norm": 0.7931196093559265, "learning_rate": 2.6214146815751095e-05, "epoch": 0.69, "step": 11400 }, { "loss": 0.0742, "grad_norm": 1.0821586847305298, "learning_rate": 2.609261059795819e-05, "epoch": 0.7, "step": 11500 }, { "loss": 0.0722, "grad_norm": 0.9964590072631836, "learning_rate": 2.5971074380165292e-05, "epoch": 0.7, "step": 11600 }, { "loss": 0.0752, "grad_norm": 0.7918893694877625, "learning_rate": 2.584953816237239e-05, "epoch": 0.71, "step": 11700 }, { "loss": 0.0734, "grad_norm": 0.6565855145454407, "learning_rate": 2.5728001944579485e-05, "epoch": 0.72, "step": 11800 }, { "loss": 0.0717, "grad_norm": 1.9885566234588623, "learning_rate": 2.5606465726786582e-05, "epoch": 0.72, "step": 11900 }, { "loss": 0.0747, "grad_norm": 0.6101750135421753, "learning_rate": 2.5484929508993682e-05, "epoch": 0.73, "step": 12000 }, { "loss": 0.073, "grad_norm": 1.001930594444275, "learning_rate": 2.536339329120078e-05, "epoch": 0.73, "step": 12100 }, { "loss": 0.074, "grad_norm": 0.880673348903656, "learning_rate": 2.5241857073407875e-05, "epoch": 0.74, "step": 12200 }, { "loss": 0.0738, "grad_norm": 0.7980429530143738, "learning_rate": 2.5120320855614975e-05, "epoch": 0.75, "step": 12300 }, { "loss": 0.0758, "grad_norm": 1.0153135061264038, "learning_rate": 2.4998784637822072e-05, "epoch": 0.75, "step": 12400 }, { "loss": 0.0742, "grad_norm": 0.8344822525978088, "learning_rate": 2.487724842002917e-05, "epoch": 0.76, "step": 12500 }, { "loss": 0.0738, "grad_norm": 0.6752304434776306, "learning_rate": 2.4755712202236272e-05, "epoch": 0.76, "step": 12600 }, { "loss": 0.0732, "grad_norm": 1.1106210947036743, "learning_rate": 2.4634175984443366e-05, "epoch": 0.77, "step": 12700 }, { "loss": 0.0754, "grad_norm": 0.8022058606147766, "learning_rate": 2.4512639766650462e-05, "epoch": 0.78, "step": 12800 }, { "loss": 0.0735, "grad_norm": 0.737308144569397, "learning_rate": 2.439110354885756e-05, "epoch": 0.78, "step": 12900 }, { "loss": 0.0738, "grad_norm": 2.094043493270874, "learning_rate": 2.4269567331064662e-05, "epoch": 0.79, "step": 13000 }, { "loss": 0.072, "grad_norm": 1.1105279922485352, "learning_rate": 2.4148031113271756e-05, "epoch": 0.79, "step": 13100 }, { "loss": 0.0716, "grad_norm": 1.2243571281433105, "learning_rate": 2.4026494895478852e-05, "epoch": 0.8, "step": 13200 }, { "loss": 0.0718, "grad_norm": 1.0883300304412842, "learning_rate": 2.3904958677685956e-05, "epoch": 0.81, "step": 13300 }, { "loss": 0.0727, "grad_norm": 0.9934273362159729, "learning_rate": 2.378342245989305e-05, "epoch": 0.81, "step": 13400 }, { "loss": 0.0721, "grad_norm": 0.7145100831985474, "learning_rate": 2.3661886242100146e-05, "epoch": 0.82, "step": 13500 }, { "loss": 0.0721, "grad_norm": 0.8873516321182251, "learning_rate": 2.3540350024307243e-05, "epoch": 0.83, "step": 13600 }, { "loss": 0.0723, "grad_norm": 0.7798359990119934, "learning_rate": 2.3418813806514346e-05, "epoch": 0.83, "step": 13700 }, { "loss": 0.0726, "grad_norm": 0.9411553740501404, "learning_rate": 2.329727758872144e-05, "epoch": 0.84, "step": 13800 }, { "loss": 0.0715, "grad_norm": 0.7994709610939026, "learning_rate": 2.3175741370928536e-05, "epoch": 0.84, "step": 13900 }, { "loss": 0.0732, "grad_norm": 0.5489715337753296, "learning_rate": 2.305420515313564e-05, "epoch": 0.85, "step": 14000 }, { "loss": 0.0699, "grad_norm": 0.5710996389389038, "learning_rate": 2.2932668935342736e-05, "epoch": 0.86, "step": 14100 }, { "loss": 0.073, "grad_norm": 0.7003745436668396, "learning_rate": 2.281113271754983e-05, "epoch": 0.86, "step": 14200 }, { "loss": 0.0722, "grad_norm": 0.6743086576461792, "learning_rate": 2.2689596499756926e-05, "epoch": 0.87, "step": 14300 }, { "loss": 0.0699, "grad_norm": 0.6730968356132507, "learning_rate": 2.256806028196403e-05, "epoch": 0.87, "step": 14400 }, { "loss": 0.0719, "grad_norm": 0.7155641913414001, "learning_rate": 2.2446524064171126e-05, "epoch": 0.88, "step": 14500 }, { "loss": 0.0708, "grad_norm": 0.8122462630271912, "learning_rate": 2.232498784637822e-05, "epoch": 0.89, "step": 14600 }, { "loss": 0.0718, "grad_norm": 0.8022533655166626, "learning_rate": 2.2203451628585323e-05, "epoch": 0.89, "step": 14700 }, { "loss": 0.0712, "grad_norm": 0.545359194278717, "learning_rate": 2.208191541079242e-05, "epoch": 0.9, "step": 14800 }, { "loss": 0.0711, "grad_norm": 0.8318025469779968, "learning_rate": 2.1960379192999513e-05, "epoch": 0.9, "step": 14900 }, { "loss": 0.0706, "grad_norm": 0.9334779381752014, "learning_rate": 2.1838842975206616e-05, "epoch": 0.91, "step": 15000 }, { "loss": 0.0701, "grad_norm": 0.8202875256538391, "learning_rate": 2.1717306757413713e-05, "epoch": 0.92, "step": 15100 }, { "loss": 0.07, "grad_norm": 0.8788963556289673, "learning_rate": 2.159577053962081e-05, "epoch": 0.92, "step": 15200 }, { "loss": 0.0713, "grad_norm": 1.023823618888855, "learning_rate": 2.1474234321827903e-05, "epoch": 0.93, "step": 15300 }, { "loss": 0.0697, "grad_norm": 0.8784018158912659, "learning_rate": 2.1353913466212936e-05, "epoch": 0.93, "step": 15400 }, { "loss": 0.0695, "grad_norm": 1.1254814863204956, "learning_rate": 2.1232377248420032e-05, "epoch": 0.94, "step": 15500 }, { "loss": 0.0697, "grad_norm": 0.9760749340057373, "learning_rate": 2.1110841030627126e-05, "epoch": 0.95, "step": 15600 }, { "loss": 0.0709, "grad_norm": 1.0121357440948486, "learning_rate": 2.098930481283423e-05, "epoch": 0.95, "step": 15700 }, { "loss": 0.0717, "grad_norm": 0.7810111045837402, "learning_rate": 2.0867768595041326e-05, "epoch": 0.96, "step": 15800 }, { "loss": 0.0692, "grad_norm": 0.6813214421272278, "learning_rate": 2.074623237724842e-05, "epoch": 0.96, "step": 15900 }, { "loss": 0.0696, "grad_norm": 0.7685451507568359, "learning_rate": 2.0624696159455516e-05, "epoch": 0.97, "step": 16000 }, { "loss": 0.0702, "grad_norm": 3.3225691318511963, "learning_rate": 2.050315994166262e-05, "epoch": 0.98, "step": 16100 }, { "loss": 0.0702, "grad_norm": 0.7979671955108643, "learning_rate": 2.0381623723869716e-05, "epoch": 0.98, "step": 16200 }, { "loss": 0.0691, "grad_norm": 3.4929583072662354, "learning_rate": 2.026008750607681e-05, "epoch": 0.99, "step": 16300 }, { "loss": 0.0703, "grad_norm": 0.7738245725631714, "learning_rate": 2.0138551288283913e-05, "epoch": 1.0, "step": 16400 }, { "eval_loss": 0.06881729513406754, "eval_f1": 0.8973916467400326, "eval_precision": 0.9049522471305407, "eval_recall": 0.8906029559155776, "eval_accuracy": 0.9730252863363563, "eval_runtime": 304.4852, "eval_samples_per_second": 86.796, "eval_steps_per_second": 10.851, "epoch": 1.0, "step": 16481 }, { "loss": 0.0684, "grad_norm": 0.891858696937561, "learning_rate": 2.001701507049101e-05, "epoch": 1.0, "step": 16500 }, { "loss": 0.0619, "grad_norm": 0.6408938765525818, "learning_rate": 1.9895478852698106e-05, "epoch": 1.01, "step": 16600 }, { "loss": 0.0629, "grad_norm": 0.7390792965888977, "learning_rate": 1.9773942634905203e-05, "epoch": 1.01, "step": 16700 }, { "loss": 0.0604, "grad_norm": 0.5206795930862427, "learning_rate": 1.9652406417112303e-05, "epoch": 1.02, "step": 16800 }, { "loss": 0.0613, "grad_norm": 0.909116268157959, "learning_rate": 1.95308701993194e-05, "epoch": 1.03, "step": 16900 }, { "loss": 0.0616, "grad_norm": 0.8701964020729065, "learning_rate": 1.9409333981526496e-05, "epoch": 1.03, "step": 17000 }, { "loss": 0.0625, "grad_norm": 1.0762407779693604, "learning_rate": 1.9287797763733593e-05, "epoch": 1.04, "step": 17100 }, { "loss": 0.0615, "grad_norm": 0.7816362380981445, "learning_rate": 1.9166261545940693e-05, "epoch": 1.04, "step": 17200 }, { "loss": 0.0626, "grad_norm": 0.6983965039253235, "learning_rate": 1.904594069032572e-05, "epoch": 1.05, "step": 17300 }, { "loss": 0.0621, "grad_norm": 0.910698413848877, "learning_rate": 1.8924404472532816e-05, "epoch": 1.06, "step": 17400 }, { "loss": 0.0631, "grad_norm": 0.8654133677482605, "learning_rate": 1.8802868254739916e-05, "epoch": 1.06, "step": 17500 }, { "loss": 0.062, "grad_norm": 0.8351789712905884, "learning_rate": 1.8681332036947012e-05, "epoch": 1.07, "step": 17600 }, { "loss": 0.0604, "grad_norm": 0.7861587405204773, "learning_rate": 1.855979581915411e-05, "epoch": 1.07, "step": 17700 }, { "loss": 0.0609, "grad_norm": 0.7295276522636414, "learning_rate": 1.843825960136121e-05, "epoch": 1.08, "step": 17800 }, { "loss": 0.0616, "grad_norm": 1.0210868120193481, "learning_rate": 1.8316723383568306e-05, "epoch": 1.09, "step": 17900 }, { "loss": 0.0616, "grad_norm": 0.8220874071121216, "learning_rate": 1.8195187165775403e-05, "epoch": 1.09, "step": 18000 }, { "loss": 0.0607, "grad_norm": 0.7961727380752563, "learning_rate": 1.80736509479825e-05, "epoch": 1.1, "step": 18100 }, { "loss": 0.0614, "grad_norm": 1.0390113592147827, "learning_rate": 1.79521147301896e-05, "epoch": 1.1, "step": 18200 }, { "loss": 0.0625, "grad_norm": 0.8423497080802917, "learning_rate": 1.7830578512396696e-05, "epoch": 1.11, "step": 18300 }, { "loss": 0.0618, "grad_norm": 0.7576957941055298, "learning_rate": 1.7709042294603793e-05, "epoch": 1.12, "step": 18400 }, { "loss": 0.061, "grad_norm": 0.7174555659294128, "learning_rate": 1.7587506076810893e-05, "epoch": 1.12, "step": 18500 }, { "loss": 0.0602, "grad_norm": 0.7977816462516785, "learning_rate": 1.746596985901799e-05, "epoch": 1.13, "step": 18600 }, { "loss": 0.0617, "grad_norm": 0.8125550150871277, "learning_rate": 1.7344433641225086e-05, "epoch": 1.13, "step": 18700 }, { "loss": 0.0605, "grad_norm": 1.3914258480072021, "learning_rate": 1.7222897423432183e-05, "epoch": 1.14, "step": 18800 }, { "loss": 0.0614, "grad_norm": 0.8273860812187195, "learning_rate": 1.7101361205639283e-05, "epoch": 1.15, "step": 18900 }, { "loss": 0.0606, "grad_norm": 0.7267687916755676, "learning_rate": 1.697982498784638e-05, "epoch": 1.15, "step": 19000 }, { "loss": 0.0624, "grad_norm": 1.075861930847168, "learning_rate": 1.6858288770053476e-05, "epoch": 1.16, "step": 19100 }, { "loss": 0.062, "grad_norm": 0.867139995098114, "learning_rate": 1.6736752552260576e-05, "epoch": 1.16, "step": 19200 }, { "loss": 0.0595, "grad_norm": 0.6730388402938843, "learning_rate": 1.6615216334467673e-05, "epoch": 1.17, "step": 19300 }, { "loss": 0.0603, "grad_norm": 0.7329290509223938, "learning_rate": 1.649368011667477e-05, "epoch": 1.18, "step": 19400 }, { "loss": 0.0605, "grad_norm": 1.0000228881835938, "learning_rate": 1.6372143898881866e-05, "epoch": 1.18, "step": 19500 }, { "loss": 0.0599, "grad_norm": 1.0037493705749512, "learning_rate": 1.6250607681088967e-05, "epoch": 1.19, "step": 19600 }, { "loss": 0.0616, "grad_norm": 0.7647894024848938, "learning_rate": 1.6129071463296063e-05, "epoch": 1.2, "step": 19700 }, { "loss": 0.0604, "grad_norm": 0.78948575258255, "learning_rate": 1.600753524550316e-05, "epoch": 1.2, "step": 19800 }, { "loss": 0.0609, "grad_norm": 0.8443770408630371, "learning_rate": 1.588599902771026e-05, "epoch": 1.21, "step": 19900 }, { "loss": 0.0599, "grad_norm": 1.1531789302825928, "learning_rate": 1.5764462809917357e-05, "epoch": 1.21, "step": 20000 }, { "loss": 0.0605, "grad_norm": 0.7325319647789001, "learning_rate": 1.5642926592124453e-05, "epoch": 1.22, "step": 20100 }, { "loss": 0.0606, "grad_norm": 0.8585038185119629, "learning_rate": 1.5521390374331553e-05, "epoch": 1.23, "step": 20200 }, { "loss": 0.0602, "grad_norm": 0.6652311086654663, "learning_rate": 1.539985415653865e-05, "epoch": 1.23, "step": 20300 }, { "loss": 0.0605, "grad_norm": 0.9240396618843079, "learning_rate": 1.5278317938745747e-05, "epoch": 1.24, "step": 20400 }, { "loss": 0.0609, "grad_norm": 0.9992942214012146, "learning_rate": 1.5156781720952845e-05, "epoch": 1.24, "step": 20500 }, { "loss": 0.0604, "grad_norm": 0.7454150915145874, "learning_rate": 1.5035245503159944e-05, "epoch": 1.25, "step": 20600 }, { "loss": 0.0598, "grad_norm": 0.8551883101463318, "learning_rate": 1.491370928536704e-05, "epoch": 1.26, "step": 20700 }, { "loss": 0.061, "grad_norm": 0.8273564577102661, "learning_rate": 1.4792173067574139e-05, "epoch": 1.26, "step": 20800 }, { "loss": 0.06, "grad_norm": 0.925244927406311, "learning_rate": 1.4671852211959166e-05, "epoch": 1.27, "step": 20900 }, { "loss": 0.0587, "grad_norm": 0.5892955660820007, "learning_rate": 1.4550315994166261e-05, "epoch": 1.27, "step": 21000 }, { "loss": 0.0602, "grad_norm": 0.7904210090637207, "learning_rate": 1.4428779776373361e-05, "epoch": 1.28, "step": 21100 }, { "loss": 0.0625, "grad_norm": 1.2804646492004395, "learning_rate": 1.430724355858046e-05, "epoch": 1.29, "step": 21200 }, { "loss": 0.0607, "grad_norm": 0.9952909350395203, "learning_rate": 1.4185707340787556e-05, "epoch": 1.29, "step": 21300 }, { "loss": 0.0602, "grad_norm": 0.9036094546318054, "learning_rate": 1.4064171122994655e-05, "epoch": 1.3, "step": 21400 }, { "loss": 0.0594, "grad_norm": 0.8128438591957092, "learning_rate": 1.3942634905201751e-05, "epoch": 1.3, "step": 21500 }, { "loss": 0.0593, "grad_norm": 0.786703884601593, "learning_rate": 1.382109868740885e-05, "epoch": 1.31, "step": 21600 }, { "loss": 0.0604, "grad_norm": 1.107258677482605, "learning_rate": 1.3699562469615946e-05, "epoch": 1.32, "step": 21700 }, { "loss": 0.0596, "grad_norm": 1.0990906953811646, "learning_rate": 1.3578026251823045e-05, "epoch": 1.32, "step": 21800 }, { "loss": 0.0611, "grad_norm": 0.7040949463844299, "learning_rate": 1.3456490034030143e-05, "epoch": 1.33, "step": 21900 }, { "loss": 0.0582, "grad_norm": 0.7568740248680115, "learning_rate": 1.333495381623724e-05, "epoch": 1.33, "step": 22000 }, { "loss": 0.0595, "grad_norm": 0.6342681646347046, "learning_rate": 1.3213417598444338e-05, "epoch": 1.34, "step": 22100 }, { "loss": 0.0597, "grad_norm": 0.7555422186851501, "learning_rate": 1.3091881380651435e-05, "epoch": 1.35, "step": 22200 }, { "loss": 0.0587, "grad_norm": 0.8620259165763855, "learning_rate": 1.2970345162858533e-05, "epoch": 1.35, "step": 22300 }, { "loss": 0.0586, "grad_norm": 1.4132779836654663, "learning_rate": 1.2848808945065632e-05, "epoch": 1.36, "step": 22400 }, { "loss": 0.0594, "grad_norm": 0.9352446794509888, "learning_rate": 1.2727272727272728e-05, "epoch": 1.37, "step": 22500 }, { "loss": 0.0581, "grad_norm": 0.8808399438858032, "learning_rate": 1.2605736509479827e-05, "epoch": 1.37, "step": 22600 }, { "loss": 0.0603, "grad_norm": 0.8254494071006775, "learning_rate": 1.2484200291686924e-05, "epoch": 1.38, "step": 22700 }, { "loss": 0.0589, "grad_norm": 0.9145941138267517, "learning_rate": 1.2362664073894022e-05, "epoch": 1.38, "step": 22800 }, { "loss": 0.0594, "grad_norm": 1.267179012298584, "learning_rate": 1.2241127856101119e-05, "epoch": 1.39, "step": 22900 }, { "loss": 0.0585, "grad_norm": 0.9012957215309143, "learning_rate": 1.2119591638308217e-05, "epoch": 1.4, "step": 23000 }, { "loss": 0.0581, "grad_norm": 1.053276777267456, "learning_rate": 1.1998055420515315e-05, "epoch": 1.4, "step": 23100 }, { "loss": 0.0579, "grad_norm": 1.031724214553833, "learning_rate": 1.1876519202722412e-05, "epoch": 1.41, "step": 23200 }, { "loss": 0.0574, "grad_norm": 0.8730105757713318, "learning_rate": 1.175498298492951e-05, "epoch": 1.41, "step": 23300 }, { "loss": 0.0589, "grad_norm": 0.871724545955658, "learning_rate": 1.1633446767136607e-05, "epoch": 1.42, "step": 23400 }, { "loss": 0.0585, "grad_norm": 0.9031744599342346, "learning_rate": 1.1511910549343706e-05, "epoch": 1.43, "step": 23500 }, { "loss": 0.0586, "grad_norm": 0.5891318917274475, "learning_rate": 1.1390374331550802e-05, "epoch": 1.43, "step": 23600 }, { "loss": 0.0584, "grad_norm": 0.7399836182594299, "learning_rate": 1.12688381137579e-05, "epoch": 1.44, "step": 23700 }, { "loss": 0.0596, "grad_norm": 0.47165361046791077, "learning_rate": 1.1147301895964999e-05, "epoch": 1.44, "step": 23800 }, { "loss": 0.0588, "grad_norm": 0.8805158734321594, "learning_rate": 1.1025765678172096e-05, "epoch": 1.45, "step": 23900 }, { "loss": 0.0587, "grad_norm": 0.6524300575256348, "learning_rate": 1.0904229460379194e-05, "epoch": 1.46, "step": 24000 }, { "loss": 0.0599, "grad_norm": 0.7314462661743164, "learning_rate": 1.078269324258629e-05, "epoch": 1.46, "step": 24100 }, { "loss": 0.0587, "grad_norm": 0.7969116568565369, "learning_rate": 1.0661157024793389e-05, "epoch": 1.47, "step": 24200 }, { "loss": 0.0574, "grad_norm": 0.6548510193824768, "learning_rate": 1.0539620807000488e-05, "epoch": 1.47, "step": 24300 }, { "loss": 0.0601, "grad_norm": 0.6944112181663513, "learning_rate": 1.0418084589207584e-05, "epoch": 1.48, "step": 24400 }, { "loss": 0.0595, "grad_norm": 1.0091618299484253, "learning_rate": 1.0296548371414683e-05, "epoch": 1.49, "step": 24500 }, { "loss": 0.0567, "grad_norm": 0.7692497372627258, "learning_rate": 1.017501215362178e-05, "epoch": 1.49, "step": 24600 }, { "loss": 0.0567, "grad_norm": 1.2263282537460327, "learning_rate": 1.0053475935828878e-05, "epoch": 1.5, "step": 24700 }, { "loss": 0.058, "grad_norm": 1.412335753440857, "learning_rate": 9.931939718035976e-06, "epoch": 1.5, "step": 24800 }, { "loss": 0.0584, "grad_norm": 0.9114163517951965, "learning_rate": 9.810403500243073e-06, "epoch": 1.51, "step": 24900 }, { "loss": 0.0579, "grad_norm": 0.8343012928962708, "learning_rate": 9.688867282450171e-06, "epoch": 1.52, "step": 25000 }, { "loss": 0.0581, "grad_norm": 0.7137165665626526, "learning_rate": 9.567331064657268e-06, "epoch": 1.52, "step": 25100 }, { "loss": 0.0572, "grad_norm": 0.8871126174926758, "learning_rate": 9.445794846864366e-06, "epoch": 1.53, "step": 25200 }, { "loss": 0.0588, "grad_norm": 1.9913699626922607, "learning_rate": 9.324258629071465e-06, "epoch": 1.54, "step": 25300 }, { "loss": 0.0586, "grad_norm": 0.702129065990448, "learning_rate": 9.202722411278561e-06, "epoch": 1.54, "step": 25400 }, { "loss": 0.0589, "grad_norm": 0.759503960609436, "learning_rate": 9.08118619348566e-06, "epoch": 1.55, "step": 25500 }, { "loss": 0.0598, "grad_norm": 0.7731884717941284, "learning_rate": 8.959649975692756e-06, "epoch": 1.55, "step": 25600 }, { "loss": 0.0574, "grad_norm": 0.830560028553009, "learning_rate": 8.838113757899855e-06, "epoch": 1.56, "step": 25700 }, { "loss": 0.0561, "grad_norm": 0.612714946269989, "learning_rate": 8.716577540106953e-06, "epoch": 1.57, "step": 25800 }, { "loss": 0.0583, "grad_norm": 0.6476453542709351, "learning_rate": 8.59504132231405e-06, "epoch": 1.57, "step": 25900 }, { "loss": 0.0567, "grad_norm": 0.6660561561584473, "learning_rate": 8.473505104521148e-06, "epoch": 1.58, "step": 26000 }, { "loss": 0.0575, "grad_norm": 0.6638226509094238, "learning_rate": 8.351968886728245e-06, "epoch": 1.58, "step": 26100 }, { "loss": 0.0567, "grad_norm": 0.6452857255935669, "learning_rate": 8.231648031113272e-06, "epoch": 1.59, "step": 26200 }, { "loss": 0.0567, "grad_norm": 0.819333016872406, "learning_rate": 8.11011181332037e-06, "epoch": 1.6, "step": 26300 }, { "loss": 0.0571, "grad_norm": 1.2114768028259277, "learning_rate": 7.988575595527467e-06, "epoch": 1.6, "step": 26400 }, { "loss": 0.0577, "grad_norm": 0.7581117153167725, "learning_rate": 7.867039377734566e-06, "epoch": 1.61, "step": 26500 }, { "loss": 0.0575, "grad_norm": 0.5861278772354126, "learning_rate": 7.745503159941663e-06, "epoch": 1.61, "step": 26600 }, { "loss": 0.0567, "grad_norm": 0.7154746055603027, "learning_rate": 7.623966942148761e-06, "epoch": 1.62, "step": 26700 }, { "loss": 0.0574, "grad_norm": 1.072407841682434, "learning_rate": 7.502430724355859e-06, "epoch": 1.63, "step": 26800 }, { "loss": 0.0572, "grad_norm": 0.8198044896125793, "learning_rate": 7.380894506562957e-06, "epoch": 1.63, "step": 26900 }, { "loss": 0.0562, "grad_norm": 0.7912253141403198, "learning_rate": 7.259358288770054e-06, "epoch": 1.64, "step": 27000 }, { "loss": 0.0567, "grad_norm": 0.9015645980834961, "learning_rate": 7.137822070977152e-06, "epoch": 1.64, "step": 27100 }, { "loss": 0.0551, "grad_norm": 0.6205886602401733, "learning_rate": 7.0162858531842495e-06, "epoch": 1.65, "step": 27200 }, { "loss": 0.0581, "grad_norm": 0.8834924697875977, "learning_rate": 6.894749635391347e-06, "epoch": 1.66, "step": 27300 }, { "loss": 0.0565, "grad_norm": 0.7698688507080078, "learning_rate": 6.773213417598445e-06, "epoch": 1.66, "step": 27400 }, { "loss": 0.0575, "grad_norm": 0.8447450399398804, "learning_rate": 6.651677199805543e-06, "epoch": 1.67, "step": 27500 }, { "loss": 0.057, "grad_norm": 1.6002224683761597, "learning_rate": 6.5301409820126404e-06, "epoch": 1.67, "step": 27600 }, { "loss": 0.0558, "grad_norm": 0.8625892996788025, "learning_rate": 6.408604764219738e-06, "epoch": 1.68, "step": 27700 }, { "loss": 0.0566, "grad_norm": 0.7483322024345398, "learning_rate": 6.2870685464268355e-06, "epoch": 1.69, "step": 27800 }, { "loss": 0.0571, "grad_norm": 0.781535804271698, "learning_rate": 6.165532328633933e-06, "epoch": 1.69, "step": 27900 }, { "loss": 0.0563, "grad_norm": 0.8761783838272095, "learning_rate": 6.0439961108410314e-06, "epoch": 1.7, "step": 28000 }, { "loss": 0.0565, "grad_norm": 0.5183244943618774, "learning_rate": 5.922459893048129e-06, "epoch": 1.7, "step": 28100 }, { "loss": 0.0564, "grad_norm": 0.7939796447753906, "learning_rate": 5.8009236752552265e-06, "epoch": 1.71, "step": 28200 }, { "loss": 0.0576, "grad_norm": 0.7260966300964355, "learning_rate": 5.679387457462324e-06, "epoch": 1.72, "step": 28300 }, { "loss": 0.0569, "grad_norm": 0.9087544083595276, "learning_rate": 5.557851239669422e-06, "epoch": 1.72, "step": 28400 }, { "loss": 0.056, "grad_norm": 0.7275218367576599, "learning_rate": 5.436315021876519e-06, "epoch": 1.73, "step": 28500 }, { "loss": 0.0563, "grad_norm": 0.5983753800392151, "learning_rate": 5.315994166261547e-06, "epoch": 1.74, "step": 28600 }, { "loss": 0.0564, "grad_norm": 0.912756085395813, "learning_rate": 5.194457948468644e-06, "epoch": 1.74, "step": 28700 }, { "loss": 0.0555, "grad_norm": 0.6085710525512695, "learning_rate": 5.072921730675742e-06, "epoch": 1.75, "step": 28800 }, { "loss": 0.0571, "grad_norm": 0.6775307655334473, "learning_rate": 4.95138551288284e-06, "epoch": 1.75, "step": 28900 }, { "loss": 0.0543, "grad_norm": 0.7438898682594299, "learning_rate": 4.829849295089938e-06, "epoch": 1.76, "step": 29000 }, { "loss": 0.0567, "grad_norm": 0.719668984413147, "learning_rate": 4.708313077297035e-06, "epoch": 1.77, "step": 29100 }, { "loss": 0.0565, "grad_norm": 0.8647979497909546, "learning_rate": 4.586776859504133e-06, "epoch": 1.77, "step": 29200 }, { "loss": 0.057, "grad_norm": 0.8238335847854614, "learning_rate": 4.46524064171123e-06, "epoch": 1.78, "step": 29300 }, { "loss": 0.0563, "grad_norm": 3.2504589557647705, "learning_rate": 4.343704423918328e-06, "epoch": 1.78, "step": 29400 }, { "loss": 0.0536, "grad_norm": 0.7106683850288391, "learning_rate": 4.222168206125426e-06, "epoch": 1.79, "step": 29500 }, { "loss": 0.056, "grad_norm": 0.9477577209472656, "learning_rate": 4.100631988332524e-06, "epoch": 1.8, "step": 29600 }, { "loss": 0.0562, "grad_norm": 0.8888897895812988, "learning_rate": 3.979095770539621e-06, "epoch": 1.8, "step": 29700 }, { "loss": 0.0562, "grad_norm": 0.7125309705734253, "learning_rate": 3.857559552746719e-06, "epoch": 1.81, "step": 29800 }, { "loss": 0.0552, "grad_norm": 0.7241693139076233, "learning_rate": 3.7360233349538167e-06, "epoch": 1.81, "step": 29900 }, { "loss": 0.0556, "grad_norm": 0.9381842613220215, "learning_rate": 3.6144871171609143e-06, "epoch": 1.82, "step": 30000 }, { "loss": 0.0551, "grad_norm": 0.6808192133903503, "learning_rate": 3.492950899368012e-06, "epoch": 1.83, "step": 30100 }, { "loss": 0.0561, "grad_norm": 0.6042631268501282, "learning_rate": 3.3714146815751098e-06, "epoch": 1.83, "step": 30200 }, { "loss": 0.0553, "grad_norm": 0.5585273504257202, "learning_rate": 3.2498784637822073e-06, "epoch": 1.84, "step": 30300 }, { "loss": 0.0545, "grad_norm": 0.9048868417739868, "learning_rate": 3.128342245989305e-06, "epoch": 1.84, "step": 30400 }, { "loss": 0.0557, "grad_norm": 0.8429957628250122, "learning_rate": 3.006806028196403e-06, "epoch": 1.85, "step": 30500 }, { "loss": 0.0563, "grad_norm": 0.7962875962257385, "learning_rate": 2.8852698104035003e-06, "epoch": 1.86, "step": 30600 }, { "loss": 0.0559, "grad_norm": 0.7854676246643066, "learning_rate": 2.763733592610598e-06, "epoch": 1.86, "step": 30700 }, { "loss": 0.0561, "grad_norm": 1.694869041442871, "learning_rate": 2.642197374817696e-06, "epoch": 1.87, "step": 30800 }, { "loss": 0.0568, "grad_norm": 0.6683087944984436, "learning_rate": 2.5206611570247934e-06, "epoch": 1.87, "step": 30900 }, { "loss": 0.0548, "grad_norm": 0.5675504803657532, "learning_rate": 2.3991249392318913e-06, "epoch": 1.88, "step": 31000 }, { "loss": 0.0552, "grad_norm": 0.9730797410011292, "learning_rate": 2.2775887214389893e-06, "epoch": 1.89, "step": 31100 }, { "loss": 0.0568, "grad_norm": 0.8015105128288269, "learning_rate": 2.156052503646087e-06, "epoch": 1.89, "step": 31200 }, { "loss": 0.0552, "grad_norm": 0.5437925457954407, "learning_rate": 2.0345162858531844e-06, "epoch": 1.9, "step": 31300 }, { "loss": 0.0558, "grad_norm": 0.8105918765068054, "learning_rate": 1.9129800680602823e-06, "epoch": 1.91, "step": 31400 }, { "loss": 0.0567, "grad_norm": 0.8699814677238464, "learning_rate": 1.7914438502673799e-06, "epoch": 1.91, "step": 31500 }, { "loss": 0.0556, "grad_norm": 0.542261004447937, "learning_rate": 1.6699076324744776e-06, "epoch": 1.92, "step": 31600 }, { "loss": 0.0553, "grad_norm": 0.6852170825004578, "learning_rate": 1.5483714146815754e-06, "epoch": 1.92, "step": 31700 }, { "loss": 0.0559, "grad_norm": 0.8324136137962341, "learning_rate": 1.426835196888673e-06, "epoch": 1.93, "step": 31800 }, { "loss": 0.0539, "grad_norm": 0.5395376086235046, "learning_rate": 1.3052989790957707e-06, "epoch": 1.94, "step": 31900 }, { "loss": 0.0557, "grad_norm": 1.0665556192398071, "learning_rate": 1.1837627613028684e-06, "epoch": 1.94, "step": 32000 }, { "loss": 0.0556, "grad_norm": 0.5730076432228088, "learning_rate": 1.062226543509966e-06, "epoch": 1.95, "step": 32100 }, { "loss": 0.0566, "grad_norm": 0.8526155352592468, "learning_rate": 9.406903257170638e-07, "epoch": 1.95, "step": 32200 }, { "loss": 0.0554, "grad_norm": 0.47227638959884644, "learning_rate": 8.191541079241614e-07, "epoch": 1.96, "step": 32300 }, { "loss": 0.0559, "grad_norm": 0.5771980881690979, "learning_rate": 6.976178901312592e-07, "epoch": 1.97, "step": 32400 }, { "loss": 0.0553, "grad_norm": 0.7183811068534851, "learning_rate": 5.772970345162859e-07, "epoch": 1.97, "step": 32500 }, { "loss": 0.0556, "grad_norm": 0.7808952927589417, "learning_rate": 4.557608167233836e-07, "epoch": 1.98, "step": 32600 }, { "loss": 0.0549, "grad_norm": 0.7201197743415833, "learning_rate": 3.3422459893048135e-07, "epoch": 1.98, "step": 32700 }, { "loss": 0.0546, "grad_norm": 0.822515606880188, "learning_rate": 2.1268838113757902e-07, "epoch": 1.99, "step": 32800 }, { "loss": 0.0556, "grad_norm": 0.6968460083007812, "learning_rate": 9.115216334467672e-08, "epoch": 2.0, "step": 32900 }, { "eval_loss": 0.06514331698417664, "eval_f1": 0.9055283859012663, "eval_precision": 0.9128121708644065, "eval_recall": 0.898553824781504, "eval_accuracy": 0.9750088848296079, "eval_runtime": 304.326, "eval_samples_per_second": 86.841, "eval_steps_per_second": 10.857, "epoch": 2.0, "step": 32962 }, { "train_runtime": 12949.9436, "train_samples_per_second": 20.363, "train_steps_per_second": 2.545, "total_flos": 2.448996403000443e+17, "train_loss": 0.07225221031304233, "epoch": 2.0, "step": 32962 } ] } } }