{ "best_metric": 0.01777876727283001, "best_model_checkpoint": "autotrain-vp92t-1q2id/checkpoint-20391", "epoch": 2.9998896612600685, "eval_steps": 500, "global_step": 20391, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003677957997719666, "grad_norm": 4.279318809509277, "learning_rate": 3.6764705882352943e-07, "loss": 0.685, "step": 25 }, { "epoch": 0.007355915995439332, "grad_norm": 5.647529602050781, "learning_rate": 7.352941176470589e-07, "loss": 0.6757, "step": 50 }, { "epoch": 0.011033873993158997, "grad_norm": 5.691235065460205, "learning_rate": 1.1029411764705884e-06, "loss": 0.6611, "step": 75 }, { "epoch": 0.014711831990878664, "grad_norm": 3.6039223670959473, "learning_rate": 1.4705882352941177e-06, "loss": 0.6402, "step": 100 }, { "epoch": 0.018389789988598332, "grad_norm": 5.447757244110107, "learning_rate": 1.8382352941176471e-06, "loss": 0.5745, "step": 125 }, { "epoch": 0.022067747986317995, "grad_norm": 11.862848281860352, "learning_rate": 2.2058823529411767e-06, "loss": 0.4869, "step": 150 }, { "epoch": 0.02574570598403766, "grad_norm": 8.085945129394531, "learning_rate": 2.573529411764706e-06, "loss": 0.3687, "step": 175 }, { "epoch": 0.029423663981757327, "grad_norm": 7.246406555175781, "learning_rate": 2.9411764705882355e-06, "loss": 0.3029, "step": 200 }, { "epoch": 0.033101621979477, "grad_norm": 17.822601318359375, "learning_rate": 3.308823529411765e-06, "loss": 0.2098, "step": 225 }, { "epoch": 0.036779579977196664, "grad_norm": 21.39044952392578, "learning_rate": 3.6764705882352942e-06, "loss": 0.1483, "step": 250 }, { "epoch": 0.04045753797491632, "grad_norm": 1.4089692831039429, "learning_rate": 4.044117647058823e-06, "loss": 0.0803, "step": 275 }, { "epoch": 0.04413549597263599, "grad_norm": 0.7371423840522766, "learning_rate": 4.411764705882353e-06, "loss": 0.0396, "step": 300 }, { "epoch": 0.047813453970355656, "grad_norm": 0.22398647665977478, "learning_rate": 4.779411764705882e-06, "loss": 0.0381, "step": 325 }, { "epoch": 0.05149141196807532, "grad_norm": 0.25332173705101013, "learning_rate": 5.147058823529412e-06, "loss": 0.0306, "step": 350 }, { "epoch": 0.05516936996579499, "grad_norm": 0.24578100442886353, "learning_rate": 5.5147058823529415e-06, "loss": 0.0342, "step": 375 }, { "epoch": 0.058847327963514655, "grad_norm": 0.09213005006313324, "learning_rate": 5.882352941176471e-06, "loss": 0.0094, "step": 400 }, { "epoch": 0.06252528596123433, "grad_norm": 11.745342254638672, "learning_rate": 6.25e-06, "loss": 0.0627, "step": 425 }, { "epoch": 0.066203243958954, "grad_norm": 12.147088050842285, "learning_rate": 6.61764705882353e-06, "loss": 0.0755, "step": 450 }, { "epoch": 0.06988120195667366, "grad_norm": 0.14864382147789001, "learning_rate": 6.985294117647059e-06, "loss": 0.0532, "step": 475 }, { "epoch": 0.07355915995439333, "grad_norm": 0.06755024939775467, "learning_rate": 7.3529411764705884e-06, "loss": 0.049, "step": 500 }, { "epoch": 0.077237117952113, "grad_norm": 0.0582403726875782, "learning_rate": 7.720588235294117e-06, "loss": 0.0213, "step": 525 }, { "epoch": 0.08091507594983265, "grad_norm": 0.04456046596169472, "learning_rate": 8.088235294117646e-06, "loss": 0.0446, "step": 550 }, { "epoch": 0.08459303394755231, "grad_norm": 0.058339089155197144, "learning_rate": 8.455882352941177e-06, "loss": 0.0442, "step": 575 }, { "epoch": 0.08827099194527198, "grad_norm": 0.03951073810458183, "learning_rate": 8.823529411764707e-06, "loss": 0.0131, "step": 600 }, { "epoch": 0.09194894994299165, "grad_norm": 0.08159155398607254, "learning_rate": 9.191176470588236e-06, "loss": 0.0429, "step": 625 }, { "epoch": 0.09562690794071131, "grad_norm": 0.0362938717007637, "learning_rate": 9.558823529411764e-06, "loss": 0.0336, "step": 650 }, { "epoch": 0.09930486593843098, "grad_norm": 0.10369551926851273, "learning_rate": 9.926470588235293e-06, "loss": 0.0277, "step": 675 }, { "epoch": 0.10298282393615064, "grad_norm": 0.032908402383327484, "learning_rate": 1.0294117647058824e-05, "loss": 0.0213, "step": 700 }, { "epoch": 0.10666078193387031, "grad_norm": 0.017092719674110413, "learning_rate": 1.0661764705882354e-05, "loss": 0.0784, "step": 725 }, { "epoch": 0.11033873993158998, "grad_norm": 0.015081583522260189, "learning_rate": 1.1029411764705883e-05, "loss": 0.0265, "step": 750 }, { "epoch": 0.11401669792930964, "grad_norm": 0.09147176891565323, "learning_rate": 1.139705882352941e-05, "loss": 0.0381, "step": 775 }, { "epoch": 0.11769465592702931, "grad_norm": 0.08481771498918533, "learning_rate": 1.1764705882352942e-05, "loss": 0.128, "step": 800 }, { "epoch": 0.12137261392474898, "grad_norm": 0.014765892177820206, "learning_rate": 1.2132352941176471e-05, "loss": 0.0141, "step": 825 }, { "epoch": 0.12505057192246866, "grad_norm": 0.018918083980679512, "learning_rate": 1.25e-05, "loss": 0.0538, "step": 850 }, { "epoch": 0.12872852992018832, "grad_norm": 0.015013976022601128, "learning_rate": 1.2867647058823528e-05, "loss": 0.0019, "step": 875 }, { "epoch": 0.132406487917908, "grad_norm": 11.109874725341797, "learning_rate": 1.323529411764706e-05, "loss": 0.0394, "step": 900 }, { "epoch": 0.13608444591562766, "grad_norm": 0.015857884660363197, "learning_rate": 1.3602941176470589e-05, "loss": 0.0005, "step": 925 }, { "epoch": 0.13976240391334732, "grad_norm": 0.015862109139561653, "learning_rate": 1.3970588235294118e-05, "loss": 0.0214, "step": 950 }, { "epoch": 0.143440361911067, "grad_norm": 0.020424969494342804, "learning_rate": 1.4338235294117647e-05, "loss": 0.0367, "step": 975 }, { "epoch": 0.14711831990878665, "grad_norm": 0.027131319046020508, "learning_rate": 1.4705882352941177e-05, "loss": 0.08, "step": 1000 }, { "epoch": 0.15079627790650632, "grad_norm": 0.03147580847144127, "learning_rate": 1.5073529411764706e-05, "loss": 0.0329, "step": 1025 }, { "epoch": 0.154474235904226, "grad_norm": 24.15705680847168, "learning_rate": 1.5441176470588234e-05, "loss": 0.0064, "step": 1050 }, { "epoch": 0.15815219390194565, "grad_norm": 0.011522412300109863, "learning_rate": 1.5808823529411767e-05, "loss": 0.0762, "step": 1075 }, { "epoch": 0.1618301518996653, "grad_norm": 0.04401927441358566, "learning_rate": 1.6176470588235293e-05, "loss": 0.0656, "step": 1100 }, { "epoch": 0.16550810989738496, "grad_norm": 0.034190475940704346, "learning_rate": 1.6544117647058825e-05, "loss": 0.0308, "step": 1125 }, { "epoch": 0.16918606789510462, "grad_norm": 0.021350180730223656, "learning_rate": 1.6911764705882355e-05, "loss": 0.0539, "step": 1150 }, { "epoch": 0.1728640258928243, "grad_norm": 0.0446242094039917, "learning_rate": 1.727941176470588e-05, "loss": 0.0681, "step": 1175 }, { "epoch": 0.17654198389054396, "grad_norm": 1.6311242580413818, "learning_rate": 1.7647058823529414e-05, "loss": 0.0293, "step": 1200 }, { "epoch": 0.18021994188826362, "grad_norm": 0.00914335809648037, "learning_rate": 1.801470588235294e-05, "loss": 0.0386, "step": 1225 }, { "epoch": 0.1838978998859833, "grad_norm": 0.009417989291250706, "learning_rate": 1.8382352941176472e-05, "loss": 0.0004, "step": 1250 }, { "epoch": 0.18757585788370296, "grad_norm": 0.9801831245422363, "learning_rate": 1.8750000000000002e-05, "loss": 0.057, "step": 1275 }, { "epoch": 0.19125381588142262, "grad_norm": 0.005170044023543596, "learning_rate": 1.9117647058823528e-05, "loss": 0.0002, "step": 1300 }, { "epoch": 0.1949317738791423, "grad_norm": 0.03409629687666893, "learning_rate": 1.948529411764706e-05, "loss": 0.0394, "step": 1325 }, { "epoch": 0.19860973187686196, "grad_norm": 0.008645136840641499, "learning_rate": 1.9852941176470586e-05, "loss": 0.0053, "step": 1350 }, { "epoch": 0.20228768987458162, "grad_norm": 0.004454991314560175, "learning_rate": 2.022058823529412e-05, "loss": 0.0002, "step": 1375 }, { "epoch": 0.2059656478723013, "grad_norm": 0.00453265942633152, "learning_rate": 2.058823529411765e-05, "loss": 0.0216, "step": 1400 }, { "epoch": 0.20964360587002095, "grad_norm": 0.005607594270259142, "learning_rate": 2.0955882352941175e-05, "loss": 0.0206, "step": 1425 }, { "epoch": 0.21332156386774062, "grad_norm": 0.002988673048093915, "learning_rate": 2.1323529411764707e-05, "loss": 0.0002, "step": 1450 }, { "epoch": 0.2169995218654603, "grad_norm": 0.15066058933734894, "learning_rate": 2.1691176470588237e-05, "loss": 0.0002, "step": 1475 }, { "epoch": 0.22067747986317995, "grad_norm": 0.2750360071659088, "learning_rate": 2.2058823529411766e-05, "loss": 0.0002, "step": 1500 }, { "epoch": 0.22435543786089962, "grad_norm": 0.00299286050722003, "learning_rate": 2.2426470588235296e-05, "loss": 0.0004, "step": 1525 }, { "epoch": 0.2280333958586193, "grad_norm": 0.004124614410102367, "learning_rate": 2.279411764705882e-05, "loss": 0.0001, "step": 1550 }, { "epoch": 0.23171135385633895, "grad_norm": 0.5924888849258423, "learning_rate": 2.3161764705882354e-05, "loss": 0.1041, "step": 1575 }, { "epoch": 0.23538931185405862, "grad_norm": 0.011225424706935883, "learning_rate": 2.3529411764705884e-05, "loss": 0.0528, "step": 1600 }, { "epoch": 0.23906726985177829, "grad_norm": 0.008940880186855793, "learning_rate": 2.389705882352941e-05, "loss": 0.0972, "step": 1625 }, { "epoch": 0.24274522784949795, "grad_norm": 0.008405734784901142, "learning_rate": 2.4264705882352942e-05, "loss": 0.0495, "step": 1650 }, { "epoch": 0.24642318584721762, "grad_norm": 0.008656616322696209, "learning_rate": 2.4632352941176472e-05, "loss": 0.0216, "step": 1675 }, { "epoch": 0.2501011438449373, "grad_norm": 0.02361353114247322, "learning_rate": 2.5e-05, "loss": 0.0385, "step": 1700 }, { "epoch": 0.25377910184265695, "grad_norm": 0.05051364749670029, "learning_rate": 2.536764705882353e-05, "loss": 0.0814, "step": 1725 }, { "epoch": 0.25745705984037665, "grad_norm": 0.00914950855076313, "learning_rate": 2.5735294117647057e-05, "loss": 0.0006, "step": 1750 }, { "epoch": 0.2611350178380963, "grad_norm": 0.008635000325739384, "learning_rate": 2.610294117647059e-05, "loss": 0.0003, "step": 1775 }, { "epoch": 0.264812975835816, "grad_norm": 0.005725966300815344, "learning_rate": 2.647058823529412e-05, "loss": 0.0002, "step": 1800 }, { "epoch": 0.2684909338335356, "grad_norm": 0.014133188873529434, "learning_rate": 2.6838235294117648e-05, "loss": 0.0562, "step": 1825 }, { "epoch": 0.2721688918312553, "grad_norm": 0.0024135392159223557, "learning_rate": 2.7205882352941177e-05, "loss": 0.003, "step": 1850 }, { "epoch": 0.27584684982897495, "grad_norm": 0.03300013393163681, "learning_rate": 2.7573529411764707e-05, "loss": 0.0947, "step": 1875 }, { "epoch": 0.27952480782669464, "grad_norm": 0.046893417835235596, "learning_rate": 2.7941176470588236e-05, "loss": 0.0613, "step": 1900 }, { "epoch": 0.2832027658244143, "grad_norm": 0.009986027143895626, "learning_rate": 2.8308823529411766e-05, "loss": 0.0616, "step": 1925 }, { "epoch": 0.286880723822134, "grad_norm": 0.05905308201909065, "learning_rate": 2.8676470588235295e-05, "loss": 0.1182, "step": 1950 }, { "epoch": 0.2905586818198536, "grad_norm": 0.00858025811612606, "learning_rate": 2.9044117647058824e-05, "loss": 0.0007, "step": 1975 }, { "epoch": 0.2942366398175733, "grad_norm": 95.36400604248047, "learning_rate": 2.9411764705882354e-05, "loss": 0.0869, "step": 2000 }, { "epoch": 0.29791459781529295, "grad_norm": 0.005693793762475252, "learning_rate": 2.9779411764705883e-05, "loss": 0.0348, "step": 2025 }, { "epoch": 0.30159255581301264, "grad_norm": 0.009921176359057426, "learning_rate": 2.998365211705084e-05, "loss": 0.0514, "step": 2050 }, { "epoch": 0.3052705138107323, "grad_norm": 0.01893715187907219, "learning_rate": 2.9942782409677946e-05, "loss": 0.0575, "step": 2075 }, { "epoch": 0.308948471808452, "grad_norm": 0.012356853112578392, "learning_rate": 2.990191270230505e-05, "loss": 0.0085, "step": 2100 }, { "epoch": 0.3126264298061716, "grad_norm": 0.027572082355618477, "learning_rate": 2.9861042994932156e-05, "loss": 0.0071, "step": 2125 }, { "epoch": 0.3163043878038913, "grad_norm": 0.0033696063328534365, "learning_rate": 2.9820173287559262e-05, "loss": 0.0153, "step": 2150 }, { "epoch": 0.31998234580161095, "grad_norm": 0.0032816240563988686, "learning_rate": 2.9779303580186367e-05, "loss": 0.0425, "step": 2175 }, { "epoch": 0.3236603037993306, "grad_norm": 0.013268062844872475, "learning_rate": 2.9738433872813472e-05, "loss": 0.0529, "step": 2200 }, { "epoch": 0.3273382617970503, "grad_norm": 13.58910083770752, "learning_rate": 2.9697564165440574e-05, "loss": 0.1147, "step": 2225 }, { "epoch": 0.3310162197947699, "grad_norm": 80.59501647949219, "learning_rate": 2.965669445806768e-05, "loss": 0.0043, "step": 2250 }, { "epoch": 0.3346941777924896, "grad_norm": 81.63990020751953, "learning_rate": 2.9615824750694785e-05, "loss": 0.0355, "step": 2275 }, { "epoch": 0.33837213579020925, "grad_norm": 0.014708608388900757, "learning_rate": 2.957495504332189e-05, "loss": 0.0668, "step": 2300 }, { "epoch": 0.34205009378792894, "grad_norm": 0.015443817712366581, "learning_rate": 2.9534085335948996e-05, "loss": 0.0006, "step": 2325 }, { "epoch": 0.3457280517856486, "grad_norm": 0.005841911304742098, "learning_rate": 2.94932156285761e-05, "loss": 0.0487, "step": 2350 }, { "epoch": 0.3494060097833683, "grad_norm": 0.06746743619441986, "learning_rate": 2.9452345921203207e-05, "loss": 0.0229, "step": 2375 }, { "epoch": 0.3530839677810879, "grad_norm": 0.002315863035619259, "learning_rate": 2.941147621383031e-05, "loss": 0.0003, "step": 2400 }, { "epoch": 0.3567619257788076, "grad_norm": 0.0017154604429379106, "learning_rate": 2.9370606506457414e-05, "loss": 0.0308, "step": 2425 }, { "epoch": 0.36043988377652725, "grad_norm": 0.009691163897514343, "learning_rate": 2.932973679908452e-05, "loss": 0.0256, "step": 2450 }, { "epoch": 0.36411784177424694, "grad_norm": 0.011957678943872452, "learning_rate": 2.9288867091711625e-05, "loss": 0.0713, "step": 2475 }, { "epoch": 0.3677957997719666, "grad_norm": 0.02373524010181427, "learning_rate": 2.924799738433873e-05, "loss": 0.0504, "step": 2500 }, { "epoch": 0.3714737577696863, "grad_norm": 0.0031693673226982355, "learning_rate": 2.9207127676965836e-05, "loss": 0.0251, "step": 2525 }, { "epoch": 0.3751517157674059, "grad_norm": 0.025995498523116112, "learning_rate": 2.916625796959294e-05, "loss": 0.0459, "step": 2550 }, { "epoch": 0.3788296737651256, "grad_norm": 0.006027763709425926, "learning_rate": 2.9125388262220043e-05, "loss": 0.0013, "step": 2575 }, { "epoch": 0.38250763176284525, "grad_norm": 0.014026220887899399, "learning_rate": 2.9084518554847148e-05, "loss": 0.0831, "step": 2600 }, { "epoch": 0.38618558976056494, "grad_norm": 0.025293108075857162, "learning_rate": 2.9043648847474254e-05, "loss": 0.0275, "step": 2625 }, { "epoch": 0.3898635477582846, "grad_norm": 0.0041050901636481285, "learning_rate": 2.900277914010136e-05, "loss": 0.0007, "step": 2650 }, { "epoch": 0.3935415057560043, "grad_norm": 0.027650628238916397, "learning_rate": 2.8961909432728464e-05, "loss": 0.0002, "step": 2675 }, { "epoch": 0.3972194637537239, "grad_norm": 0.06839253753423691, "learning_rate": 2.8921039725355566e-05, "loss": 0.0353, "step": 2700 }, { "epoch": 0.4008974217514436, "grad_norm": 0.005996390245854855, "learning_rate": 2.8880170017982672e-05, "loss": 0.0134, "step": 2725 }, { "epoch": 0.40457537974916324, "grad_norm": 0.0020017025526612997, "learning_rate": 2.8839300310609774e-05, "loss": 0.0002, "step": 2750 }, { "epoch": 0.40825333774688294, "grad_norm": 0.04533281922340393, "learning_rate": 2.879843060323688e-05, "loss": 0.0367, "step": 2775 }, { "epoch": 0.4119312957446026, "grad_norm": 0.005575124174356461, "learning_rate": 2.8757560895863984e-05, "loss": 0.064, "step": 2800 }, { "epoch": 0.41560925374232227, "grad_norm": 0.0019691460765898228, "learning_rate": 2.871669118849109e-05, "loss": 0.0005, "step": 2825 }, { "epoch": 0.4192872117400419, "grad_norm": 0.004097863100469112, "learning_rate": 2.8675821481118195e-05, "loss": 0.0557, "step": 2850 }, { "epoch": 0.4229651697377616, "grad_norm": 0.0018194678705185652, "learning_rate": 2.86349517737453e-05, "loss": 0.0009, "step": 2875 }, { "epoch": 0.42664312773548124, "grad_norm": 0.006679282058030367, "learning_rate": 2.8594082066372406e-05, "loss": 0.0882, "step": 2900 }, { "epoch": 0.43032108573320094, "grad_norm": 0.0030163535848259926, "learning_rate": 2.8553212358999508e-05, "loss": 0.0005, "step": 2925 }, { "epoch": 0.4339990437309206, "grad_norm": 0.2035808265209198, "learning_rate": 2.8512342651626613e-05, "loss": 0.0007, "step": 2950 }, { "epoch": 0.43767700172864027, "grad_norm": 0.022791976109147072, "learning_rate": 2.847147294425372e-05, "loss": 0.0941, "step": 2975 }, { "epoch": 0.4413549597263599, "grad_norm": 0.007299873046576977, "learning_rate": 2.8430603236880824e-05, "loss": 0.0511, "step": 3000 }, { "epoch": 0.4450329177240796, "grad_norm": 0.003951882012188435, "learning_rate": 2.838973352950793e-05, "loss": 0.0096, "step": 3025 }, { "epoch": 0.44871087572179924, "grad_norm": 0.009184204041957855, "learning_rate": 2.8348863822135035e-05, "loss": 0.0372, "step": 3050 }, { "epoch": 0.45238883371951893, "grad_norm": 0.032343972474336624, "learning_rate": 2.830799411476214e-05, "loss": 0.0197, "step": 3075 }, { "epoch": 0.4560667917172386, "grad_norm": 0.002111822599545121, "learning_rate": 2.8267124407389246e-05, "loss": 0.0004, "step": 3100 }, { "epoch": 0.45974474971495827, "grad_norm": 0.0024695848114788532, "learning_rate": 2.8226254700016348e-05, "loss": 0.0757, "step": 3125 }, { "epoch": 0.4634227077126779, "grad_norm": 0.001880451338365674, "learning_rate": 2.8185384992643453e-05, "loss": 0.0181, "step": 3150 }, { "epoch": 0.4671006657103976, "grad_norm": 0.02519827149808407, "learning_rate": 2.814451528527056e-05, "loss": 0.0006, "step": 3175 }, { "epoch": 0.47077862370811724, "grad_norm": 0.0016058700857684016, "learning_rate": 2.8103645577897664e-05, "loss": 0.0088, "step": 3200 }, { "epoch": 0.47445658170583693, "grad_norm": 0.02888200432062149, "learning_rate": 2.806277587052477e-05, "loss": 0.0496, "step": 3225 }, { "epoch": 0.47813453970355657, "grad_norm": 0.0747719258069992, "learning_rate": 2.8021906163151874e-05, "loss": 0.1338, "step": 3250 }, { "epoch": 0.48181249770127627, "grad_norm": 4.114503860473633, "learning_rate": 2.798103645577898e-05, "loss": 0.063, "step": 3275 }, { "epoch": 0.4854904556989959, "grad_norm": 0.10994021594524384, "learning_rate": 2.7940166748406082e-05, "loss": 0.0525, "step": 3300 }, { "epoch": 0.4891684136967156, "grad_norm": 0.016218269243836403, "learning_rate": 2.7899297041033187e-05, "loss": 0.0547, "step": 3325 }, { "epoch": 0.49284637169443524, "grad_norm": 0.08570988476276398, "learning_rate": 2.7858427333660293e-05, "loss": 0.0551, "step": 3350 }, { "epoch": 0.49652432969215493, "grad_norm": 0.013475511223077774, "learning_rate": 2.7817557626287398e-05, "loss": 0.0445, "step": 3375 }, { "epoch": 0.5002022876898746, "grad_norm": 0.006590835750102997, "learning_rate": 2.7776687918914503e-05, "loss": 0.0005, "step": 3400 }, { "epoch": 0.5038802456875943, "grad_norm": 0.004501729272305965, "learning_rate": 2.773581821154161e-05, "loss": 0.0219, "step": 3425 }, { "epoch": 0.5075582036853139, "grad_norm": 0.06999096274375916, "learning_rate": 2.769494850416871e-05, "loss": 0.0569, "step": 3450 }, { "epoch": 0.5112361616830335, "grad_norm": 0.003883685451000929, "learning_rate": 2.7654078796795813e-05, "loss": 0.001, "step": 3475 }, { "epoch": 0.5149141196807533, "grad_norm": 0.0029312793631106615, "learning_rate": 2.7613209089422918e-05, "loss": 0.0229, "step": 3500 }, { "epoch": 0.5185920776784729, "grad_norm": 0.006315870210528374, "learning_rate": 2.7572339382050023e-05, "loss": 0.0731, "step": 3525 }, { "epoch": 0.5222700356761926, "grad_norm": 0.0030722382944077253, "learning_rate": 2.753146967467713e-05, "loss": 0.0392, "step": 3550 }, { "epoch": 0.5259479936739122, "grad_norm": 0.005796592216938734, "learning_rate": 2.7490599967304234e-05, "loss": 0.0139, "step": 3575 }, { "epoch": 0.529625951671632, "grad_norm": 0.6967291831970215, "learning_rate": 2.744973025993134e-05, "loss": 0.0325, "step": 3600 }, { "epoch": 0.5333039096693516, "grad_norm": 0.017705194652080536, "learning_rate": 2.7408860552558445e-05, "loss": 0.0433, "step": 3625 }, { "epoch": 0.5369818676670712, "grad_norm": 0.020230021327733994, "learning_rate": 2.7367990845185547e-05, "loss": 0.0007, "step": 3650 }, { "epoch": 0.5406598256647909, "grad_norm": 0.0023030710872262716, "learning_rate": 2.7327121137812652e-05, "loss": 0.0002, "step": 3675 }, { "epoch": 0.5443377836625106, "grad_norm": 0.0020703673362731934, "learning_rate": 2.7286251430439758e-05, "loss": 0.0002, "step": 3700 }, { "epoch": 0.5480157416602303, "grad_norm": 0.002691243775188923, "learning_rate": 2.7245381723066863e-05, "loss": 0.0005, "step": 3725 }, { "epoch": 0.5516936996579499, "grad_norm": 0.002691768342629075, "learning_rate": 2.720451201569397e-05, "loss": 0.0957, "step": 3750 }, { "epoch": 0.5553716576556695, "grad_norm": 0.05962933972477913, "learning_rate": 2.7163642308321074e-05, "loss": 0.1317, "step": 3775 }, { "epoch": 0.5590496156533893, "grad_norm": 0.06475093215703964, "learning_rate": 2.712277260094818e-05, "loss": 0.0257, "step": 3800 }, { "epoch": 0.5627275736511089, "grad_norm": 0.0121241370216012, "learning_rate": 2.708190289357528e-05, "loss": 0.0609, "step": 3825 }, { "epoch": 0.5664055316488286, "grad_norm": 0.007753327488899231, "learning_rate": 2.7041033186202387e-05, "loss": 0.0008, "step": 3850 }, { "epoch": 0.5700834896465482, "grad_norm": 0.005270315799862146, "learning_rate": 2.7000163478829492e-05, "loss": 0.0002, "step": 3875 }, { "epoch": 0.573761447644268, "grad_norm": 0.004358434583991766, "learning_rate": 2.6959293771456597e-05, "loss": 0.0174, "step": 3900 }, { "epoch": 0.5774394056419876, "grad_norm": 0.003769191913306713, "learning_rate": 2.6918424064083703e-05, "loss": 0.0513, "step": 3925 }, { "epoch": 0.5811173636397072, "grad_norm": 0.0043784258887171745, "learning_rate": 2.6877554356710808e-05, "loss": 0.0185, "step": 3950 }, { "epoch": 0.5847953216374269, "grad_norm": 0.004602793138474226, "learning_rate": 2.6836684649337913e-05, "loss": 0.0236, "step": 3975 }, { "epoch": 0.5884732796351466, "grad_norm": 0.002638947917148471, "learning_rate": 2.679581494196502e-05, "loss": 0.0155, "step": 4000 }, { "epoch": 0.5921512376328663, "grad_norm": 0.002830574056133628, "learning_rate": 2.675494523459212e-05, "loss": 0.0161, "step": 4025 }, { "epoch": 0.5958291956305859, "grad_norm": 0.015412558801472187, "learning_rate": 2.6714075527219226e-05, "loss": 0.131, "step": 4050 }, { "epoch": 0.5995071536283055, "grad_norm": 0.016349300742149353, "learning_rate": 2.667320581984633e-05, "loss": 0.0132, "step": 4075 }, { "epoch": 0.6031851116260253, "grad_norm": 0.013236075639724731, "learning_rate": 2.6632336112473437e-05, "loss": 0.0005, "step": 4100 }, { "epoch": 0.6068630696237449, "grad_norm": 0.007088659331202507, "learning_rate": 2.6591466405100542e-05, "loss": 0.0004, "step": 4125 }, { "epoch": 0.6105410276214646, "grad_norm": 0.02121301181614399, "learning_rate": 2.6550596697727648e-05, "loss": 0.0397, "step": 4150 }, { "epoch": 0.6142189856191842, "grad_norm": 0.030070917680859566, "learning_rate": 2.650972699035475e-05, "loss": 0.0754, "step": 4175 }, { "epoch": 0.617896943616904, "grad_norm": 10.173595428466797, "learning_rate": 2.646885728298185e-05, "loss": 0.0336, "step": 4200 }, { "epoch": 0.6215749016146236, "grad_norm": 0.014447388239204884, "learning_rate": 2.6427987575608957e-05, "loss": 0.003, "step": 4225 }, { "epoch": 0.6252528596123432, "grad_norm": 0.012096612714231014, "learning_rate": 2.6387117868236062e-05, "loss": 0.0481, "step": 4250 }, { "epoch": 0.6289308176100629, "grad_norm": 0.02047719806432724, "learning_rate": 2.6346248160863168e-05, "loss": 0.051, "step": 4275 }, { "epoch": 0.6326087756077826, "grad_norm": 0.01152089238166809, "learning_rate": 2.6305378453490273e-05, "loss": 0.011, "step": 4300 }, { "epoch": 0.6362867336055023, "grad_norm": 0.01178329810500145, "learning_rate": 2.626450874611738e-05, "loss": 0.0187, "step": 4325 }, { "epoch": 0.6399646916032219, "grad_norm": 0.012962247245013714, "learning_rate": 2.6223639038744484e-05, "loss": 0.041, "step": 4350 }, { "epoch": 0.6436426496009415, "grad_norm": 0.012993029318749905, "learning_rate": 2.6182769331371586e-05, "loss": 0.03, "step": 4375 }, { "epoch": 0.6473206075986612, "grad_norm": 0.01311455201357603, "learning_rate": 2.614189962399869e-05, "loss": 0.0421, "step": 4400 }, { "epoch": 0.6509985655963809, "grad_norm": 0.022407829761505127, "learning_rate": 2.6101029916625797e-05, "loss": 0.0312, "step": 4425 }, { "epoch": 0.6546765235941006, "grad_norm": 0.007614122703671455, "learning_rate": 2.6060160209252902e-05, "loss": 0.0014, "step": 4450 }, { "epoch": 0.6583544815918202, "grad_norm": 0.006891134660691023, "learning_rate": 2.6019290501880007e-05, "loss": 0.0966, "step": 4475 }, { "epoch": 0.6620324395895398, "grad_norm": 0.026897389441728592, "learning_rate": 2.5978420794507113e-05, "loss": 0.0387, "step": 4500 }, { "epoch": 0.6657103975872596, "grad_norm": 0.013364088721573353, "learning_rate": 2.5937551087134218e-05, "loss": 0.0007, "step": 4525 }, { "epoch": 0.6693883555849792, "grad_norm": 0.006984102539718151, "learning_rate": 2.589668137976132e-05, "loss": 0.0008, "step": 4550 }, { "epoch": 0.6730663135826989, "grad_norm": 0.005882107652723789, "learning_rate": 2.5855811672388425e-05, "loss": 0.0003, "step": 4575 }, { "epoch": 0.6767442715804185, "grad_norm": 0.008882598020136356, "learning_rate": 2.581494196501553e-05, "loss": 0.0389, "step": 4600 }, { "epoch": 0.6804222295781382, "grad_norm": 0.01086785364896059, "learning_rate": 2.5774072257642636e-05, "loss": 0.0305, "step": 4625 }, { "epoch": 0.6841001875758579, "grad_norm": 0.005837304517626762, "learning_rate": 2.573320255026974e-05, "loss": 0.0277, "step": 4650 }, { "epoch": 0.6877781455735775, "grad_norm": 0.006613869220018387, "learning_rate": 2.5692332842896847e-05, "loss": 0.0003, "step": 4675 }, { "epoch": 0.6914561035712972, "grad_norm": 0.012274155393242836, "learning_rate": 2.5651463135523952e-05, "loss": 0.0383, "step": 4700 }, { "epoch": 0.6951340615690169, "grad_norm": 0.0031378071289509535, "learning_rate": 2.5610593428151054e-05, "loss": 0.0065, "step": 4725 }, { "epoch": 0.6988120195667366, "grad_norm": 0.12304351478815079, "learning_rate": 2.556972372077816e-05, "loss": 0.0103, "step": 4750 }, { "epoch": 0.7024899775644562, "grad_norm": 0.005349988583475351, "learning_rate": 2.5528854013405265e-05, "loss": 0.0292, "step": 4775 }, { "epoch": 0.7061679355621758, "grad_norm": 0.0023686892818659544, "learning_rate": 2.548798430603237e-05, "loss": 0.0169, "step": 4800 }, { "epoch": 0.7098458935598956, "grad_norm": 0.0018137163715437055, "learning_rate": 2.5447114598659476e-05, "loss": 0.0444, "step": 4825 }, { "epoch": 0.7135238515576152, "grad_norm": 0.0029049592558294535, "learning_rate": 2.540624489128658e-05, "loss": 0.0591, "step": 4850 }, { "epoch": 0.7172018095553349, "grad_norm": 0.0024209930561482906, "learning_rate": 2.5365375183913687e-05, "loss": 0.0311, "step": 4875 }, { "epoch": 0.7208797675530545, "grad_norm": 62.02742385864258, "learning_rate": 2.532450547654079e-05, "loss": 0.0324, "step": 4900 }, { "epoch": 0.7245577255507742, "grad_norm": 0.002258418360725045, "learning_rate": 2.528363576916789e-05, "loss": 0.0003, "step": 4925 }, { "epoch": 0.7282356835484939, "grad_norm": 0.001804179628379643, "learning_rate": 2.5242766061794996e-05, "loss": 0.0499, "step": 4950 }, { "epoch": 0.7319136415462135, "grad_norm": 0.33615773916244507, "learning_rate": 2.52018963544221e-05, "loss": 0.0005, "step": 4975 }, { "epoch": 0.7355915995439332, "grad_norm": 0.0010956133482977748, "learning_rate": 2.5161026647049207e-05, "loss": 0.0008, "step": 5000 }, { "epoch": 0.7392695575416529, "grad_norm": 0.0012902173912152648, "learning_rate": 2.5120156939676312e-05, "loss": 0.0273, "step": 5025 }, { "epoch": 0.7429475155393725, "grad_norm": 0.013881388120353222, "learning_rate": 2.5079287232303417e-05, "loss": 0.0488, "step": 5050 }, { "epoch": 0.7466254735370922, "grad_norm": 0.011136908084154129, "learning_rate": 2.5038417524930523e-05, "loss": 0.0003, "step": 5075 }, { "epoch": 0.7503034315348118, "grad_norm": 0.020626788958907127, "learning_rate": 2.4997547817557625e-05, "loss": 0.0621, "step": 5100 }, { "epoch": 0.7539813895325316, "grad_norm": 0.039804015308618546, "learning_rate": 2.495667811018473e-05, "loss": 0.0941, "step": 5125 }, { "epoch": 0.7576593475302512, "grad_norm": 0.019914086908102036, "learning_rate": 2.4915808402811835e-05, "loss": 0.0021, "step": 5150 }, { "epoch": 0.7613373055279709, "grad_norm": 0.027103891596198082, "learning_rate": 2.487493869543894e-05, "loss": 0.0375, "step": 5175 }, { "epoch": 0.7650152635256905, "grad_norm": 0.008572924882173538, "learning_rate": 2.4834068988066046e-05, "loss": 0.0007, "step": 5200 }, { "epoch": 0.7686932215234102, "grad_norm": 0.011288322508335114, "learning_rate": 2.479319928069315e-05, "loss": 0.0339, "step": 5225 }, { "epoch": 0.7723711795211299, "grad_norm": 11.5412015914917, "learning_rate": 2.4752329573320257e-05, "loss": 0.0414, "step": 5250 }, { "epoch": 0.7760491375188495, "grad_norm": 0.016787946224212646, "learning_rate": 2.471145986594736e-05, "loss": 0.0817, "step": 5275 }, { "epoch": 0.7797270955165692, "grad_norm": 0.3828181326389313, "learning_rate": 2.4670590158574464e-05, "loss": 0.0013, "step": 5300 }, { "epoch": 0.7834050535142889, "grad_norm": 0.00423394562676549, "learning_rate": 2.462972045120157e-05, "loss": 0.0009, "step": 5325 }, { "epoch": 0.7870830115120085, "grad_norm": 0.0038542733527719975, "learning_rate": 2.4588850743828675e-05, "loss": 0.0006, "step": 5350 }, { "epoch": 0.7907609695097282, "grad_norm": 0.002444320358335972, "learning_rate": 2.454798103645578e-05, "loss": 0.0208, "step": 5375 }, { "epoch": 0.7944389275074478, "grad_norm": 0.06983044743537903, "learning_rate": 2.4507111329082886e-05, "loss": 0.0611, "step": 5400 }, { "epoch": 0.7981168855051676, "grad_norm": 0.0033168047666549683, "learning_rate": 2.446624162170999e-05, "loss": 0.0016, "step": 5425 }, { "epoch": 0.8017948435028872, "grad_norm": 0.0031268312595784664, "learning_rate": 2.4425371914337093e-05, "loss": 0.0119, "step": 5450 }, { "epoch": 0.8054728015006068, "grad_norm": 0.0019544719252735376, "learning_rate": 2.43845022069642e-05, "loss": 0.0337, "step": 5475 }, { "epoch": 0.8091507594983265, "grad_norm": 0.017085539177060127, "learning_rate": 2.4343632499591304e-05, "loss": 0.0776, "step": 5500 }, { "epoch": 0.8128287174960462, "grad_norm": 0.916741669178009, "learning_rate": 2.430276279221841e-05, "loss": 0.0009, "step": 5525 }, { "epoch": 0.8165066754937659, "grad_norm": 0.0018967619398608804, "learning_rate": 2.4261893084845515e-05, "loss": 0.0005, "step": 5550 }, { "epoch": 0.8201846334914855, "grad_norm": 0.002946459921076894, "learning_rate": 2.422102337747262e-05, "loss": 0.052, "step": 5575 }, { "epoch": 0.8238625914892052, "grad_norm": 0.00457314308732748, "learning_rate": 2.4180153670099725e-05, "loss": 0.0386, "step": 5600 }, { "epoch": 0.8275405494869249, "grad_norm": 0.00886601209640503, "learning_rate": 2.4139283962726827e-05, "loss": 0.0003, "step": 5625 }, { "epoch": 0.8312185074846445, "grad_norm": 0.004110053181648254, "learning_rate": 2.409841425535393e-05, "loss": 0.0002, "step": 5650 }, { "epoch": 0.8348964654823642, "grad_norm": 0.002550973556935787, "learning_rate": 2.4057544547981035e-05, "loss": 0.0893, "step": 5675 }, { "epoch": 0.8385744234800838, "grad_norm": 0.0047971270978450775, "learning_rate": 2.401667484060814e-05, "loss": 0.0649, "step": 5700 }, { "epoch": 0.8422523814778036, "grad_norm": 22.79808235168457, "learning_rate": 2.3975805133235246e-05, "loss": 0.0509, "step": 5725 }, { "epoch": 0.8459303394755232, "grad_norm": 0.02145661599934101, "learning_rate": 2.393493542586235e-05, "loss": 0.0456, "step": 5750 }, { "epoch": 0.8496082974732428, "grad_norm": 0.8593617081642151, "learning_rate": 2.3894065718489456e-05, "loss": 0.0727, "step": 5775 }, { "epoch": 0.8532862554709625, "grad_norm": 0.004426372237503529, "learning_rate": 2.385319601111656e-05, "loss": 0.0004, "step": 5800 }, { "epoch": 0.8569642134686822, "grad_norm": 0.0030661604832857847, "learning_rate": 2.3812326303743664e-05, "loss": 0.0001, "step": 5825 }, { "epoch": 0.8606421714664019, "grad_norm": 0.0062530264258384705, "learning_rate": 2.377145659637077e-05, "loss": 0.044, "step": 5850 }, { "epoch": 0.8643201294641215, "grad_norm": 0.023851774632930756, "learning_rate": 2.3730586888997874e-05, "loss": 0.0412, "step": 5875 }, { "epoch": 0.8679980874618412, "grad_norm": 0.013338697142899036, "learning_rate": 2.368971718162498e-05, "loss": 0.0006, "step": 5900 }, { "epoch": 0.8716760454595609, "grad_norm": 0.01904129609465599, "learning_rate": 2.3648847474252085e-05, "loss": 0.0726, "step": 5925 }, { "epoch": 0.8753540034572805, "grad_norm": 0.010262302123010159, "learning_rate": 2.360797776687919e-05, "loss": 0.0087, "step": 5950 }, { "epoch": 0.8790319614550002, "grad_norm": 0.006104280706495047, "learning_rate": 2.3567108059506296e-05, "loss": 0.0004, "step": 5975 }, { "epoch": 0.8827099194527198, "grad_norm": 0.019870450720191002, "learning_rate": 2.3526238352133398e-05, "loss": 0.0795, "step": 6000 }, { "epoch": 0.8863878774504396, "grad_norm": 0.021579677239060402, "learning_rate": 2.3485368644760503e-05, "loss": 0.0009, "step": 6025 }, { "epoch": 0.8900658354481592, "grad_norm": 0.007828918285667896, "learning_rate": 2.344449893738761e-05, "loss": 0.0017, "step": 6050 }, { "epoch": 0.8937437934458788, "grad_norm": 0.006341638043522835, "learning_rate": 2.3403629230014714e-05, "loss": 0.0198, "step": 6075 }, { "epoch": 0.8974217514435985, "grad_norm": 0.004665954038500786, "learning_rate": 2.336275952264182e-05, "loss": 0.0002, "step": 6100 }, { "epoch": 0.9010997094413182, "grad_norm": 0.0059740557335317135, "learning_rate": 2.3321889815268925e-05, "loss": 0.0398, "step": 6125 }, { "epoch": 0.9047776674390379, "grad_norm": 0.09372496604919434, "learning_rate": 2.328102010789603e-05, "loss": 0.0596, "step": 6150 }, { "epoch": 0.9084556254367575, "grad_norm": 0.06878636032342911, "learning_rate": 2.3240150400523132e-05, "loss": 0.0858, "step": 6175 }, { "epoch": 0.9121335834344771, "grad_norm": 5.581681728363037, "learning_rate": 2.3199280693150238e-05, "loss": 0.0728, "step": 6200 }, { "epoch": 0.9158115414321969, "grad_norm": 0.017690079286694527, "learning_rate": 2.3158410985777343e-05, "loss": 0.0109, "step": 6225 }, { "epoch": 0.9194894994299165, "grad_norm": 0.009789933450520039, "learning_rate": 2.3117541278404448e-05, "loss": 0.003, "step": 6250 }, { "epoch": 0.9231674574276362, "grad_norm": 0.007185524329543114, "learning_rate": 2.3076671571031554e-05, "loss": 0.0003, "step": 6275 }, { "epoch": 0.9268454154253558, "grad_norm": 0.29879918694496155, "learning_rate": 2.303580186365866e-05, "loss": 0.0004, "step": 6300 }, { "epoch": 0.9305233734230756, "grad_norm": 0.005276743322610855, "learning_rate": 2.2994932156285764e-05, "loss": 0.0209, "step": 6325 }, { "epoch": 0.9342013314207952, "grad_norm": 4.756071090698242, "learning_rate": 2.2954062448912866e-05, "loss": 0.0696, "step": 6350 }, { "epoch": 0.9378792894185148, "grad_norm": 0.005177750252187252, "learning_rate": 2.291319274153997e-05, "loss": 0.0294, "step": 6375 }, { "epoch": 0.9415572474162345, "grad_norm": 0.005691983737051487, "learning_rate": 2.2872323034167074e-05, "loss": 0.0102, "step": 6400 }, { "epoch": 0.9452352054139542, "grad_norm": 0.012254934757947922, "learning_rate": 2.283145332679418e-05, "loss": 0.0204, "step": 6425 }, { "epoch": 0.9489131634116739, "grad_norm": 0.007204866968095303, "learning_rate": 2.2790583619421284e-05, "loss": 0.001, "step": 6450 }, { "epoch": 0.9525911214093935, "grad_norm": 0.0022422156762331724, "learning_rate": 2.274971391204839e-05, "loss": 0.0074, "step": 6475 }, { "epoch": 0.9562690794071131, "grad_norm": 0.0029815786983817816, "learning_rate": 2.2708844204675495e-05, "loss": 0.0001, "step": 6500 }, { "epoch": 0.9599470374048328, "grad_norm": 0.0027428902685642242, "learning_rate": 2.26679744973026e-05, "loss": 0.0534, "step": 6525 }, { "epoch": 0.9636249954025525, "grad_norm": 0.0038738884031772614, "learning_rate": 2.2627104789929703e-05, "loss": 0.0167, "step": 6550 }, { "epoch": 0.9673029534002722, "grad_norm": 0.002053373260423541, "learning_rate": 2.2586235082556808e-05, "loss": 0.0119, "step": 6575 }, { "epoch": 0.9709809113979918, "grad_norm": 0.015416144393384457, "learning_rate": 2.2545365375183913e-05, "loss": 0.0436, "step": 6600 }, { "epoch": 0.9746588693957114, "grad_norm": 0.028199590742588043, "learning_rate": 2.250449566781102e-05, "loss": 0.06, "step": 6625 }, { "epoch": 0.9783368273934312, "grad_norm": 0.00808124803006649, "learning_rate": 2.2463625960438124e-05, "loss": 0.0082, "step": 6650 }, { "epoch": 0.9820147853911508, "grad_norm": 0.896677553653717, "learning_rate": 2.242275625306523e-05, "loss": 0.0004, "step": 6675 }, { "epoch": 0.9856927433888705, "grad_norm": 0.014748472720384598, "learning_rate": 2.2381886545692335e-05, "loss": 0.0554, "step": 6700 }, { "epoch": 0.9893707013865901, "grad_norm": 0.08279622346162796, "learning_rate": 2.2341016838319437e-05, "loss": 0.0727, "step": 6725 }, { "epoch": 0.9930486593843099, "grad_norm": 0.0343361496925354, "learning_rate": 2.2300147130946542e-05, "loss": 0.0653, "step": 6750 }, { "epoch": 0.9967266173820295, "grad_norm": 0.01778659224510193, "learning_rate": 2.2259277423573648e-05, "loss": 0.0468, "step": 6775 }, { "epoch": 0.9999632204200228, "eval_accuracy": 0.9960279514527399, "eval_auc": 0.9999026317054973, "eval_f1": 0.9960253201825409, "eval_loss": 0.020395906642079353, "eval_precision": 0.9967589864466706, "eval_recall": 0.9952927331568108, "eval_runtime": 2488.2544, "eval_samples_per_second": 5.464, "eval_steps_per_second": 1.366, "step": 6797 }, { "epoch": 1.0004045753797493, "grad_norm": 0.038309529423713684, "learning_rate": 2.2218407716200753e-05, "loss": 0.0867, "step": 6800 }, { "epoch": 1.0040825333774688, "grad_norm": 0.03099379874765873, "learning_rate": 2.217753800882786e-05, "loss": 0.0251, "step": 6825 }, { "epoch": 1.0077604913751885, "grad_norm": 0.014889312908053398, "learning_rate": 2.2136668301454964e-05, "loss": 0.0007, "step": 6850 }, { "epoch": 1.011438449372908, "grad_norm": 0.011484134942293167, "learning_rate": 2.209579859408207e-05, "loss": 0.0042, "step": 6875 }, { "epoch": 1.0151164073706278, "grad_norm": 0.008166844956576824, "learning_rate": 2.205492888670917e-05, "loss": 0.0003, "step": 6900 }, { "epoch": 1.0187943653683476, "grad_norm": 0.006568376440554857, "learning_rate": 2.2014059179336276e-05, "loss": 0.0003, "step": 6925 }, { "epoch": 1.022472323366067, "grad_norm": 0.0057509117759764194, "learning_rate": 2.1973189471963382e-05, "loss": 0.0084, "step": 6950 }, { "epoch": 1.0261502813637868, "grad_norm": 0.004868589341640472, "learning_rate": 2.1932319764590487e-05, "loss": 0.0043, "step": 6975 }, { "epoch": 1.0298282393615066, "grad_norm": 0.004712184425443411, "learning_rate": 2.1891450057217593e-05, "loss": 0.0029, "step": 7000 }, { "epoch": 1.033506197359226, "grad_norm": 0.0035947624128311872, "learning_rate": 2.1850580349844698e-05, "loss": 0.0051, "step": 7025 }, { "epoch": 1.0371841553569459, "grad_norm": 0.0033714687451720238, "learning_rate": 2.1809710642471803e-05, "loss": 0.0377, "step": 7050 }, { "epoch": 1.0408621133546654, "grad_norm": 12.332621574401855, "learning_rate": 2.1768840935098905e-05, "loss": 0.0061, "step": 7075 }, { "epoch": 1.0445400713523851, "grad_norm": 0.002749204868450761, "learning_rate": 2.172797122772601e-05, "loss": 0.0003, "step": 7100 }, { "epoch": 1.0482180293501049, "grad_norm": 0.0026924049016088247, "learning_rate": 2.1687101520353113e-05, "loss": 0.0001, "step": 7125 }, { "epoch": 1.0518959873478244, "grad_norm": 0.006290792487561703, "learning_rate": 2.1646231812980218e-05, "loss": 0.0443, "step": 7150 }, { "epoch": 1.0555739453455442, "grad_norm": 0.0048763868398964405, "learning_rate": 2.1605362105607323e-05, "loss": 0.0002, "step": 7175 }, { "epoch": 1.059251903343264, "grad_norm": 0.003825924126431346, "learning_rate": 2.156449239823443e-05, "loss": 0.0002, "step": 7200 }, { "epoch": 1.0629298613409834, "grad_norm": 0.0068919663317501545, "learning_rate": 2.1523622690861534e-05, "loss": 0.0001, "step": 7225 }, { "epoch": 1.0666078193387032, "grad_norm": 0.0029492308385670185, "learning_rate": 2.1482752983488636e-05, "loss": 0.0001, "step": 7250 }, { "epoch": 1.0702857773364227, "grad_norm": 0.0031761634163558483, "learning_rate": 2.144188327611574e-05, "loss": 0.0001, "step": 7275 }, { "epoch": 1.0739637353341425, "grad_norm": 0.004821736365556717, "learning_rate": 2.1401013568742847e-05, "loss": 0.0373, "step": 7300 }, { "epoch": 1.0776416933318622, "grad_norm": 0.003594837849959731, "learning_rate": 2.1360143861369952e-05, "loss": 0.0004, "step": 7325 }, { "epoch": 1.0813196513295817, "grad_norm": 0.004811630584299564, "learning_rate": 2.1319274153997058e-05, "loss": 0.0001, "step": 7350 }, { "epoch": 1.0849976093273015, "grad_norm": 0.006440363824367523, "learning_rate": 2.1278404446624163e-05, "loss": 0.0453, "step": 7375 }, { "epoch": 1.0886755673250212, "grad_norm": 0.007900132797658443, "learning_rate": 2.123753473925127e-05, "loss": 0.0003, "step": 7400 }, { "epoch": 1.0923535253227408, "grad_norm": 0.00898217223584652, "learning_rate": 2.1196665031878374e-05, "loss": 0.0811, "step": 7425 }, { "epoch": 1.0960314833204605, "grad_norm": 0.031215157359838486, "learning_rate": 2.1155795324505476e-05, "loss": 0.035, "step": 7450 }, { "epoch": 1.09970944131818, "grad_norm": 0.022409003227949142, "learning_rate": 2.111492561713258e-05, "loss": 0.0014, "step": 7475 }, { "epoch": 1.1033873993158998, "grad_norm": 0.0137456264346838, "learning_rate": 2.1074055909759686e-05, "loss": 0.0006, "step": 7500 }, { "epoch": 1.1070653573136195, "grad_norm": 0.006075088866055012, "learning_rate": 2.1033186202386792e-05, "loss": 0.0005, "step": 7525 }, { "epoch": 1.110743315311339, "grad_norm": 0.007382239680737257, "learning_rate": 2.0992316495013897e-05, "loss": 0.0003, "step": 7550 }, { "epoch": 1.1144212733090588, "grad_norm": 0.016082163900136948, "learning_rate": 2.0951446787641003e-05, "loss": 0.0469, "step": 7575 }, { "epoch": 1.1180992313067786, "grad_norm": 0.02028113603591919, "learning_rate": 2.0910577080268108e-05, "loss": 0.0398, "step": 7600 }, { "epoch": 1.121777189304498, "grad_norm": 0.014643259346485138, "learning_rate": 2.086970737289521e-05, "loss": 0.0007, "step": 7625 }, { "epoch": 1.1254551473022179, "grad_norm": 0.010461482219398022, "learning_rate": 2.0828837665522315e-05, "loss": 0.0004, "step": 7650 }, { "epoch": 1.1291331052999374, "grad_norm": 0.009396770037710667, "learning_rate": 2.078796795814942e-05, "loss": 0.0004, "step": 7675 }, { "epoch": 1.1328110632976571, "grad_norm": 0.007909806445240974, "learning_rate": 2.0747098250776526e-05, "loss": 0.016, "step": 7700 }, { "epoch": 1.1364890212953769, "grad_norm": 0.006153750233352184, "learning_rate": 2.070622854340363e-05, "loss": 0.0055, "step": 7725 }, { "epoch": 1.1401669792930964, "grad_norm": 0.006996823474764824, "learning_rate": 2.0665358836030737e-05, "loss": 0.0002, "step": 7750 }, { "epoch": 1.1438449372908162, "grad_norm": 0.006032935809344053, "learning_rate": 2.0624489128657842e-05, "loss": 0.0331, "step": 7775 }, { "epoch": 1.1475228952885357, "grad_norm": 0.003607578342780471, "learning_rate": 2.0583619421284944e-05, "loss": 0.0002, "step": 7800 }, { "epoch": 1.1512008532862554, "grad_norm": 0.004726866725832224, "learning_rate": 2.054274971391205e-05, "loss": 0.0002, "step": 7825 }, { "epoch": 1.1548788112839752, "grad_norm": 0.004033273551613092, "learning_rate": 2.050188000653915e-05, "loss": 0.0001, "step": 7850 }, { "epoch": 1.1585567692816947, "grad_norm": 0.0035559283569455147, "learning_rate": 2.0461010299166257e-05, "loss": 0.0001, "step": 7875 }, { "epoch": 1.1622347272794145, "grad_norm": 0.002765959594398737, "learning_rate": 2.0420140591793362e-05, "loss": 0.0001, "step": 7900 }, { "epoch": 1.1659126852771342, "grad_norm": 0.003123935777693987, "learning_rate": 2.0379270884420468e-05, "loss": 0.0001, "step": 7925 }, { "epoch": 1.1695906432748537, "grad_norm": 0.0030226910021156073, "learning_rate": 2.0338401177047573e-05, "loss": 0.0443, "step": 7950 }, { "epoch": 1.1732686012725735, "grad_norm": 0.002675386844202876, "learning_rate": 2.0297531469674675e-05, "loss": 0.0001, "step": 7975 }, { "epoch": 1.1769465592702932, "grad_norm": 0.002876314101740718, "learning_rate": 2.025666176230178e-05, "loss": 0.0001, "step": 8000 }, { "epoch": 1.1806245172680128, "grad_norm": 0.003930400125682354, "learning_rate": 2.0215792054928886e-05, "loss": 0.0463, "step": 8025 }, { "epoch": 1.1843024752657325, "grad_norm": 0.004908836912363768, "learning_rate": 2.017492234755599e-05, "loss": 0.0002, "step": 8050 }, { "epoch": 1.187980433263452, "grad_norm": 0.005489639472216368, "learning_rate": 2.0134052640183097e-05, "loss": 0.0014, "step": 8075 }, { "epoch": 1.1916583912611718, "grad_norm": 0.0054463837295770645, "learning_rate": 2.0093182932810202e-05, "loss": 0.0418, "step": 8100 }, { "epoch": 1.1953363492588915, "grad_norm": 0.004771388601511717, "learning_rate": 2.0052313225437307e-05, "loss": 0.0002, "step": 8125 }, { "epoch": 1.199014307256611, "grad_norm": 0.004579597618430853, "learning_rate": 2.001144351806441e-05, "loss": 0.0002, "step": 8150 }, { "epoch": 1.2026922652543308, "grad_norm": 0.005399708636105061, "learning_rate": 1.9970573810691515e-05, "loss": 0.0002, "step": 8175 }, { "epoch": 1.2063702232520503, "grad_norm": 0.0028218550141900778, "learning_rate": 1.992970410331862e-05, "loss": 0.0001, "step": 8200 }, { "epoch": 1.21004818124977, "grad_norm": 0.0270390622317791, "learning_rate": 1.9888834395945725e-05, "loss": 0.1464, "step": 8225 }, { "epoch": 1.2137261392474898, "grad_norm": 0.007817487232387066, "learning_rate": 1.984796468857283e-05, "loss": 0.0005, "step": 8250 }, { "epoch": 1.2174040972452094, "grad_norm": 0.009673170745372772, "learning_rate": 1.9807094981199936e-05, "loss": 0.0003, "step": 8275 }, { "epoch": 1.2210820552429291, "grad_norm": 0.006883264984935522, "learning_rate": 1.976622527382704e-05, "loss": 0.0364, "step": 8300 }, { "epoch": 1.2247600132406489, "grad_norm": 0.038729436695575714, "learning_rate": 1.9725355566454147e-05, "loss": 0.0002, "step": 8325 }, { "epoch": 1.2284379712383684, "grad_norm": 0.004570882301777601, "learning_rate": 1.968448585908125e-05, "loss": 0.0002, "step": 8350 }, { "epoch": 1.2321159292360881, "grad_norm": 0.010231226682662964, "learning_rate": 1.9643616151708354e-05, "loss": 0.0463, "step": 8375 }, { "epoch": 1.235793887233808, "grad_norm": 0.008044122718274593, "learning_rate": 1.960274644433546e-05, "loss": 0.0003, "step": 8400 }, { "epoch": 1.2394718452315274, "grad_norm": 0.005202152766287327, "learning_rate": 1.9561876736962565e-05, "loss": 0.0391, "step": 8425 }, { "epoch": 1.2431498032292472, "grad_norm": 0.0054007298313081264, "learning_rate": 1.952100702958967e-05, "loss": 0.0182, "step": 8450 }, { "epoch": 1.2468277612269667, "grad_norm": 0.005195588804781437, "learning_rate": 1.9480137322216776e-05, "loss": 0.0392, "step": 8475 }, { "epoch": 1.2505057192246865, "grad_norm": 0.00451032817363739, "learning_rate": 1.943926761484388e-05, "loss": 0.0002, "step": 8500 }, { "epoch": 1.2541836772224062, "grad_norm": 0.00390147278085351, "learning_rate": 1.9398397907470983e-05, "loss": 0.0002, "step": 8525 }, { "epoch": 1.2578616352201257, "grad_norm": 0.0030624952632933855, "learning_rate": 1.935752820009809e-05, "loss": 0.0001, "step": 8550 }, { "epoch": 1.2615395932178455, "grad_norm": 0.0030448674224317074, "learning_rate": 1.931665849272519e-05, "loss": 0.0001, "step": 8575 }, { "epoch": 1.265217551215565, "grad_norm": 0.003369387937709689, "learning_rate": 1.9275788785352296e-05, "loss": 0.0001, "step": 8600 }, { "epoch": 1.2688955092132848, "grad_norm": 0.0026294661220163107, "learning_rate": 1.92349190779794e-05, "loss": 0.0001, "step": 8625 }, { "epoch": 1.2725734672110045, "grad_norm": 0.002674271585419774, "learning_rate": 1.9194049370606507e-05, "loss": 0.0001, "step": 8650 }, { "epoch": 1.276251425208724, "grad_norm": 0.016562707722187042, "learning_rate": 1.9153179663233612e-05, "loss": 0.0001, "step": 8675 }, { "epoch": 1.2799293832064438, "grad_norm": 0.002845450770109892, "learning_rate": 1.9112309955860714e-05, "loss": 0.0376, "step": 8700 }, { "epoch": 1.2836073412041635, "grad_norm": 0.002954358235001564, "learning_rate": 1.907144024848782e-05, "loss": 0.0001, "step": 8725 }, { "epoch": 1.287285299201883, "grad_norm": 0.002028050599619746, "learning_rate": 1.9030570541114925e-05, "loss": 0.0047, "step": 8750 }, { "epoch": 1.2909632571996028, "grad_norm": 0.002608607057482004, "learning_rate": 1.898970083374203e-05, "loss": 0.0001, "step": 8775 }, { "epoch": 1.2946412151973226, "grad_norm": 0.0024424525909125805, "learning_rate": 1.8948831126369135e-05, "loss": 0.0001, "step": 8800 }, { "epoch": 1.298319173195042, "grad_norm": 0.001993270590901375, "learning_rate": 1.890796141899624e-05, "loss": 0.0001, "step": 8825 }, { "epoch": 1.3019971311927618, "grad_norm": 0.009992810897529125, "learning_rate": 1.8867091711623346e-05, "loss": 0.0001, "step": 8850 }, { "epoch": 1.3056750891904814, "grad_norm": 0.003959705121815205, "learning_rate": 1.8826222004250448e-05, "loss": 0.0336, "step": 8875 }, { "epoch": 1.3093530471882011, "grad_norm": 0.002648918190971017, "learning_rate": 1.8785352296877554e-05, "loss": 0.0002, "step": 8900 }, { "epoch": 1.3130310051859206, "grad_norm": 0.001997936749830842, "learning_rate": 1.874448258950466e-05, "loss": 0.0001, "step": 8925 }, { "epoch": 1.3167089631836404, "grad_norm": 0.0019702455028891563, "learning_rate": 1.8703612882131764e-05, "loss": 0.0001, "step": 8950 }, { "epoch": 1.3203869211813601, "grad_norm": 0.0019666815642267466, "learning_rate": 1.866274317475887e-05, "loss": 0.015, "step": 8975 }, { "epoch": 1.3240648791790797, "grad_norm": 0.016209330409765244, "learning_rate": 1.8621873467385975e-05, "loss": 0.0499, "step": 9000 }, { "epoch": 1.3277428371767994, "grad_norm": 0.002770668361335993, "learning_rate": 1.858100376001308e-05, "loss": 0.0001, "step": 9025 }, { "epoch": 1.3314207951745192, "grad_norm": 0.0025566229596734047, "learning_rate": 1.8540134052640182e-05, "loss": 0.0429, "step": 9050 }, { "epoch": 1.3350987531722387, "grad_norm": 0.00490075396373868, "learning_rate": 1.8499264345267288e-05, "loss": 0.0391, "step": 9075 }, { "epoch": 1.3387767111699584, "grad_norm": 0.002448379760608077, "learning_rate": 1.8458394637894393e-05, "loss": 0.0002, "step": 9100 }, { "epoch": 1.3424546691676782, "grad_norm": 0.0027882566209882498, "learning_rate": 1.84175249305215e-05, "loss": 0.0001, "step": 9125 }, { "epoch": 1.3461326271653977, "grad_norm": 0.0021890706848353148, "learning_rate": 1.8376655223148604e-05, "loss": 0.0001, "step": 9150 }, { "epoch": 1.3498105851631175, "grad_norm": 0.002767590805888176, "learning_rate": 1.833578551577571e-05, "loss": 0.0001, "step": 9175 }, { "epoch": 1.3534885431608372, "grad_norm": 0.0018375491490587592, "learning_rate": 1.8294915808402815e-05, "loss": 0.0003, "step": 9200 }, { "epoch": 1.3571665011585567, "grad_norm": 0.0020680581219494343, "learning_rate": 1.825404610102992e-05, "loss": 0.0001, "step": 9225 }, { "epoch": 1.3608444591562765, "grad_norm": 0.001452911994419992, "learning_rate": 1.8213176393657022e-05, "loss": 0.0001, "step": 9250 }, { "epoch": 1.364522417153996, "grad_norm": 0.011856326833367348, "learning_rate": 1.8172306686284127e-05, "loss": 0.0498, "step": 9275 }, { "epoch": 1.3682003751517158, "grad_norm": 0.005070924758911133, "learning_rate": 1.813143697891123e-05, "loss": 0.0003, "step": 9300 }, { "epoch": 1.3718783331494353, "grad_norm": 0.003941578324884176, "learning_rate": 1.8090567271538335e-05, "loss": 0.0001, "step": 9325 }, { "epoch": 1.375556291147155, "grad_norm": 0.0044369762763381, "learning_rate": 1.804969756416544e-05, "loss": 0.0395, "step": 9350 }, { "epoch": 1.3792342491448748, "grad_norm": 0.003973621409386396, "learning_rate": 1.8008827856792546e-05, "loss": 0.0002, "step": 9375 }, { "epoch": 1.3829122071425943, "grad_norm": 0.00455184280872345, "learning_rate": 1.796795814941965e-05, "loss": 0.0001, "step": 9400 }, { "epoch": 1.386590165140314, "grad_norm": 0.0031091428827494383, "learning_rate": 1.7927088442046753e-05, "loss": 0.0001, "step": 9425 }, { "epoch": 1.3902681231380338, "grad_norm": 0.0024325144477188587, "learning_rate": 1.7886218734673858e-05, "loss": 0.0001, "step": 9450 }, { "epoch": 1.3939460811357534, "grad_norm": 0.0036399061791598797, "learning_rate": 1.7845349027300964e-05, "loss": 0.0001, "step": 9475 }, { "epoch": 1.397624039133473, "grad_norm": 0.0023723021149635315, "learning_rate": 1.780447931992807e-05, "loss": 0.0001, "step": 9500 }, { "epoch": 1.4013019971311929, "grad_norm": 0.0027509965002536774, "learning_rate": 1.7763609612555174e-05, "loss": 0.0001, "step": 9525 }, { "epoch": 1.4049799551289124, "grad_norm": 0.0033826676663011312, "learning_rate": 1.772273990518228e-05, "loss": 0.0001, "step": 9550 }, { "epoch": 1.4086579131266321, "grad_norm": 0.011138912290334702, "learning_rate": 1.7681870197809385e-05, "loss": 0.0398, "step": 9575 }, { "epoch": 1.4123358711243519, "grad_norm": 0.023271048441529274, "learning_rate": 1.7641000490436487e-05, "loss": 0.0747, "step": 9600 }, { "epoch": 1.4160138291220714, "grad_norm": 0.18063010275363922, "learning_rate": 1.7600130783063593e-05, "loss": 0.0009, "step": 9625 }, { "epoch": 1.4196917871197912, "grad_norm": 0.012859140522778034, "learning_rate": 1.7559261075690698e-05, "loss": 0.0444, "step": 9650 }, { "epoch": 1.4233697451175107, "grad_norm": 0.003733620513230562, "learning_rate": 1.7518391368317803e-05, "loss": 0.0219, "step": 9675 }, { "epoch": 1.4270477031152304, "grad_norm": 4.048089504241943, "learning_rate": 1.747752166094491e-05, "loss": 0.052, "step": 9700 }, { "epoch": 1.43072566111295, "grad_norm": 0.02329842559993267, "learning_rate": 1.7436651953572014e-05, "loss": 0.0033, "step": 9725 }, { "epoch": 1.4344036191106697, "grad_norm": 0.5609085559844971, "learning_rate": 1.739578224619912e-05, "loss": 0.0468, "step": 9750 }, { "epoch": 1.4380815771083895, "grad_norm": 0.010268951766192913, "learning_rate": 1.735491253882622e-05, "loss": 0.0004, "step": 9775 }, { "epoch": 1.441759535106109, "grad_norm": 0.005183890461921692, "learning_rate": 1.7314042831453327e-05, "loss": 0.0002, "step": 9800 }, { "epoch": 1.4454374931038287, "grad_norm": 0.006362477317452431, "learning_rate": 1.7273173124080432e-05, "loss": 0.0623, "step": 9825 }, { "epoch": 1.4491154511015485, "grad_norm": 0.004158661235123873, "learning_rate": 1.7232303416707537e-05, "loss": 0.0002, "step": 9850 }, { "epoch": 1.452793409099268, "grad_norm": 0.003037210088223219, "learning_rate": 1.7191433709334643e-05, "loss": 0.0001, "step": 9875 }, { "epoch": 1.4564713670969878, "grad_norm": 0.006479774601757526, "learning_rate": 1.7150564001961748e-05, "loss": 0.0562, "step": 9900 }, { "epoch": 1.4601493250947075, "grad_norm": 34.625465393066406, "learning_rate": 1.7109694294588854e-05, "loss": 0.0423, "step": 9925 }, { "epoch": 1.463827283092427, "grad_norm": 0.003740801243111491, "learning_rate": 1.706882458721596e-05, "loss": 0.0001, "step": 9950 }, { "epoch": 1.4675052410901468, "grad_norm": 0.06391607969999313, "learning_rate": 1.702795487984306e-05, "loss": 0.0211, "step": 9975 }, { "epoch": 1.4711831990878665, "grad_norm": 0.0029998337849974632, "learning_rate": 1.6987085172470166e-05, "loss": 0.0012, "step": 10000 }, { "epoch": 1.474861157085586, "grad_norm": 0.002598424442112446, "learning_rate": 1.6946215465097272e-05, "loss": 0.0056, "step": 10025 }, { "epoch": 1.4785391150833058, "grad_norm": 0.0026498546358197927, "learning_rate": 1.6905345757724374e-05, "loss": 0.0003, "step": 10050 }, { "epoch": 1.4822170730810253, "grad_norm": 0.002896289573982358, "learning_rate": 1.686447605035148e-05, "loss": 0.0244, "step": 10075 }, { "epoch": 1.485895031078745, "grad_norm": 0.002737634815275669, "learning_rate": 1.6823606342978584e-05, "loss": 0.0002, "step": 10100 }, { "epoch": 1.4895729890764646, "grad_norm": 0.002295145532116294, "learning_rate": 1.678273663560569e-05, "loss": 0.0001, "step": 10125 }, { "epoch": 1.4932509470741844, "grad_norm": 0.0018749627051874995, "learning_rate": 1.6741866928232792e-05, "loss": 0.0001, "step": 10150 }, { "epoch": 1.4969289050719041, "grad_norm": 0.002252426231279969, "learning_rate": 1.6700997220859897e-05, "loss": 0.0091, "step": 10175 }, { "epoch": 1.5006068630696237, "grad_norm": 0.001987684750929475, "learning_rate": 1.6660127513487003e-05, "loss": 0.0059, "step": 10200 }, { "epoch": 1.5042848210673434, "grad_norm": 0.0018681609071791172, "learning_rate": 1.6619257806114108e-05, "loss": 0.0036, "step": 10225 }, { "epoch": 1.5079627790650632, "grad_norm": 0.002243634080514312, "learning_rate": 1.6578388098741213e-05, "loss": 0.0001, "step": 10250 }, { "epoch": 1.5116407370627827, "grad_norm": 0.005282828118652105, "learning_rate": 1.653751839136832e-05, "loss": 0.0508, "step": 10275 }, { "epoch": 1.5153186950605024, "grad_norm": 0.0033266160171478987, "learning_rate": 1.6496648683995424e-05, "loss": 0.0036, "step": 10300 }, { "epoch": 1.5189966530582222, "grad_norm": 0.0024327326100319624, "learning_rate": 1.6455778976622526e-05, "loss": 0.0001, "step": 10325 }, { "epoch": 1.5226746110559417, "grad_norm": 0.0037725428119301796, "learning_rate": 1.641490926924963e-05, "loss": 0.0859, "step": 10350 }, { "epoch": 1.5263525690536615, "grad_norm": 0.01479677390307188, "learning_rate": 1.6374039561876737e-05, "loss": 0.0002, "step": 10375 }, { "epoch": 1.5300305270513812, "grad_norm": 0.002465145429596305, "learning_rate": 1.6333169854503842e-05, "loss": 0.0009, "step": 10400 }, { "epoch": 1.5337084850491007, "grad_norm": 0.002028359565883875, "learning_rate": 1.6292300147130948e-05, "loss": 0.0001, "step": 10425 }, { "epoch": 1.5373864430468203, "grad_norm": 0.0017766653327271342, "learning_rate": 1.6251430439758053e-05, "loss": 0.0001, "step": 10450 }, { "epoch": 1.5410644010445402, "grad_norm": 0.002013767370954156, "learning_rate": 1.6210560732385158e-05, "loss": 0.0255, "step": 10475 }, { "epoch": 1.5447423590422598, "grad_norm": 0.0019861028995364904, "learning_rate": 1.616969102501226e-05, "loss": 0.0109, "step": 10500 }, { "epoch": 1.5484203170399793, "grad_norm": 0.0017919589299708605, "learning_rate": 1.6128821317639366e-05, "loss": 0.0063, "step": 10525 }, { "epoch": 1.552098275037699, "grad_norm": 0.001575242611579597, "learning_rate": 1.608795161026647e-05, "loss": 0.0001, "step": 10550 }, { "epoch": 1.5557762330354188, "grad_norm": 0.0017625424079596996, "learning_rate": 1.6047081902893576e-05, "loss": 0.0001, "step": 10575 }, { "epoch": 1.5594541910331383, "grad_norm": 0.0014293509302660823, "learning_rate": 1.6006212195520682e-05, "loss": 0.0001, "step": 10600 }, { "epoch": 1.563132149030858, "grad_norm": 3.637284994125366, "learning_rate": 1.5965342488147787e-05, "loss": 0.0319, "step": 10625 }, { "epoch": 1.5668101070285778, "grad_norm": 0.0015190584817901254, "learning_rate": 1.5924472780774893e-05, "loss": 0.1112, "step": 10650 }, { "epoch": 1.5704880650262973, "grad_norm": 0.0019073854200541973, "learning_rate": 1.5883603073401995e-05, "loss": 0.0001, "step": 10675 }, { "epoch": 1.574166023024017, "grad_norm": 0.15334878861904144, "learning_rate": 1.58427333660291e-05, "loss": 0.0001, "step": 10700 }, { "epoch": 1.5778439810217368, "grad_norm": 0.0013233659556135535, "learning_rate": 1.5801863658656205e-05, "loss": 0.0006, "step": 10725 }, { "epoch": 1.5815219390194564, "grad_norm": 60.88636779785156, "learning_rate": 1.576099395128331e-05, "loss": 0.0794, "step": 10750 }, { "epoch": 1.5851998970171761, "grad_norm": 0.006810314953327179, "learning_rate": 1.5720124243910413e-05, "loss": 0.0272, "step": 10775 }, { "epoch": 1.5888778550148959, "grad_norm": 0.006012595724314451, "learning_rate": 1.5679254536537518e-05, "loss": 0.0177, "step": 10800 }, { "epoch": 1.5925558130126154, "grad_norm": 0.0041669500060379505, "learning_rate": 1.5638384829164623e-05, "loss": 0.0007, "step": 10825 }, { "epoch": 1.596233771010335, "grad_norm": 0.0024410944897681475, "learning_rate": 1.559751512179173e-05, "loss": 0.0001, "step": 10850 }, { "epoch": 1.5999117290080547, "grad_norm": 0.002287843730300665, "learning_rate": 1.555664541441883e-05, "loss": 0.0001, "step": 10875 }, { "epoch": 1.6035896870057744, "grad_norm": 0.002450288040563464, "learning_rate": 1.5515775707045936e-05, "loss": 0.0001, "step": 10900 }, { "epoch": 1.607267645003494, "grad_norm": 0.0017540917033329606, "learning_rate": 1.547490599967304e-05, "loss": 0.0001, "step": 10925 }, { "epoch": 1.6109456030012137, "grad_norm": 0.0018945990595966578, "learning_rate": 1.5434036292300147e-05, "loss": 0.0001, "step": 10950 }, { "epoch": 1.6146235609989334, "grad_norm": 0.38427916169166565, "learning_rate": 1.5393166584927252e-05, "loss": 0.0478, "step": 10975 }, { "epoch": 1.618301518996653, "grad_norm": 0.005249540787190199, "learning_rate": 1.5352296877554358e-05, "loss": 0.0005, "step": 11000 }, { "epoch": 1.6219794769943727, "grad_norm": 0.049626559019088745, "learning_rate": 1.5311427170181463e-05, "loss": 0.0803, "step": 11025 }, { "epoch": 1.6256574349920925, "grad_norm": 0.006765100173652172, "learning_rate": 1.5270557462808565e-05, "loss": 0.021, "step": 11050 }, { "epoch": 1.629335392989812, "grad_norm": 0.012057892046868801, "learning_rate": 1.522968775543567e-05, "loss": 0.0005, "step": 11075 }, { "epoch": 1.6330133509875318, "grad_norm": 0.012171362526714802, "learning_rate": 1.5188818048062776e-05, "loss": 0.0171, "step": 11100 }, { "epoch": 1.6366913089852515, "grad_norm": 0.006173169240355492, "learning_rate": 1.5147948340689881e-05, "loss": 0.0183, "step": 11125 }, { "epoch": 1.640369266982971, "grad_norm": 0.025982793420553207, "learning_rate": 1.5107078633316986e-05, "loss": 0.041, "step": 11150 }, { "epoch": 1.6440472249806906, "grad_norm": 0.0121184466406703, "learning_rate": 1.5066208925944092e-05, "loss": 0.0066, "step": 11175 }, { "epoch": 1.6477251829784105, "grad_norm": 0.008928947150707245, "learning_rate": 1.5025339218571197e-05, "loss": 0.0013, "step": 11200 }, { "epoch": 1.65140314097613, "grad_norm": 0.003572331042960286, "learning_rate": 1.4984469511198301e-05, "loss": 0.0448, "step": 11225 }, { "epoch": 1.6550810989738496, "grad_norm": 0.012093408964574337, "learning_rate": 1.4943599803825406e-05, "loss": 0.0003, "step": 11250 }, { "epoch": 1.6587590569715693, "grad_norm": 0.005746824201196432, "learning_rate": 1.490273009645251e-05, "loss": 0.0002, "step": 11275 }, { "epoch": 1.662437014969289, "grad_norm": 0.005075458902865648, "learning_rate": 1.4861860389079615e-05, "loss": 0.0431, "step": 11300 }, { "epoch": 1.6661149729670086, "grad_norm": 0.006644480861723423, "learning_rate": 1.4820990681706719e-05, "loss": 0.0003, "step": 11325 }, { "epoch": 1.6697929309647284, "grad_norm": 0.016171354800462723, "learning_rate": 1.4780120974333823e-05, "loss": 0.0163, "step": 11350 }, { "epoch": 1.673470888962448, "grad_norm": 0.005658384878188372, "learning_rate": 1.4739251266960928e-05, "loss": 0.0022, "step": 11375 }, { "epoch": 1.6771488469601676, "grad_norm": 0.010968804359436035, "learning_rate": 1.4698381559588033e-05, "loss": 0.0804, "step": 11400 }, { "epoch": 1.6808268049578874, "grad_norm": 0.029876096174120903, "learning_rate": 1.4657511852215139e-05, "loss": 0.067, "step": 11425 }, { "epoch": 1.6845047629556071, "grad_norm": 0.03841656073927879, "learning_rate": 1.4616642144842242e-05, "loss": 0.0349, "step": 11450 }, { "epoch": 1.6881827209533267, "grad_norm": 0.017025554552674294, "learning_rate": 1.4575772437469348e-05, "loss": 0.001, "step": 11475 }, { "epoch": 1.6918606789510464, "grad_norm": 0.024776197969913483, "learning_rate": 1.4534902730096453e-05, "loss": 0.0356, "step": 11500 }, { "epoch": 1.6955386369487662, "grad_norm": 0.018094466999173164, "learning_rate": 1.4494033022723559e-05, "loss": 0.0006, "step": 11525 }, { "epoch": 1.6992165949464857, "grad_norm": 0.010948434472084045, "learning_rate": 1.4453163315350662e-05, "loss": 0.0566, "step": 11550 }, { "epoch": 1.7028945529442052, "grad_norm": 0.06060256063938141, "learning_rate": 1.4412293607977768e-05, "loss": 0.087, "step": 11575 }, { "epoch": 1.7065725109419252, "grad_norm": 0.0425218902528286, "learning_rate": 1.4371423900604873e-05, "loss": 0.0014, "step": 11600 }, { "epoch": 1.7102504689396447, "grad_norm": 0.03931298479437828, "learning_rate": 1.4330554193231977e-05, "loss": 0.0329, "step": 11625 }, { "epoch": 1.7139284269373642, "grad_norm": 0.05203554406762123, "learning_rate": 1.4289684485859082e-05, "loss": 0.0667, "step": 11650 }, { "epoch": 1.717606384935084, "grad_norm": 0.059145841747522354, "learning_rate": 1.4248814778486187e-05, "loss": 0.0464, "step": 11675 }, { "epoch": 1.7212843429328037, "grad_norm": 0.053441960364580154, "learning_rate": 1.4207945071113291e-05, "loss": 0.0598, "step": 11700 }, { "epoch": 1.7249623009305233, "grad_norm": 0.0338728241622448, "learning_rate": 1.4167075363740395e-05, "loss": 0.0014, "step": 11725 }, { "epoch": 1.728640258928243, "grad_norm": 0.03298606723546982, "learning_rate": 1.41262056563675e-05, "loss": 0.0011, "step": 11750 }, { "epoch": 1.7323182169259628, "grad_norm": 0.007968394085764885, "learning_rate": 1.4085335948994606e-05, "loss": 0.0332, "step": 11775 }, { "epoch": 1.7359961749236823, "grad_norm": 0.033015619963407516, "learning_rate": 1.404446624162171e-05, "loss": 0.0471, "step": 11800 }, { "epoch": 1.739674132921402, "grad_norm": 0.03123684972524643, "learning_rate": 1.4003596534248815e-05, "loss": 0.0008, "step": 11825 }, { "epoch": 1.7433520909191218, "grad_norm": 0.026270106434822083, "learning_rate": 1.396272682687592e-05, "loss": 0.027, "step": 11850 }, { "epoch": 1.7470300489168413, "grad_norm": 0.025614146143198013, "learning_rate": 1.3921857119503025e-05, "loss": 0.0006, "step": 11875 }, { "epoch": 1.750708006914561, "grad_norm": 0.011196363717317581, "learning_rate": 1.3880987412130129e-05, "loss": 0.0004, "step": 11900 }, { "epoch": 1.7543859649122808, "grad_norm": 0.014085380360484123, "learning_rate": 1.3840117704757234e-05, "loss": 0.0007, "step": 11925 }, { "epoch": 1.7580639229100004, "grad_norm": 0.2520334720611572, "learning_rate": 1.379924799738434e-05, "loss": 0.0012, "step": 11950 }, { "epoch": 1.7617418809077199, "grad_norm": 0.0027042387519031763, "learning_rate": 1.3758378290011445e-05, "loss": 0.0657, "step": 11975 }, { "epoch": 1.7654198389054399, "grad_norm": 0.007959190756082535, "learning_rate": 1.3717508582638549e-05, "loss": 0.0009, "step": 12000 }, { "epoch": 1.7690977969031594, "grad_norm": 0.006802896503359079, "learning_rate": 1.3676638875265654e-05, "loss": 0.0002, "step": 12025 }, { "epoch": 1.772775754900879, "grad_norm": 0.0037322076968848705, "learning_rate": 1.3635769167892758e-05, "loss": 0.0002, "step": 12050 }, { "epoch": 1.7764537128985987, "grad_norm": 0.004444212652742863, "learning_rate": 1.3594899460519862e-05, "loss": 0.0113, "step": 12075 }, { "epoch": 1.7801316708963184, "grad_norm": 0.0029294530395418406, "learning_rate": 1.3554029753146967e-05, "loss": 0.0024, "step": 12100 }, { "epoch": 1.783809628894038, "grad_norm": 0.006351064890623093, "learning_rate": 1.3513160045774072e-05, "loss": 0.0339, "step": 12125 }, { "epoch": 1.7874875868917577, "grad_norm": 0.0033591645769774914, "learning_rate": 1.3472290338401178e-05, "loss": 0.003, "step": 12150 }, { "epoch": 1.7911655448894774, "grad_norm": 0.003340468741953373, "learning_rate": 1.3431420631028281e-05, "loss": 0.0002, "step": 12175 }, { "epoch": 1.794843502887197, "grad_norm": 0.12212031334638596, "learning_rate": 1.3390550923655387e-05, "loss": 0.0836, "step": 12200 }, { "epoch": 1.7985214608849167, "grad_norm": 0.014243889600038528, "learning_rate": 1.3349681216282492e-05, "loss": 0.0318, "step": 12225 }, { "epoch": 1.8021994188826365, "grad_norm": 0.016160359606146812, "learning_rate": 1.3308811508909596e-05, "loss": 0.0003, "step": 12250 }, { "epoch": 1.805877376880356, "grad_norm": 0.011376752518117428, "learning_rate": 1.3267941801536701e-05, "loss": 0.0003, "step": 12275 }, { "epoch": 1.8095553348780757, "grad_norm": 0.00865715742111206, "learning_rate": 1.3227072094163807e-05, "loss": 0.0133, "step": 12300 }, { "epoch": 1.8132332928757955, "grad_norm": 0.007116909604519606, "learning_rate": 1.3186202386790912e-05, "loss": 0.0003, "step": 12325 }, { "epoch": 1.816911250873515, "grad_norm": 0.008155121468007565, "learning_rate": 1.3145332679418016e-05, "loss": 0.0385, "step": 12350 }, { "epoch": 1.8205892088712345, "grad_norm": 0.013204419054090977, "learning_rate": 1.3104462972045121e-05, "loss": 0.0356, "step": 12375 }, { "epoch": 1.8242671668689545, "grad_norm": 0.013173281215131283, "learning_rate": 1.3063593264672226e-05, "loss": 0.0004, "step": 12400 }, { "epoch": 1.827945124866674, "grad_norm": 0.010820701718330383, "learning_rate": 1.302272355729933e-05, "loss": 0.0003, "step": 12425 }, { "epoch": 1.8316230828643936, "grad_norm": 0.00571137759834528, "learning_rate": 1.2981853849926434e-05, "loss": 0.0011, "step": 12450 }, { "epoch": 1.8353010408621133, "grad_norm": 0.007815693505108356, "learning_rate": 1.2940984142553539e-05, "loss": 0.0002, "step": 12475 }, { "epoch": 1.838978998859833, "grad_norm": 0.04561807960271835, "learning_rate": 1.2900114435180645e-05, "loss": 0.0002, "step": 12500 }, { "epoch": 1.8426569568575526, "grad_norm": 0.007523215841501951, "learning_rate": 1.2859244727807748e-05, "loss": 0.0203, "step": 12525 }, { "epoch": 1.8463349148552723, "grad_norm": 0.007975575514137745, "learning_rate": 1.2818375020434854e-05, "loss": 0.0002, "step": 12550 }, { "epoch": 1.850012872852992, "grad_norm": 0.007269065361469984, "learning_rate": 1.2777505313061959e-05, "loss": 0.0002, "step": 12575 }, { "epoch": 1.8536908308507116, "grad_norm": 0.004501336719840765, "learning_rate": 1.2736635605689064e-05, "loss": 0.0001, "step": 12600 }, { "epoch": 1.8573687888484314, "grad_norm": 0.004011464770883322, "learning_rate": 1.2695765898316168e-05, "loss": 0.0003, "step": 12625 }, { "epoch": 1.8610467468461511, "grad_norm": 0.002334051998332143, "learning_rate": 1.2654896190943273e-05, "loss": 0.0234, "step": 12650 }, { "epoch": 1.8647247048438707, "grad_norm": 0.004475513007491827, "learning_rate": 1.2614026483570379e-05, "loss": 0.0002, "step": 12675 }, { "epoch": 1.8684026628415904, "grad_norm": 0.003851409535855055, "learning_rate": 1.2573156776197482e-05, "loss": 0.0001, "step": 12700 }, { "epoch": 1.8720806208393101, "grad_norm": 0.0028481779154390097, "learning_rate": 1.2532287068824588e-05, "loss": 0.0255, "step": 12725 }, { "epoch": 1.8757585788370297, "grad_norm": 0.0030939916614443064, "learning_rate": 1.2491417361451693e-05, "loss": 0.0332, "step": 12750 }, { "epoch": 1.8794365368347492, "grad_norm": 0.0065445504151284695, "learning_rate": 1.2450547654078797e-05, "loss": 0.0422, "step": 12775 }, { "epoch": 1.8831144948324692, "grad_norm": 0.005459626670926809, "learning_rate": 1.24096779467059e-05, "loss": 0.0113, "step": 12800 }, { "epoch": 1.8867924528301887, "grad_norm": 0.002942801220342517, "learning_rate": 1.2368808239333006e-05, "loss": 0.0002, "step": 12825 }, { "epoch": 1.8904704108279082, "grad_norm": 0.0067766509018838406, "learning_rate": 1.2327938531960111e-05, "loss": 0.018, "step": 12850 }, { "epoch": 1.894148368825628, "grad_norm": 0.005411918740719557, "learning_rate": 1.2287068824587217e-05, "loss": 0.0486, "step": 12875 }, { "epoch": 1.8978263268233477, "grad_norm": 0.006009817123413086, "learning_rate": 1.224619911721432e-05, "loss": 0.0002, "step": 12900 }, { "epoch": 1.9015042848210673, "grad_norm": 0.005595459137111902, "learning_rate": 1.2205329409841426e-05, "loss": 0.0408, "step": 12925 }, { "epoch": 1.905182242818787, "grad_norm": 0.012987248599529266, "learning_rate": 1.2164459702468531e-05, "loss": 0.0823, "step": 12950 }, { "epoch": 1.9088602008165068, "grad_norm": 0.16368244588375092, "learning_rate": 1.2123589995095635e-05, "loss": 0.0018, "step": 12975 }, { "epoch": 1.9125381588142263, "grad_norm": 0.00949876382946968, "learning_rate": 1.208272028772274e-05, "loss": 0.0163, "step": 13000 }, { "epoch": 1.916216116811946, "grad_norm": 14.246623039245605, "learning_rate": 1.2041850580349846e-05, "loss": 0.0342, "step": 13025 }, { "epoch": 1.9198940748096658, "grad_norm": 0.00826562475413084, "learning_rate": 1.2000980872976951e-05, "loss": 0.0003, "step": 13050 }, { "epoch": 1.9235720328073853, "grad_norm": 0.006868135649710894, "learning_rate": 1.1960111165604055e-05, "loss": 0.0003, "step": 13075 }, { "epoch": 1.9272499908051048, "grad_norm": 0.00808362290263176, "learning_rate": 1.191924145823116e-05, "loss": 0.0402, "step": 13100 }, { "epoch": 1.9309279488028248, "grad_norm": 0.010807299055159092, "learning_rate": 1.1878371750858265e-05, "loss": 0.078, "step": 13125 }, { "epoch": 1.9346059068005443, "grad_norm": 0.01139509491622448, "learning_rate": 1.1837502043485367e-05, "loss": 0.0007, "step": 13150 }, { "epoch": 1.9382838647982639, "grad_norm": 0.00977110955864191, "learning_rate": 1.1796632336112473e-05, "loss": 0.0004, "step": 13175 }, { "epoch": 1.9419618227959836, "grad_norm": 0.006910570897161961, "learning_rate": 1.1755762628739578e-05, "loss": 0.0003, "step": 13200 }, { "epoch": 1.9456397807937034, "grad_norm": 4.1620564460754395, "learning_rate": 1.1714892921366683e-05, "loss": 0.0667, "step": 13225 }, { "epoch": 1.949317738791423, "grad_norm": 0.015238853171467781, "learning_rate": 1.1674023213993787e-05, "loss": 0.0015, "step": 13250 }, { "epoch": 1.9529956967891426, "grad_norm": 0.007931707426905632, "learning_rate": 1.1633153506620892e-05, "loss": 0.0003, "step": 13275 }, { "epoch": 1.9566736547868624, "grad_norm": 0.009560568258166313, "learning_rate": 1.1592283799247998e-05, "loss": 0.0003, "step": 13300 }, { "epoch": 1.960351612784582, "grad_norm": 0.008578946813941002, "learning_rate": 1.1551414091875103e-05, "loss": 0.0008, "step": 13325 }, { "epoch": 1.9640295707823017, "grad_norm": 0.011748207733035088, "learning_rate": 1.1510544384502207e-05, "loss": 0.0002, "step": 13350 }, { "epoch": 1.9677075287800214, "grad_norm": 0.007073475047945976, "learning_rate": 1.1469674677129312e-05, "loss": 0.0002, "step": 13375 }, { "epoch": 1.971385486777741, "grad_norm": 0.003219211706891656, "learning_rate": 1.1428804969756418e-05, "loss": 0.0323, "step": 13400 }, { "epoch": 1.9750634447754607, "grad_norm": 0.0061137378215789795, "learning_rate": 1.1387935262383521e-05, "loss": 0.0002, "step": 13425 }, { "epoch": 1.9787414027731804, "grad_norm": 0.006435078103095293, "learning_rate": 1.1347065555010627e-05, "loss": 0.0409, "step": 13450 }, { "epoch": 1.9824193607709, "grad_norm": 0.002217411994934082, "learning_rate": 1.1306195847637732e-05, "loss": 0.0002, "step": 13475 }, { "epoch": 1.9860973187686195, "grad_norm": 0.009155605919659138, "learning_rate": 1.1265326140264837e-05, "loss": 0.0487, "step": 13500 }, { "epoch": 1.9897752767663395, "grad_norm": 0.011870177462697029, "learning_rate": 1.122445643289194e-05, "loss": 0.0004, "step": 13525 }, { "epoch": 1.993453234764059, "grad_norm": 0.008746917359530926, "learning_rate": 1.1183586725519045e-05, "loss": 0.0411, "step": 13550 }, { "epoch": 1.9971311927617785, "grad_norm": 0.005829541012644768, "learning_rate": 1.114271701814615e-05, "loss": 0.0003, "step": 13575 }, { "epoch": 1.9999264408400457, "eval_accuracy": 0.9963221772710555, "eval_auc": 0.9999360039904769, "eval_f1": 0.9963186570460905, "eval_loss": 0.023916827514767647, "eval_precision": 0.9973466981132075, "eval_recall": 0.9952927331568108, "eval_runtime": 2353.5774, "eval_samples_per_second": 5.776, "eval_steps_per_second": 1.444, "step": 13594 }, { "epoch": 2.0008091507594985, "grad_norm": 0.010372490622103214, "learning_rate": 1.1101847310773254e-05, "loss": 0.0003, "step": 13600 }, { "epoch": 2.004487108757218, "grad_norm": 0.014902903698384762, "learning_rate": 1.106097760340036e-05, "loss": 0.0673, "step": 13625 }, { "epoch": 2.0081650667549376, "grad_norm": 0.005416123196482658, "learning_rate": 1.1020107896027465e-05, "loss": 0.0003, "step": 13650 }, { "epoch": 2.0118430247526575, "grad_norm": 0.007089643273502588, "learning_rate": 1.097923818865457e-05, "loss": 0.0003, "step": 13675 }, { "epoch": 2.015520982750377, "grad_norm": 0.005935342982411385, "learning_rate": 1.0938368481281674e-05, "loss": 0.0004, "step": 13700 }, { "epoch": 2.0191989407480966, "grad_norm": 0.004356461577117443, "learning_rate": 1.0897498773908779e-05, "loss": 0.0003, "step": 13725 }, { "epoch": 2.022876898745816, "grad_norm": 0.010521539486944675, "learning_rate": 1.0856629066535884e-05, "loss": 0.0839, "step": 13750 }, { "epoch": 2.026554856743536, "grad_norm": 0.007211623247712851, "learning_rate": 1.081575935916299e-05, "loss": 0.0215, "step": 13775 }, { "epoch": 2.0302328147412556, "grad_norm": 0.008732822723686695, "learning_rate": 1.0774889651790093e-05, "loss": 0.0002, "step": 13800 }, { "epoch": 2.033910772738975, "grad_norm": 0.005103670991957188, "learning_rate": 1.0734019944417199e-05, "loss": 0.0002, "step": 13825 }, { "epoch": 2.037588730736695, "grad_norm": 0.00569286709651351, "learning_rate": 1.0693150237044304e-05, "loss": 0.0002, "step": 13850 }, { "epoch": 2.0412666887344146, "grad_norm": 0.004690663423389196, "learning_rate": 1.0652280529671408e-05, "loss": 0.0002, "step": 13875 }, { "epoch": 2.044944646732134, "grad_norm": 0.003813117044046521, "learning_rate": 1.0611410822298512e-05, "loss": 0.0001, "step": 13900 }, { "epoch": 2.048622604729854, "grad_norm": 0.0031241225078701973, "learning_rate": 1.0570541114925617e-05, "loss": 0.0001, "step": 13925 }, { "epoch": 2.0523005627275737, "grad_norm": 0.001760639250278473, "learning_rate": 1.0529671407552722e-05, "loss": 0.0003, "step": 13950 }, { "epoch": 2.055978520725293, "grad_norm": 0.00507943844422698, "learning_rate": 1.0488801700179826e-05, "loss": 0.0465, "step": 13975 }, { "epoch": 2.059656478723013, "grad_norm": 0.005704312119632959, "learning_rate": 1.0447931992806931e-05, "loss": 0.0002, "step": 14000 }, { "epoch": 2.0633344367207327, "grad_norm": 0.0037137740291655064, "learning_rate": 1.0407062285434037e-05, "loss": 0.0002, "step": 14025 }, { "epoch": 2.067012394718452, "grad_norm": 0.004969414323568344, "learning_rate": 1.036619257806114e-05, "loss": 0.0002, "step": 14050 }, { "epoch": 2.070690352716172, "grad_norm": 0.002151261083781719, "learning_rate": 1.0325322870688246e-05, "loss": 0.0001, "step": 14075 }, { "epoch": 2.0743683107138917, "grad_norm": 0.004214055370539427, "learning_rate": 1.0284453163315351e-05, "loss": 0.0001, "step": 14100 }, { "epoch": 2.0780462687116112, "grad_norm": 0.004696809686720371, "learning_rate": 1.0243583455942457e-05, "loss": 0.0001, "step": 14125 }, { "epoch": 2.0817242267093308, "grad_norm": 8.668023109436035, "learning_rate": 1.020271374856956e-05, "loss": 0.0642, "step": 14150 }, { "epoch": 2.0854021847070507, "grad_norm": 0.00823593232780695, "learning_rate": 1.0161844041196666e-05, "loss": 0.0004, "step": 14175 }, { "epoch": 2.0890801427047703, "grad_norm": 0.006173284724354744, "learning_rate": 1.0120974333823771e-05, "loss": 0.0002, "step": 14200 }, { "epoch": 2.09275810070249, "grad_norm": 0.004422744270414114, "learning_rate": 1.0080104626450876e-05, "loss": 0.0002, "step": 14225 }, { "epoch": 2.0964360587002098, "grad_norm": 0.0038796046283096075, "learning_rate": 1.0039234919077978e-05, "loss": 0.0002, "step": 14250 }, { "epoch": 2.1001140166979293, "grad_norm": 0.003889993764460087, "learning_rate": 9.998365211705084e-06, "loss": 0.0008, "step": 14275 }, { "epoch": 2.103791974695649, "grad_norm": 0.0035641242284327745, "learning_rate": 9.957495504332189e-06, "loss": 0.0001, "step": 14300 }, { "epoch": 2.107469932693369, "grad_norm": 0.0037507452070713043, "learning_rate": 9.916625796959293e-06, "loss": 0.0001, "step": 14325 }, { "epoch": 2.1111478906910883, "grad_norm": 0.002810309175401926, "learning_rate": 9.875756089586398e-06, "loss": 0.0001, "step": 14350 }, { "epoch": 2.114825848688808, "grad_norm": 0.0030445558950304985, "learning_rate": 9.834886382213504e-06, "loss": 0.0001, "step": 14375 }, { "epoch": 2.118503806686528, "grad_norm": 0.0025213556364178658, "learning_rate": 9.794016674840609e-06, "loss": 0.0001, "step": 14400 }, { "epoch": 2.1221817646842474, "grad_norm": 0.0027236223686486483, "learning_rate": 9.753146967467713e-06, "loss": 0.0001, "step": 14425 }, { "epoch": 2.125859722681967, "grad_norm": 0.002416795352473855, "learning_rate": 9.712277260094818e-06, "loss": 0.0004, "step": 14450 }, { "epoch": 2.129537680679687, "grad_norm": 0.0019158340292051435, "learning_rate": 9.671407552721923e-06, "loss": 0.0001, "step": 14475 }, { "epoch": 2.1332156386774064, "grad_norm": 0.002519650151953101, "learning_rate": 9.630537845349029e-06, "loss": 0.0001, "step": 14500 }, { "epoch": 2.136893596675126, "grad_norm": 0.002294061239808798, "learning_rate": 9.589668137976132e-06, "loss": 0.0001, "step": 14525 }, { "epoch": 2.1405715546728454, "grad_norm": 0.0021358055528253317, "learning_rate": 9.548798430603238e-06, "loss": 0.0471, "step": 14550 }, { "epoch": 2.1442495126705654, "grad_norm": 0.001824073726311326, "learning_rate": 9.507928723230343e-06, "loss": 0.0001, "step": 14575 }, { "epoch": 2.147927470668285, "grad_norm": 0.001960406079888344, "learning_rate": 9.467059015857447e-06, "loss": 0.0001, "step": 14600 }, { "epoch": 2.1516054286660045, "grad_norm": 0.0018290438456460834, "learning_rate": 9.42618930848455e-06, "loss": 0.0001, "step": 14625 }, { "epoch": 2.1552833866637244, "grad_norm": 0.0019052918069064617, "learning_rate": 9.385319601111656e-06, "loss": 0.0001, "step": 14650 }, { "epoch": 2.158961344661444, "grad_norm": 0.0018661071080714464, "learning_rate": 9.344449893738761e-06, "loss": 0.0001, "step": 14675 }, { "epoch": 2.1626393026591635, "grad_norm": 0.0031746248714625835, "learning_rate": 9.303580186365865e-06, "loss": 0.049, "step": 14700 }, { "epoch": 2.1663172606568835, "grad_norm": 0.003573804395273328, "learning_rate": 9.26271047899297e-06, "loss": 0.0001, "step": 14725 }, { "epoch": 2.169995218654603, "grad_norm": 0.003289070213213563, "learning_rate": 9.221840771620076e-06, "loss": 0.0113, "step": 14750 }, { "epoch": 2.1736731766523225, "grad_norm": 0.00257130921818316, "learning_rate": 9.18097106424718e-06, "loss": 0.0483, "step": 14775 }, { "epoch": 2.1773511346500425, "grad_norm": 0.005980730522423983, "learning_rate": 9.140101356874285e-06, "loss": 0.0002, "step": 14800 }, { "epoch": 2.181029092647762, "grad_norm": 0.005953842308372259, "learning_rate": 9.09923164950139e-06, "loss": 0.0002, "step": 14825 }, { "epoch": 2.1847070506454815, "grad_norm": 0.037090156227350235, "learning_rate": 9.058361942128496e-06, "loss": 0.0785, "step": 14850 }, { "epoch": 2.188385008643201, "grad_norm": 0.007919345051050186, "learning_rate": 9.0174922347556e-06, "loss": 0.0006, "step": 14875 }, { "epoch": 2.192062966640921, "grad_norm": 0.021819893270730972, "learning_rate": 8.976622527382705e-06, "loss": 0.0376, "step": 14900 }, { "epoch": 2.1957409246386406, "grad_norm": 0.024493372067809105, "learning_rate": 8.93575282000981e-06, "loss": 0.0439, "step": 14925 }, { "epoch": 2.19941888263636, "grad_norm": 0.038370776921510696, "learning_rate": 8.894883112636915e-06, "loss": 0.0802, "step": 14950 }, { "epoch": 2.20309684063408, "grad_norm": 0.019332151859998703, "learning_rate": 8.854013405264019e-06, "loss": 0.0012, "step": 14975 }, { "epoch": 2.2067747986317996, "grad_norm": 0.03362823650240898, "learning_rate": 8.813143697891123e-06, "loss": 0.0369, "step": 15000 }, { "epoch": 2.210452756629519, "grad_norm": 0.024772603064775467, "learning_rate": 8.772273990518228e-06, "loss": 0.0008, "step": 15025 }, { "epoch": 2.214130714627239, "grad_norm": 0.02276591770350933, "learning_rate": 8.731404283145332e-06, "loss": 0.1007, "step": 15050 }, { "epoch": 2.2178086726249586, "grad_norm": 0.016099456697702408, "learning_rate": 8.690534575772437e-06, "loss": 0.0009, "step": 15075 }, { "epoch": 2.221486630622678, "grad_norm": 0.003277967683970928, "learning_rate": 8.649664868399542e-06, "loss": 0.0069, "step": 15100 }, { "epoch": 2.225164588620398, "grad_norm": 0.011233772151172161, "learning_rate": 8.608795161026648e-06, "loss": 0.0386, "step": 15125 }, { "epoch": 2.2288425466181176, "grad_norm": 0.007455474231392145, "learning_rate": 8.567925453653752e-06, "loss": 0.0003, "step": 15150 }, { "epoch": 2.232520504615837, "grad_norm": 0.011497107334434986, "learning_rate": 8.527055746280857e-06, "loss": 0.0004, "step": 15175 }, { "epoch": 2.236198462613557, "grad_norm": 0.003145186696201563, "learning_rate": 8.486186038907962e-06, "loss": 0.0003, "step": 15200 }, { "epoch": 2.2398764206112767, "grad_norm": 0.00954380352050066, "learning_rate": 8.445316331535066e-06, "loss": 0.0595, "step": 15225 }, { "epoch": 2.243554378608996, "grad_norm": 0.007323611527681351, "learning_rate": 8.404446624162171e-06, "loss": 0.0004, "step": 15250 }, { "epoch": 2.247232336606716, "grad_norm": 0.011944909580051899, "learning_rate": 8.363576916789277e-06, "loss": 0.0003, "step": 15275 }, { "epoch": 2.2509102946044357, "grad_norm": 0.01304931566119194, "learning_rate": 8.322707209416382e-06, "loss": 0.0389, "step": 15300 }, { "epoch": 2.2545882526021552, "grad_norm": 0.008787041530013084, "learning_rate": 8.281837502043486e-06, "loss": 0.0004, "step": 15325 }, { "epoch": 2.2582662105998748, "grad_norm": 0.011969480663537979, "learning_rate": 8.24096779467059e-06, "loss": 0.0004, "step": 15350 }, { "epoch": 2.2619441685975947, "grad_norm": 0.011229045689105988, "learning_rate": 8.200098087297695e-06, "loss": 0.0003, "step": 15375 }, { "epoch": 2.2656221265953143, "grad_norm": 0.00922977551817894, "learning_rate": 8.1592283799248e-06, "loss": 0.0004, "step": 15400 }, { "epoch": 2.269300084593034, "grad_norm": 0.008094431832432747, "learning_rate": 8.118358672551904e-06, "loss": 0.0003, "step": 15425 }, { "epoch": 2.2729780425907538, "grad_norm": 0.0032492594327777624, "learning_rate": 8.07748896517901e-06, "loss": 0.0002, "step": 15450 }, { "epoch": 2.2766560005884733, "grad_norm": 0.004196746740490198, "learning_rate": 8.036619257806115e-06, "loss": 0.0002, "step": 15475 }, { "epoch": 2.280333958586193, "grad_norm": 0.005214506760239601, "learning_rate": 7.995749550433218e-06, "loss": 0.0002, "step": 15500 }, { "epoch": 2.284011916583913, "grad_norm": 0.0034893976990133524, "learning_rate": 7.954879843060324e-06, "loss": 0.0002, "step": 15525 }, { "epoch": 2.2876898745816323, "grad_norm": 0.0036745897959917784, "learning_rate": 7.914010135687429e-06, "loss": 0.0002, "step": 15550 }, { "epoch": 2.291367832579352, "grad_norm": 0.0020664865151047707, "learning_rate": 7.873140428314534e-06, "loss": 0.0001, "step": 15575 }, { "epoch": 2.2950457905770714, "grad_norm": 0.005072563886642456, "learning_rate": 7.832270720941638e-06, "loss": 0.0417, "step": 15600 }, { "epoch": 2.2987237485747913, "grad_norm": 0.004465815611183643, "learning_rate": 7.791401013568743e-06, "loss": 0.0002, "step": 15625 }, { "epoch": 2.302401706572511, "grad_norm": 0.005166616756469011, "learning_rate": 7.750531306195849e-06, "loss": 0.016, "step": 15650 }, { "epoch": 2.3060796645702304, "grad_norm": 0.0010274857049807906, "learning_rate": 7.709661598822953e-06, "loss": 0.0002, "step": 15675 }, { "epoch": 2.3097576225679504, "grad_norm": 0.006900500506162643, "learning_rate": 7.668791891450058e-06, "loss": 0.0002, "step": 15700 }, { "epoch": 2.31343558056567, "grad_norm": 0.004663816653192043, "learning_rate": 7.6279221840771624e-06, "loss": 0.0001, "step": 15725 }, { "epoch": 2.3171135385633894, "grad_norm": 0.006946474779397249, "learning_rate": 7.587052476704268e-06, "loss": 0.0001, "step": 15750 }, { "epoch": 2.3207914965611094, "grad_norm": 0.003868917003273964, "learning_rate": 7.5461827693313715e-06, "loss": 0.0342, "step": 15775 }, { "epoch": 2.324469454558829, "grad_norm": 0.0028817090205848217, "learning_rate": 7.505313061958477e-06, "loss": 0.0138, "step": 15800 }, { "epoch": 2.3281474125565484, "grad_norm": 0.0059151784516870975, "learning_rate": 7.464443354585581e-06, "loss": 0.0733, "step": 15825 }, { "epoch": 2.3318253705542684, "grad_norm": 0.004359770100563765, "learning_rate": 7.423573647212686e-06, "loss": 0.0421, "step": 15850 }, { "epoch": 2.335503328551988, "grad_norm": 0.011809108778834343, "learning_rate": 7.3827039398397904e-06, "loss": 0.0003, "step": 15875 }, { "epoch": 2.3391812865497075, "grad_norm": 0.005823772866278887, "learning_rate": 7.341834232466896e-06, "loss": 0.0003, "step": 15900 }, { "epoch": 2.3428592445474274, "grad_norm": 0.003460386535152793, "learning_rate": 7.300964525094e-06, "loss": 0.0002, "step": 15925 }, { "epoch": 2.346537202545147, "grad_norm": 0.008056416176259518, "learning_rate": 7.260094817721106e-06, "loss": 0.0381, "step": 15950 }, { "epoch": 2.3502151605428665, "grad_norm": 0.007788171526044607, "learning_rate": 7.21922511034821e-06, "loss": 0.0002, "step": 15975 }, { "epoch": 2.3538931185405865, "grad_norm": 0.0066045369021594524, "learning_rate": 7.178355402975315e-06, "loss": 0.0002, "step": 16000 }, { "epoch": 2.357571076538306, "grad_norm": 0.004805906675755978, "learning_rate": 7.137485695602419e-06, "loss": 0.0053, "step": 16025 }, { "epoch": 2.3612490345360255, "grad_norm": 0.010813217610120773, "learning_rate": 7.096615988229525e-06, "loss": 0.0381, "step": 16050 }, { "epoch": 2.3649269925337455, "grad_norm": 0.009302555583417416, "learning_rate": 7.055746280856629e-06, "loss": 0.0393, "step": 16075 }, { "epoch": 2.368604950531465, "grad_norm": 0.011496507562696934, "learning_rate": 7.014876573483734e-06, "loss": 0.0386, "step": 16100 }, { "epoch": 2.3722829085291846, "grad_norm": 0.025231193751096725, "learning_rate": 6.974006866110839e-06, "loss": 0.0367, "step": 16125 }, { "epoch": 2.375960866526904, "grad_norm": 0.020235830917954445, "learning_rate": 6.933137158737944e-06, "loss": 0.0006, "step": 16150 }, { "epoch": 2.379638824524624, "grad_norm": 0.006687480956315994, "learning_rate": 6.892267451365048e-06, "loss": 0.0004, "step": 16175 }, { "epoch": 2.3833167825223436, "grad_norm": 0.003918817732483149, "learning_rate": 6.851397743992153e-06, "loss": 0.0003, "step": 16200 }, { "epoch": 2.386994740520063, "grad_norm": 0.011175381019711494, "learning_rate": 6.810528036619258e-06, "loss": 0.0003, "step": 16225 }, { "epoch": 2.390672698517783, "grad_norm": 0.007755937986075878, "learning_rate": 6.769658329246363e-06, "loss": 0.0002, "step": 16250 }, { "epoch": 2.3943506565155026, "grad_norm": 0.004887331277132034, "learning_rate": 6.728788621873468e-06, "loss": 0.0002, "step": 16275 }, { "epoch": 2.398028614513222, "grad_norm": 0.0048552751541137695, "learning_rate": 6.6879189145005725e-06, "loss": 0.0002, "step": 16300 }, { "epoch": 2.401706572510942, "grad_norm": 0.011255592107772827, "learning_rate": 6.647049207127677e-06, "loss": 0.0002, "step": 16325 }, { "epoch": 2.4053845305086616, "grad_norm": 0.009114415384829044, "learning_rate": 6.6061794997547816e-06, "loss": 0.0002, "step": 16350 }, { "epoch": 2.409062488506381, "grad_norm": 0.009386932477355003, "learning_rate": 6.565309792381886e-06, "loss": 0.0395, "step": 16375 }, { "epoch": 2.4127404465041007, "grad_norm": 0.005927698221057653, "learning_rate": 6.5244400850089915e-06, "loss": 0.0002, "step": 16400 }, { "epoch": 2.4164184045018207, "grad_norm": 0.0084453159943223, "learning_rate": 6.483570377636096e-06, "loss": 0.0506, "step": 16425 }, { "epoch": 2.42009636249954, "grad_norm": 0.008083072490990162, "learning_rate": 6.442700670263201e-06, "loss": 0.0003, "step": 16450 }, { "epoch": 2.4237743204972597, "grad_norm": 0.00735598336905241, "learning_rate": 6.401830962890306e-06, "loss": 0.0003, "step": 16475 }, { "epoch": 2.4274522784949797, "grad_norm": 0.007824303582310677, "learning_rate": 6.360961255517411e-06, "loss": 0.0398, "step": 16500 }, { "epoch": 2.431130236492699, "grad_norm": 0.009155460633337498, "learning_rate": 6.320091548144516e-06, "loss": 0.0003, "step": 16525 }, { "epoch": 2.4348081944904187, "grad_norm": 0.005739257670938969, "learning_rate": 6.27922184077162e-06, "loss": 0.0003, "step": 16550 }, { "epoch": 2.4384861524881387, "grad_norm": 0.006940542254596949, "learning_rate": 6.238352133398725e-06, "loss": 0.0003, "step": 16575 }, { "epoch": 2.4421641104858582, "grad_norm": 0.0053449515253305435, "learning_rate": 6.197482426025829e-06, "loss": 0.0002, "step": 16600 }, { "epoch": 2.4458420684835778, "grad_norm": 0.005325790494680405, "learning_rate": 6.156612718652935e-06, "loss": 0.0002, "step": 16625 }, { "epoch": 2.4495200264812977, "grad_norm": 0.006259521469473839, "learning_rate": 6.115743011280039e-06, "loss": 0.0002, "step": 16650 }, { "epoch": 2.4531979844790173, "grad_norm": 0.006854058708995581, "learning_rate": 6.074873303907145e-06, "loss": 0.0002, "step": 16675 }, { "epoch": 2.456875942476737, "grad_norm": 0.004361658822745085, "learning_rate": 6.034003596534249e-06, "loss": 0.0002, "step": 16700 }, { "epoch": 2.4605539004744568, "grad_norm": 0.0055083055049180984, "learning_rate": 5.993133889161354e-06, "loss": 0.0002, "step": 16725 }, { "epoch": 2.4642318584721763, "grad_norm": 0.0033617918379604816, "learning_rate": 5.952264181788458e-06, "loss": 0.0002, "step": 16750 }, { "epoch": 2.467909816469896, "grad_norm": 0.0048737069591879845, "learning_rate": 5.911394474415564e-06, "loss": 0.0001, "step": 16775 }, { "epoch": 2.471587774467616, "grad_norm": 0.0036280914209783077, "learning_rate": 5.870524767042668e-06, "loss": 0.0001, "step": 16800 }, { "epoch": 2.4752657324653353, "grad_norm": 0.003542742459103465, "learning_rate": 5.829655059669773e-06, "loss": 0.0001, "step": 16825 }, { "epoch": 2.478943690463055, "grad_norm": 0.004226271994411945, "learning_rate": 5.788785352296878e-06, "loss": 0.0001, "step": 16850 }, { "epoch": 2.482621648460775, "grad_norm": 0.0033333373721688986, "learning_rate": 5.7479156449239826e-06, "loss": 0.0001, "step": 16875 }, { "epoch": 2.4862996064584943, "grad_norm": 0.003888545325025916, "learning_rate": 5.707045937551087e-06, "loss": 0.0001, "step": 16900 }, { "epoch": 2.489977564456214, "grad_norm": 0.0031992702279239893, "learning_rate": 5.666176230178192e-06, "loss": 0.0001, "step": 16925 }, { "epoch": 2.4936555224539334, "grad_norm": 0.0026705926284193993, "learning_rate": 5.625306522805297e-06, "loss": 0.0001, "step": 16950 }, { "epoch": 2.4973334804516534, "grad_norm": 0.001754347002133727, "learning_rate": 5.5844368154324015e-06, "loss": 0.0001, "step": 16975 }, { "epoch": 2.501011438449373, "grad_norm": 0.0018643263028934598, "learning_rate": 5.543567108059507e-06, "loss": 0.0006, "step": 17000 }, { "epoch": 2.5046893964470924, "grad_norm": 0.002491478342562914, "learning_rate": 5.502697400686611e-06, "loss": 0.0001, "step": 17025 }, { "epoch": 2.5083673544448124, "grad_norm": 0.002735487651079893, "learning_rate": 5.461827693313716e-06, "loss": 0.0001, "step": 17050 }, { "epoch": 2.512045312442532, "grad_norm": 0.002121156081557274, "learning_rate": 5.420957985940821e-06, "loss": 0.0013, "step": 17075 }, { "epoch": 2.5157232704402515, "grad_norm": 0.001368986559100449, "learning_rate": 5.380088278567925e-06, "loss": 0.0001, "step": 17100 }, { "epoch": 2.519401228437971, "grad_norm": 0.0018654069863259792, "learning_rate": 5.33921857119503e-06, "loss": 0.0001, "step": 17125 }, { "epoch": 2.523079186435691, "grad_norm": 0.0008688032394275069, "learning_rate": 5.298348863822135e-06, "loss": 0.0001, "step": 17150 }, { "epoch": 2.5267571444334105, "grad_norm": 0.0014730022521689534, "learning_rate": 5.25747915644924e-06, "loss": 0.0001, "step": 17175 }, { "epoch": 2.53043510243113, "grad_norm": 589.290283203125, "learning_rate": 5.216609449076345e-06, "loss": 0.0295, "step": 17200 }, { "epoch": 2.53411306042885, "grad_norm": 0.0014689558884128928, "learning_rate": 5.17573974170345e-06, "loss": 0.0, "step": 17225 }, { "epoch": 2.5377910184265695, "grad_norm": 0.001330269267782569, "learning_rate": 5.134870034330555e-06, "loss": 0.0, "step": 17250 }, { "epoch": 2.541468976424289, "grad_norm": 0.001491030678153038, "learning_rate": 5.094000326957658e-06, "loss": 0.0, "step": 17275 }, { "epoch": 2.545146934422009, "grad_norm": 0.002089619869366288, "learning_rate": 5.053130619584764e-06, "loss": 0.0778, "step": 17300 }, { "epoch": 2.5488248924197285, "grad_norm": 0.0015247270930558443, "learning_rate": 5.012260912211868e-06, "loss": 0.0188, "step": 17325 }, { "epoch": 2.552502850417448, "grad_norm": 0.002242110203951597, "learning_rate": 4.971391204838974e-06, "loss": 0.0179, "step": 17350 }, { "epoch": 2.556180808415168, "grad_norm": 0.0018629367696121335, "learning_rate": 4.930521497466078e-06, "loss": 0.0181, "step": 17375 }, { "epoch": 2.5598587664128876, "grad_norm": 0.0014634733088314533, "learning_rate": 4.8896517900931836e-06, "loss": 0.0328, "step": 17400 }, { "epoch": 2.563536724410607, "grad_norm": 0.001321232644841075, "learning_rate": 4.848782082720288e-06, "loss": 0.0, "step": 17425 }, { "epoch": 2.567214682408327, "grad_norm": 0.0012456915574148297, "learning_rate": 4.807912375347393e-06, "loss": 0.0003, "step": 17450 }, { "epoch": 2.5708926404060466, "grad_norm": 0.0009979073656722903, "learning_rate": 4.767042667974497e-06, "loss": 0.0001, "step": 17475 }, { "epoch": 2.574570598403766, "grad_norm": 0.001377744134515524, "learning_rate": 4.726172960601602e-06, "loss": 0.0, "step": 17500 }, { "epoch": 2.578248556401486, "grad_norm": 0.0022715404629707336, "learning_rate": 4.685303253228707e-06, "loss": 0.0498, "step": 17525 }, { "epoch": 2.5819265143992056, "grad_norm": 0.002307375194504857, "learning_rate": 4.644433545855812e-06, "loss": 0.0001, "step": 17550 }, { "epoch": 2.585604472396925, "grad_norm": 0.002744297729805112, "learning_rate": 4.603563838482917e-06, "loss": 0.0444, "step": 17575 }, { "epoch": 2.589282430394645, "grad_norm": 0.004225959535688162, "learning_rate": 4.5626941311100215e-06, "loss": 0.0148, "step": 17600 }, { "epoch": 2.5929603883923646, "grad_norm": 0.0028173536993563175, "learning_rate": 4.521824423737127e-06, "loss": 0.0033, "step": 17625 }, { "epoch": 2.596638346390084, "grad_norm": 0.00215067807585001, "learning_rate": 4.4809547163642305e-06, "loss": 0.0001, "step": 17650 }, { "epoch": 2.600316304387804, "grad_norm": 0.004402931313961744, "learning_rate": 4.440085008991336e-06, "loss": 0.0001, "step": 17675 }, { "epoch": 2.6039942623855237, "grad_norm": 0.0019863785710185766, "learning_rate": 4.3992153016184404e-06, "loss": 0.0001, "step": 17700 }, { "epoch": 2.607672220383243, "grad_norm": 0.0032948977313935757, "learning_rate": 4.358345594245545e-06, "loss": 0.0001, "step": 17725 }, { "epoch": 2.6113501783809627, "grad_norm": 0.0017591605428606272, "learning_rate": 4.31747588687265e-06, "loss": 0.0001, "step": 17750 }, { "epoch": 2.6150281363786827, "grad_norm": 0.5669000148773193, "learning_rate": 4.276606179499755e-06, "loss": 0.0002, "step": 17775 }, { "epoch": 2.6187060943764022, "grad_norm": 0.0018617259338498116, "learning_rate": 4.23573647212686e-06, "loss": 0.044, "step": 17800 }, { "epoch": 2.6223840523741218, "grad_norm": 0.004173843190073967, "learning_rate": 4.194866764753964e-06, "loss": 0.0001, "step": 17825 }, { "epoch": 2.6260620103718413, "grad_norm": 0.005529914982616901, "learning_rate": 4.153997057381069e-06, "loss": 0.0001, "step": 17850 }, { "epoch": 2.6297399683695613, "grad_norm": 0.003100366098806262, "learning_rate": 4.113127350008174e-06, "loss": 0.0001, "step": 17875 }, { "epoch": 2.633417926367281, "grad_norm": 0.0017961232224479318, "learning_rate": 4.072257642635279e-06, "loss": 0.012, "step": 17900 }, { "epoch": 2.6370958843650003, "grad_norm": 0.0022237550001591444, "learning_rate": 4.031387935262384e-06, "loss": 0.0001, "step": 17925 }, { "epoch": 2.6407738423627203, "grad_norm": 0.002973005408421159, "learning_rate": 3.990518227889488e-06, "loss": 0.0438, "step": 17950 }, { "epoch": 2.64445180036044, "grad_norm": 0.003434759797528386, "learning_rate": 3.949648520516594e-06, "loss": 0.0003, "step": 17975 }, { "epoch": 2.6481297583581593, "grad_norm": 0.003463399363681674, "learning_rate": 3.908778813143697e-06, "loss": 0.0001, "step": 18000 }, { "epoch": 2.6518077163558793, "grad_norm": 0.003393635619431734, "learning_rate": 3.867909105770803e-06, "loss": 0.0002, "step": 18025 }, { "epoch": 2.655485674353599, "grad_norm": 0.0027733049355447292, "learning_rate": 3.827039398397907e-06, "loss": 0.0001, "step": 18050 }, { "epoch": 2.6591636323513184, "grad_norm": 0.0038054571487009525, "learning_rate": 3.7861696910250126e-06, "loss": 0.0001, "step": 18075 }, { "epoch": 2.6628415903490383, "grad_norm": 0.0029823731165379286, "learning_rate": 3.745299983652117e-06, "loss": 0.0001, "step": 18100 }, { "epoch": 2.666519548346758, "grad_norm": 0.0019862265326082706, "learning_rate": 3.704430276279222e-06, "loss": 0.0001, "step": 18125 }, { "epoch": 2.6701975063444774, "grad_norm": 0.003500757971778512, "learning_rate": 3.6635605689063266e-06, "loss": 0.0001, "step": 18150 }, { "epoch": 2.6738754643421974, "grad_norm": 0.002085187705233693, "learning_rate": 3.6226908615334315e-06, "loss": 0.0001, "step": 18175 }, { "epoch": 2.677553422339917, "grad_norm": 0.0023257972206920385, "learning_rate": 3.5818211541605365e-06, "loss": 0.0001, "step": 18200 }, { "epoch": 2.6812313803376364, "grad_norm": 0.0022203666158020496, "learning_rate": 3.5409514467876414e-06, "loss": 0.0001, "step": 18225 }, { "epoch": 2.6849093383353564, "grad_norm": 0.0012388962786644697, "learning_rate": 3.500081739414746e-06, "loss": 0.0, "step": 18250 }, { "epoch": 2.688587296333076, "grad_norm": 0.0008910479955375195, "learning_rate": 3.4592120320418505e-06, "loss": 0.0003, "step": 18275 }, { "epoch": 2.6922652543307954, "grad_norm": 0.0010503758676350117, "learning_rate": 3.4183423246689554e-06, "loss": 0.0, "step": 18300 }, { "epoch": 2.6959432123285154, "grad_norm": 0.000730241066776216, "learning_rate": 3.37747261729606e-06, "loss": 0.0001, "step": 18325 }, { "epoch": 2.699621170326235, "grad_norm": 0.000822307774797082, "learning_rate": 3.336602909923165e-06, "loss": 0.0, "step": 18350 }, { "epoch": 2.7032991283239545, "grad_norm": 1.4722820520401, "learning_rate": 3.29573320255027e-06, "loss": 0.1083, "step": 18375 }, { "epoch": 2.7069770863216744, "grad_norm": 0.004885438829660416, "learning_rate": 3.254863495177375e-06, "loss": 0.0002, "step": 18400 }, { "epoch": 2.710655044319394, "grad_norm": 0.0033965399488806725, "learning_rate": 3.2139937878044794e-06, "loss": 0.0001, "step": 18425 }, { "epoch": 2.7143330023171135, "grad_norm": 0.004250906407833099, "learning_rate": 3.1731240804315843e-06, "loss": 0.0231, "step": 18450 }, { "epoch": 2.7180109603148335, "grad_norm": 0.003409018972888589, "learning_rate": 3.1322543730586893e-06, "loss": 0.0002, "step": 18475 }, { "epoch": 2.721688918312553, "grad_norm": 0.0036356241907924414, "learning_rate": 3.0913846656857938e-06, "loss": 0.0409, "step": 18500 }, { "epoch": 2.7253668763102725, "grad_norm": 0.006237304303795099, "learning_rate": 3.0505149583128983e-06, "loss": 0.0386, "step": 18525 }, { "epoch": 2.729044834307992, "grad_norm": 0.006783687509596348, "learning_rate": 3.0096452509400033e-06, "loss": 0.0002, "step": 18550 }, { "epoch": 2.732722792305712, "grad_norm": 0.04287054389715195, "learning_rate": 2.9687755435671082e-06, "loss": 0.0321, "step": 18575 }, { "epoch": 2.7364007503034316, "grad_norm": 0.0038001120556145906, "learning_rate": 2.9279058361942127e-06, "loss": 0.0003, "step": 18600 }, { "epoch": 2.740078708301151, "grad_norm": 0.003841620171442628, "learning_rate": 2.8870361288213177e-06, "loss": 0.0001, "step": 18625 }, { "epoch": 2.7437566662988706, "grad_norm": 0.002676568925380707, "learning_rate": 2.8461664214484226e-06, "loss": 0.0001, "step": 18650 }, { "epoch": 2.7474346242965906, "grad_norm": 0.008307211101055145, "learning_rate": 2.8052967140755276e-06, "loss": 0.0001, "step": 18675 }, { "epoch": 2.75111258229431, "grad_norm": 0.0034743708092719316, "learning_rate": 2.764427006702632e-06, "loss": 0.0001, "step": 18700 }, { "epoch": 2.7547905402920296, "grad_norm": 0.0020617684349417686, "learning_rate": 2.7235572993297367e-06, "loss": 0.0001, "step": 18725 }, { "epoch": 2.7584684982897496, "grad_norm": 0.0017286173533648252, "learning_rate": 2.6826875919568416e-06, "loss": 0.0001, "step": 18750 }, { "epoch": 2.762146456287469, "grad_norm": 0.001774169155396521, "learning_rate": 2.6418178845839466e-06, "loss": 0.0001, "step": 18775 }, { "epoch": 2.7658244142851887, "grad_norm": 0.003061393741518259, "learning_rate": 2.600948177211051e-06, "loss": 0.0298, "step": 18800 }, { "epoch": 2.7695023722829086, "grad_norm": 0.00195386353880167, "learning_rate": 2.560078469838156e-06, "loss": 0.0001, "step": 18825 }, { "epoch": 2.773180330280628, "grad_norm": 0.0015053004026412964, "learning_rate": 2.519208762465261e-06, "loss": 0.0001, "step": 18850 }, { "epoch": 2.7768582882783477, "grad_norm": 0.002827111864462495, "learning_rate": 2.4783390550923655e-06, "loss": 0.0001, "step": 18875 }, { "epoch": 2.7805362462760677, "grad_norm": 0.0010932940058410168, "learning_rate": 2.4374693477194705e-06, "loss": 0.0001, "step": 18900 }, { "epoch": 2.784214204273787, "grad_norm": 7.858973026275635, "learning_rate": 2.3965996403465754e-06, "loss": 0.0468, "step": 18925 }, { "epoch": 2.7878921622715067, "grad_norm": 0.002107949461787939, "learning_rate": 2.35572993297368e-06, "loss": 0.0001, "step": 18950 }, { "epoch": 2.7915701202692267, "grad_norm": 0.001860212185420096, "learning_rate": 2.3148602256007845e-06, "loss": 0.0001, "step": 18975 }, { "epoch": 2.795248078266946, "grad_norm": 0.002180658746510744, "learning_rate": 2.2739905182278894e-06, "loss": 0.0001, "step": 19000 }, { "epoch": 2.7989260362646657, "grad_norm": 0.001684672199189663, "learning_rate": 2.2331208108549944e-06, "loss": 0.0001, "step": 19025 }, { "epoch": 2.8026039942623857, "grad_norm": 0.0015821090200915933, "learning_rate": 2.1922511034820993e-06, "loss": 0.0001, "step": 19050 }, { "epoch": 2.8062819522601052, "grad_norm": 0.0031413165852427483, "learning_rate": 2.151381396109204e-06, "loss": 0.0222, "step": 19075 }, { "epoch": 2.8099599102578248, "grad_norm": 0.001654456602409482, "learning_rate": 2.110511688736309e-06, "loss": 0.0001, "step": 19100 }, { "epoch": 2.8136378682555447, "grad_norm": 0.0025208396837115288, "learning_rate": 2.0696419813634138e-06, "loss": 0.0297, "step": 19125 }, { "epoch": 2.8173158262532643, "grad_norm": 0.0016039038309827447, "learning_rate": 2.0287722739905183e-06, "loss": 0.0001, "step": 19150 }, { "epoch": 2.820993784250984, "grad_norm": 0.0015692878514528275, "learning_rate": 1.987902566617623e-06, "loss": 0.0001, "step": 19175 }, { "epoch": 2.8246717422487038, "grad_norm": 0.0014573705848306417, "learning_rate": 1.9470328592447278e-06, "loss": 0.0001, "step": 19200 }, { "epoch": 2.8283497002464233, "grad_norm": 0.005317123141139746, "learning_rate": 1.9061631518718325e-06, "loss": 0.0001, "step": 19225 }, { "epoch": 2.832027658244143, "grad_norm": 0.0014695243444293737, "learning_rate": 1.8652934444989374e-06, "loss": 0.0312, "step": 19250 }, { "epoch": 2.835705616241863, "grad_norm": 0.04826376587152481, "learning_rate": 1.8244237371260422e-06, "loss": 0.0263, "step": 19275 }, { "epoch": 2.8393835742395823, "grad_norm": 0.0012747733853757381, "learning_rate": 1.7835540297531471e-06, "loss": 0.0001, "step": 19300 }, { "epoch": 2.843061532237302, "grad_norm": 0.0011536297388374805, "learning_rate": 1.7426843223802519e-06, "loss": 0.025, "step": 19325 }, { "epoch": 2.8467394902350214, "grad_norm": 0.00559173384681344, "learning_rate": 1.7018146150073564e-06, "loss": 0.0001, "step": 19350 }, { "epoch": 2.8504174482327413, "grad_norm": 0.0011801973450928926, "learning_rate": 1.6609449076344614e-06, "loss": 0.0001, "step": 19375 }, { "epoch": 2.854095406230461, "grad_norm": 0.020327366888523102, "learning_rate": 1.620075200261566e-06, "loss": 0.0001, "step": 19400 }, { "epoch": 2.8577733642281804, "grad_norm": 0.0012536696158349514, "learning_rate": 1.579205492888671e-06, "loss": 0.0001, "step": 19425 }, { "epoch": 2.8614513222259, "grad_norm": 0.0010541353840380907, "learning_rate": 1.5383357855157758e-06, "loss": 0.0, "step": 19450 }, { "epoch": 2.86512928022362, "grad_norm": 0.0011492278426885605, "learning_rate": 1.4974660781428805e-06, "loss": 0.0001, "step": 19475 }, { "epoch": 2.8688072382213394, "grad_norm": 0.002121875062584877, "learning_rate": 1.4565963707699853e-06, "loss": 0.0339, "step": 19500 }, { "epoch": 2.872485196219059, "grad_norm": 0.0013062648940831423, "learning_rate": 1.4157266633970902e-06, "loss": 0.0001, "step": 19525 }, { "epoch": 2.876163154216779, "grad_norm": 0.0012365735601633787, "learning_rate": 1.374856956024195e-06, "loss": 0.0001, "step": 19550 }, { "epoch": 2.8798411122144985, "grad_norm": 0.001490547088906169, "learning_rate": 1.3339872486512997e-06, "loss": 0.0389, "step": 19575 }, { "epoch": 2.883519070212218, "grad_norm": 0.0010857345769181848, "learning_rate": 1.2931175412784044e-06, "loss": 0.0002, "step": 19600 }, { "epoch": 2.887197028209938, "grad_norm": 0.0016767021734267473, "learning_rate": 1.2522478339055092e-06, "loss": 0.0001, "step": 19625 }, { "epoch": 2.8908749862076575, "grad_norm": 0.004218839108943939, "learning_rate": 1.2113781265326141e-06, "loss": 0.0001, "step": 19650 }, { "epoch": 2.894552944205377, "grad_norm": 0.0010596220381557941, "learning_rate": 1.1705084191597189e-06, "loss": 0.0001, "step": 19675 }, { "epoch": 2.898230902203097, "grad_norm": 0.005758639425039291, "learning_rate": 1.1296387117868236e-06, "loss": 0.0001, "step": 19700 }, { "epoch": 2.9019088602008165, "grad_norm": 0.004077006597071886, "learning_rate": 1.0887690044139283e-06, "loss": 0.0001, "step": 19725 }, { "epoch": 2.905586818198536, "grad_norm": 0.023057300597429276, "learning_rate": 1.0478992970410333e-06, "loss": 0.0001, "step": 19750 }, { "epoch": 2.909264776196256, "grad_norm": 0.0010171595495194197, "learning_rate": 1.007029589668138e-06, "loss": 0.0002, "step": 19775 }, { "epoch": 2.9129427341939755, "grad_norm": 0.0021811590995639563, "learning_rate": 9.661598822952428e-07, "loss": 0.0018, "step": 19800 }, { "epoch": 2.916620692191695, "grad_norm": 0.0007530258735641837, "learning_rate": 9.252901749223475e-07, "loss": 0.0, "step": 19825 }, { "epoch": 2.920298650189415, "grad_norm": 0.0008248965605162084, "learning_rate": 8.844204675494524e-07, "loss": 0.0, "step": 19850 }, { "epoch": 2.9239766081871346, "grad_norm": 0.0008437008364126086, "learning_rate": 8.435507601765572e-07, "loss": 0.0001, "step": 19875 }, { "epoch": 2.927654566184854, "grad_norm": 0.0011598097626119852, "learning_rate": 8.026810528036619e-07, "loss": 0.029, "step": 19900 }, { "epoch": 2.931332524182574, "grad_norm": 0.000989201944321394, "learning_rate": 7.618113454307668e-07, "loss": 0.0001, "step": 19925 }, { "epoch": 2.9350104821802936, "grad_norm": 0.0009332878980785608, "learning_rate": 7.209416380578715e-07, "loss": 0.0001, "step": 19950 }, { "epoch": 2.938688440178013, "grad_norm": 0.0010302929440513253, "learning_rate": 6.800719306849764e-07, "loss": 0.0316, "step": 19975 }, { "epoch": 2.942366398175733, "grad_norm": 0.0011053696507588029, "learning_rate": 6.392022233120811e-07, "loss": 0.0001, "step": 20000 }, { "epoch": 2.9460443561734526, "grad_norm": 0.001087658922187984, "learning_rate": 5.983325159391858e-07, "loss": 0.0, "step": 20025 }, { "epoch": 2.949722314171172, "grad_norm": 0.0008900929242372513, "learning_rate": 5.574628085662906e-07, "loss": 0.0001, "step": 20050 }, { "epoch": 2.9534002721688917, "grad_norm": 0.001053415471687913, "learning_rate": 5.165931011933954e-07, "loss": 0.0, "step": 20075 }, { "epoch": 2.9570782301666116, "grad_norm": 0.0008429349982179701, "learning_rate": 4.757233938205003e-07, "loss": 0.0, "step": 20100 }, { "epoch": 2.960756188164331, "grad_norm": 0.0009649925632402301, "learning_rate": 4.34853686447605e-07, "loss": 0.0, "step": 20125 }, { "epoch": 2.9644341461620507, "grad_norm": 0.0009367198217660189, "learning_rate": 3.939839790747098e-07, "loss": 0.0002, "step": 20150 }, { "epoch": 2.9681121041597702, "grad_norm": 0.0008432368049398065, "learning_rate": 3.5311427170181465e-07, "loss": 0.0, "step": 20175 }, { "epoch": 2.97179006215749, "grad_norm": 2.9367611408233643, "learning_rate": 3.1224456432891944e-07, "loss": 0.0002, "step": 20200 }, { "epoch": 2.9754680201552097, "grad_norm": 0.0008842748356983066, "learning_rate": 2.7137485695602424e-07, "loss": 0.0001, "step": 20225 }, { "epoch": 2.9791459781529293, "grad_norm": 0.3803035616874695, "learning_rate": 2.30505149583129e-07, "loss": 0.0303, "step": 20250 }, { "epoch": 2.9828239361506492, "grad_norm": 0.001255788840353489, "learning_rate": 1.8963544221023377e-07, "loss": 0.0001, "step": 20275 }, { "epoch": 2.9865018941483688, "grad_norm": 0.0012517735594883561, "learning_rate": 1.4876573483733856e-07, "loss": 0.0001, "step": 20300 }, { "epoch": 2.9901798521460883, "grad_norm": 0.0008377633057534695, "learning_rate": 1.0789602746444335e-07, "loss": 0.0001, "step": 20325 }, { "epoch": 2.9938578101438083, "grad_norm": 0.0008699085447005928, "learning_rate": 6.702632009154815e-08, "loss": 0.0001, "step": 20350 }, { "epoch": 2.997535768141528, "grad_norm": 0.000927777262404561, "learning_rate": 2.6156612718652934e-08, "loss": 0.023, "step": 20375 }, { "epoch": 2.9998896612600685, "eval_accuracy": 0.9969841853622655, "eval_auc": 0.9999289486306174, "eval_f1": 0.9969837416317222, "eval_loss": 0.01777876727283001, "eval_precision": 0.9972038263428992, "eval_recall": 0.9967637540453075, "eval_runtime": 2385.463, "eval_samples_per_second": 5.699, "eval_steps_per_second": 1.425, "step": 20391 } ], "logging_steps": 25, "max_steps": 20391, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.073019505969152e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }