ChoudharyTAlhaArain's picture
Upload folder using huggingface_hub
f4b3654 verified
raw
history blame contribute delete
No virus
146 kB
{
"best_metric": 0.01777876727283001,
"best_model_checkpoint": "autotrain-vp92t-1q2id/checkpoint-20391",
"epoch": 2.9998896612600685,
"eval_steps": 500,
"global_step": 20391,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003677957997719666,
"grad_norm": 4.279318809509277,
"learning_rate": 3.6764705882352943e-07,
"loss": 0.685,
"step": 25
},
{
"epoch": 0.007355915995439332,
"grad_norm": 5.647529602050781,
"learning_rate": 7.352941176470589e-07,
"loss": 0.6757,
"step": 50
},
{
"epoch": 0.011033873993158997,
"grad_norm": 5.691235065460205,
"learning_rate": 1.1029411764705884e-06,
"loss": 0.6611,
"step": 75
},
{
"epoch": 0.014711831990878664,
"grad_norm": 3.6039223670959473,
"learning_rate": 1.4705882352941177e-06,
"loss": 0.6402,
"step": 100
},
{
"epoch": 0.018389789988598332,
"grad_norm": 5.447757244110107,
"learning_rate": 1.8382352941176471e-06,
"loss": 0.5745,
"step": 125
},
{
"epoch": 0.022067747986317995,
"grad_norm": 11.862848281860352,
"learning_rate": 2.2058823529411767e-06,
"loss": 0.4869,
"step": 150
},
{
"epoch": 0.02574570598403766,
"grad_norm": 8.085945129394531,
"learning_rate": 2.573529411764706e-06,
"loss": 0.3687,
"step": 175
},
{
"epoch": 0.029423663981757327,
"grad_norm": 7.246406555175781,
"learning_rate": 2.9411764705882355e-06,
"loss": 0.3029,
"step": 200
},
{
"epoch": 0.033101621979477,
"grad_norm": 17.822601318359375,
"learning_rate": 3.308823529411765e-06,
"loss": 0.2098,
"step": 225
},
{
"epoch": 0.036779579977196664,
"grad_norm": 21.39044952392578,
"learning_rate": 3.6764705882352942e-06,
"loss": 0.1483,
"step": 250
},
{
"epoch": 0.04045753797491632,
"grad_norm": 1.4089692831039429,
"learning_rate": 4.044117647058823e-06,
"loss": 0.0803,
"step": 275
},
{
"epoch": 0.04413549597263599,
"grad_norm": 0.7371423840522766,
"learning_rate": 4.411764705882353e-06,
"loss": 0.0396,
"step": 300
},
{
"epoch": 0.047813453970355656,
"grad_norm": 0.22398647665977478,
"learning_rate": 4.779411764705882e-06,
"loss": 0.0381,
"step": 325
},
{
"epoch": 0.05149141196807532,
"grad_norm": 0.25332173705101013,
"learning_rate": 5.147058823529412e-06,
"loss": 0.0306,
"step": 350
},
{
"epoch": 0.05516936996579499,
"grad_norm": 0.24578100442886353,
"learning_rate": 5.5147058823529415e-06,
"loss": 0.0342,
"step": 375
},
{
"epoch": 0.058847327963514655,
"grad_norm": 0.09213005006313324,
"learning_rate": 5.882352941176471e-06,
"loss": 0.0094,
"step": 400
},
{
"epoch": 0.06252528596123433,
"grad_norm": 11.745342254638672,
"learning_rate": 6.25e-06,
"loss": 0.0627,
"step": 425
},
{
"epoch": 0.066203243958954,
"grad_norm": 12.147088050842285,
"learning_rate": 6.61764705882353e-06,
"loss": 0.0755,
"step": 450
},
{
"epoch": 0.06988120195667366,
"grad_norm": 0.14864382147789001,
"learning_rate": 6.985294117647059e-06,
"loss": 0.0532,
"step": 475
},
{
"epoch": 0.07355915995439333,
"grad_norm": 0.06755024939775467,
"learning_rate": 7.3529411764705884e-06,
"loss": 0.049,
"step": 500
},
{
"epoch": 0.077237117952113,
"grad_norm": 0.0582403726875782,
"learning_rate": 7.720588235294117e-06,
"loss": 0.0213,
"step": 525
},
{
"epoch": 0.08091507594983265,
"grad_norm": 0.04456046596169472,
"learning_rate": 8.088235294117646e-06,
"loss": 0.0446,
"step": 550
},
{
"epoch": 0.08459303394755231,
"grad_norm": 0.058339089155197144,
"learning_rate": 8.455882352941177e-06,
"loss": 0.0442,
"step": 575
},
{
"epoch": 0.08827099194527198,
"grad_norm": 0.03951073810458183,
"learning_rate": 8.823529411764707e-06,
"loss": 0.0131,
"step": 600
},
{
"epoch": 0.09194894994299165,
"grad_norm": 0.08159155398607254,
"learning_rate": 9.191176470588236e-06,
"loss": 0.0429,
"step": 625
},
{
"epoch": 0.09562690794071131,
"grad_norm": 0.0362938717007637,
"learning_rate": 9.558823529411764e-06,
"loss": 0.0336,
"step": 650
},
{
"epoch": 0.09930486593843098,
"grad_norm": 0.10369551926851273,
"learning_rate": 9.926470588235293e-06,
"loss": 0.0277,
"step": 675
},
{
"epoch": 0.10298282393615064,
"grad_norm": 0.032908402383327484,
"learning_rate": 1.0294117647058824e-05,
"loss": 0.0213,
"step": 700
},
{
"epoch": 0.10666078193387031,
"grad_norm": 0.017092719674110413,
"learning_rate": 1.0661764705882354e-05,
"loss": 0.0784,
"step": 725
},
{
"epoch": 0.11033873993158998,
"grad_norm": 0.015081583522260189,
"learning_rate": 1.1029411764705883e-05,
"loss": 0.0265,
"step": 750
},
{
"epoch": 0.11401669792930964,
"grad_norm": 0.09147176891565323,
"learning_rate": 1.139705882352941e-05,
"loss": 0.0381,
"step": 775
},
{
"epoch": 0.11769465592702931,
"grad_norm": 0.08481771498918533,
"learning_rate": 1.1764705882352942e-05,
"loss": 0.128,
"step": 800
},
{
"epoch": 0.12137261392474898,
"grad_norm": 0.014765892177820206,
"learning_rate": 1.2132352941176471e-05,
"loss": 0.0141,
"step": 825
},
{
"epoch": 0.12505057192246866,
"grad_norm": 0.018918083980679512,
"learning_rate": 1.25e-05,
"loss": 0.0538,
"step": 850
},
{
"epoch": 0.12872852992018832,
"grad_norm": 0.015013976022601128,
"learning_rate": 1.2867647058823528e-05,
"loss": 0.0019,
"step": 875
},
{
"epoch": 0.132406487917908,
"grad_norm": 11.109874725341797,
"learning_rate": 1.323529411764706e-05,
"loss": 0.0394,
"step": 900
},
{
"epoch": 0.13608444591562766,
"grad_norm": 0.015857884660363197,
"learning_rate": 1.3602941176470589e-05,
"loss": 0.0005,
"step": 925
},
{
"epoch": 0.13976240391334732,
"grad_norm": 0.015862109139561653,
"learning_rate": 1.3970588235294118e-05,
"loss": 0.0214,
"step": 950
},
{
"epoch": 0.143440361911067,
"grad_norm": 0.020424969494342804,
"learning_rate": 1.4338235294117647e-05,
"loss": 0.0367,
"step": 975
},
{
"epoch": 0.14711831990878665,
"grad_norm": 0.027131319046020508,
"learning_rate": 1.4705882352941177e-05,
"loss": 0.08,
"step": 1000
},
{
"epoch": 0.15079627790650632,
"grad_norm": 0.03147580847144127,
"learning_rate": 1.5073529411764706e-05,
"loss": 0.0329,
"step": 1025
},
{
"epoch": 0.154474235904226,
"grad_norm": 24.15705680847168,
"learning_rate": 1.5441176470588234e-05,
"loss": 0.0064,
"step": 1050
},
{
"epoch": 0.15815219390194565,
"grad_norm": 0.011522412300109863,
"learning_rate": 1.5808823529411767e-05,
"loss": 0.0762,
"step": 1075
},
{
"epoch": 0.1618301518996653,
"grad_norm": 0.04401927441358566,
"learning_rate": 1.6176470588235293e-05,
"loss": 0.0656,
"step": 1100
},
{
"epoch": 0.16550810989738496,
"grad_norm": 0.034190475940704346,
"learning_rate": 1.6544117647058825e-05,
"loss": 0.0308,
"step": 1125
},
{
"epoch": 0.16918606789510462,
"grad_norm": 0.021350180730223656,
"learning_rate": 1.6911764705882355e-05,
"loss": 0.0539,
"step": 1150
},
{
"epoch": 0.1728640258928243,
"grad_norm": 0.0446242094039917,
"learning_rate": 1.727941176470588e-05,
"loss": 0.0681,
"step": 1175
},
{
"epoch": 0.17654198389054396,
"grad_norm": 1.6311242580413818,
"learning_rate": 1.7647058823529414e-05,
"loss": 0.0293,
"step": 1200
},
{
"epoch": 0.18021994188826362,
"grad_norm": 0.00914335809648037,
"learning_rate": 1.801470588235294e-05,
"loss": 0.0386,
"step": 1225
},
{
"epoch": 0.1838978998859833,
"grad_norm": 0.009417989291250706,
"learning_rate": 1.8382352941176472e-05,
"loss": 0.0004,
"step": 1250
},
{
"epoch": 0.18757585788370296,
"grad_norm": 0.9801831245422363,
"learning_rate": 1.8750000000000002e-05,
"loss": 0.057,
"step": 1275
},
{
"epoch": 0.19125381588142262,
"grad_norm": 0.005170044023543596,
"learning_rate": 1.9117647058823528e-05,
"loss": 0.0002,
"step": 1300
},
{
"epoch": 0.1949317738791423,
"grad_norm": 0.03409629687666893,
"learning_rate": 1.948529411764706e-05,
"loss": 0.0394,
"step": 1325
},
{
"epoch": 0.19860973187686196,
"grad_norm": 0.008645136840641499,
"learning_rate": 1.9852941176470586e-05,
"loss": 0.0053,
"step": 1350
},
{
"epoch": 0.20228768987458162,
"grad_norm": 0.004454991314560175,
"learning_rate": 2.022058823529412e-05,
"loss": 0.0002,
"step": 1375
},
{
"epoch": 0.2059656478723013,
"grad_norm": 0.00453265942633152,
"learning_rate": 2.058823529411765e-05,
"loss": 0.0216,
"step": 1400
},
{
"epoch": 0.20964360587002095,
"grad_norm": 0.005607594270259142,
"learning_rate": 2.0955882352941175e-05,
"loss": 0.0206,
"step": 1425
},
{
"epoch": 0.21332156386774062,
"grad_norm": 0.002988673048093915,
"learning_rate": 2.1323529411764707e-05,
"loss": 0.0002,
"step": 1450
},
{
"epoch": 0.2169995218654603,
"grad_norm": 0.15066058933734894,
"learning_rate": 2.1691176470588237e-05,
"loss": 0.0002,
"step": 1475
},
{
"epoch": 0.22067747986317995,
"grad_norm": 0.2750360071659088,
"learning_rate": 2.2058823529411766e-05,
"loss": 0.0002,
"step": 1500
},
{
"epoch": 0.22435543786089962,
"grad_norm": 0.00299286050722003,
"learning_rate": 2.2426470588235296e-05,
"loss": 0.0004,
"step": 1525
},
{
"epoch": 0.2280333958586193,
"grad_norm": 0.004124614410102367,
"learning_rate": 2.279411764705882e-05,
"loss": 0.0001,
"step": 1550
},
{
"epoch": 0.23171135385633895,
"grad_norm": 0.5924888849258423,
"learning_rate": 2.3161764705882354e-05,
"loss": 0.1041,
"step": 1575
},
{
"epoch": 0.23538931185405862,
"grad_norm": 0.011225424706935883,
"learning_rate": 2.3529411764705884e-05,
"loss": 0.0528,
"step": 1600
},
{
"epoch": 0.23906726985177829,
"grad_norm": 0.008940880186855793,
"learning_rate": 2.389705882352941e-05,
"loss": 0.0972,
"step": 1625
},
{
"epoch": 0.24274522784949795,
"grad_norm": 0.008405734784901142,
"learning_rate": 2.4264705882352942e-05,
"loss": 0.0495,
"step": 1650
},
{
"epoch": 0.24642318584721762,
"grad_norm": 0.008656616322696209,
"learning_rate": 2.4632352941176472e-05,
"loss": 0.0216,
"step": 1675
},
{
"epoch": 0.2501011438449373,
"grad_norm": 0.02361353114247322,
"learning_rate": 2.5e-05,
"loss": 0.0385,
"step": 1700
},
{
"epoch": 0.25377910184265695,
"grad_norm": 0.05051364749670029,
"learning_rate": 2.536764705882353e-05,
"loss": 0.0814,
"step": 1725
},
{
"epoch": 0.25745705984037665,
"grad_norm": 0.00914950855076313,
"learning_rate": 2.5735294117647057e-05,
"loss": 0.0006,
"step": 1750
},
{
"epoch": 0.2611350178380963,
"grad_norm": 0.008635000325739384,
"learning_rate": 2.610294117647059e-05,
"loss": 0.0003,
"step": 1775
},
{
"epoch": 0.264812975835816,
"grad_norm": 0.005725966300815344,
"learning_rate": 2.647058823529412e-05,
"loss": 0.0002,
"step": 1800
},
{
"epoch": 0.2684909338335356,
"grad_norm": 0.014133188873529434,
"learning_rate": 2.6838235294117648e-05,
"loss": 0.0562,
"step": 1825
},
{
"epoch": 0.2721688918312553,
"grad_norm": 0.0024135392159223557,
"learning_rate": 2.7205882352941177e-05,
"loss": 0.003,
"step": 1850
},
{
"epoch": 0.27584684982897495,
"grad_norm": 0.03300013393163681,
"learning_rate": 2.7573529411764707e-05,
"loss": 0.0947,
"step": 1875
},
{
"epoch": 0.27952480782669464,
"grad_norm": 0.046893417835235596,
"learning_rate": 2.7941176470588236e-05,
"loss": 0.0613,
"step": 1900
},
{
"epoch": 0.2832027658244143,
"grad_norm": 0.009986027143895626,
"learning_rate": 2.8308823529411766e-05,
"loss": 0.0616,
"step": 1925
},
{
"epoch": 0.286880723822134,
"grad_norm": 0.05905308201909065,
"learning_rate": 2.8676470588235295e-05,
"loss": 0.1182,
"step": 1950
},
{
"epoch": 0.2905586818198536,
"grad_norm": 0.00858025811612606,
"learning_rate": 2.9044117647058824e-05,
"loss": 0.0007,
"step": 1975
},
{
"epoch": 0.2942366398175733,
"grad_norm": 95.36400604248047,
"learning_rate": 2.9411764705882354e-05,
"loss": 0.0869,
"step": 2000
},
{
"epoch": 0.29791459781529295,
"grad_norm": 0.005693793762475252,
"learning_rate": 2.9779411764705883e-05,
"loss": 0.0348,
"step": 2025
},
{
"epoch": 0.30159255581301264,
"grad_norm": 0.009921176359057426,
"learning_rate": 2.998365211705084e-05,
"loss": 0.0514,
"step": 2050
},
{
"epoch": 0.3052705138107323,
"grad_norm": 0.01893715187907219,
"learning_rate": 2.9942782409677946e-05,
"loss": 0.0575,
"step": 2075
},
{
"epoch": 0.308948471808452,
"grad_norm": 0.012356853112578392,
"learning_rate": 2.990191270230505e-05,
"loss": 0.0085,
"step": 2100
},
{
"epoch": 0.3126264298061716,
"grad_norm": 0.027572082355618477,
"learning_rate": 2.9861042994932156e-05,
"loss": 0.0071,
"step": 2125
},
{
"epoch": 0.3163043878038913,
"grad_norm": 0.0033696063328534365,
"learning_rate": 2.9820173287559262e-05,
"loss": 0.0153,
"step": 2150
},
{
"epoch": 0.31998234580161095,
"grad_norm": 0.0032816240563988686,
"learning_rate": 2.9779303580186367e-05,
"loss": 0.0425,
"step": 2175
},
{
"epoch": 0.3236603037993306,
"grad_norm": 0.013268062844872475,
"learning_rate": 2.9738433872813472e-05,
"loss": 0.0529,
"step": 2200
},
{
"epoch": 0.3273382617970503,
"grad_norm": 13.58910083770752,
"learning_rate": 2.9697564165440574e-05,
"loss": 0.1147,
"step": 2225
},
{
"epoch": 0.3310162197947699,
"grad_norm": 80.59501647949219,
"learning_rate": 2.965669445806768e-05,
"loss": 0.0043,
"step": 2250
},
{
"epoch": 0.3346941777924896,
"grad_norm": 81.63990020751953,
"learning_rate": 2.9615824750694785e-05,
"loss": 0.0355,
"step": 2275
},
{
"epoch": 0.33837213579020925,
"grad_norm": 0.014708608388900757,
"learning_rate": 2.957495504332189e-05,
"loss": 0.0668,
"step": 2300
},
{
"epoch": 0.34205009378792894,
"grad_norm": 0.015443817712366581,
"learning_rate": 2.9534085335948996e-05,
"loss": 0.0006,
"step": 2325
},
{
"epoch": 0.3457280517856486,
"grad_norm": 0.005841911304742098,
"learning_rate": 2.94932156285761e-05,
"loss": 0.0487,
"step": 2350
},
{
"epoch": 0.3494060097833683,
"grad_norm": 0.06746743619441986,
"learning_rate": 2.9452345921203207e-05,
"loss": 0.0229,
"step": 2375
},
{
"epoch": 0.3530839677810879,
"grad_norm": 0.002315863035619259,
"learning_rate": 2.941147621383031e-05,
"loss": 0.0003,
"step": 2400
},
{
"epoch": 0.3567619257788076,
"grad_norm": 0.0017154604429379106,
"learning_rate": 2.9370606506457414e-05,
"loss": 0.0308,
"step": 2425
},
{
"epoch": 0.36043988377652725,
"grad_norm": 0.009691163897514343,
"learning_rate": 2.932973679908452e-05,
"loss": 0.0256,
"step": 2450
},
{
"epoch": 0.36411784177424694,
"grad_norm": 0.011957678943872452,
"learning_rate": 2.9288867091711625e-05,
"loss": 0.0713,
"step": 2475
},
{
"epoch": 0.3677957997719666,
"grad_norm": 0.02373524010181427,
"learning_rate": 2.924799738433873e-05,
"loss": 0.0504,
"step": 2500
},
{
"epoch": 0.3714737577696863,
"grad_norm": 0.0031693673226982355,
"learning_rate": 2.9207127676965836e-05,
"loss": 0.0251,
"step": 2525
},
{
"epoch": 0.3751517157674059,
"grad_norm": 0.025995498523116112,
"learning_rate": 2.916625796959294e-05,
"loss": 0.0459,
"step": 2550
},
{
"epoch": 0.3788296737651256,
"grad_norm": 0.006027763709425926,
"learning_rate": 2.9125388262220043e-05,
"loss": 0.0013,
"step": 2575
},
{
"epoch": 0.38250763176284525,
"grad_norm": 0.014026220887899399,
"learning_rate": 2.9084518554847148e-05,
"loss": 0.0831,
"step": 2600
},
{
"epoch": 0.38618558976056494,
"grad_norm": 0.025293108075857162,
"learning_rate": 2.9043648847474254e-05,
"loss": 0.0275,
"step": 2625
},
{
"epoch": 0.3898635477582846,
"grad_norm": 0.0041050901636481285,
"learning_rate": 2.900277914010136e-05,
"loss": 0.0007,
"step": 2650
},
{
"epoch": 0.3935415057560043,
"grad_norm": 0.027650628238916397,
"learning_rate": 2.8961909432728464e-05,
"loss": 0.0002,
"step": 2675
},
{
"epoch": 0.3972194637537239,
"grad_norm": 0.06839253753423691,
"learning_rate": 2.8921039725355566e-05,
"loss": 0.0353,
"step": 2700
},
{
"epoch": 0.4008974217514436,
"grad_norm": 0.005996390245854855,
"learning_rate": 2.8880170017982672e-05,
"loss": 0.0134,
"step": 2725
},
{
"epoch": 0.40457537974916324,
"grad_norm": 0.0020017025526612997,
"learning_rate": 2.8839300310609774e-05,
"loss": 0.0002,
"step": 2750
},
{
"epoch": 0.40825333774688294,
"grad_norm": 0.04533281922340393,
"learning_rate": 2.879843060323688e-05,
"loss": 0.0367,
"step": 2775
},
{
"epoch": 0.4119312957446026,
"grad_norm": 0.005575124174356461,
"learning_rate": 2.8757560895863984e-05,
"loss": 0.064,
"step": 2800
},
{
"epoch": 0.41560925374232227,
"grad_norm": 0.0019691460765898228,
"learning_rate": 2.871669118849109e-05,
"loss": 0.0005,
"step": 2825
},
{
"epoch": 0.4192872117400419,
"grad_norm": 0.004097863100469112,
"learning_rate": 2.8675821481118195e-05,
"loss": 0.0557,
"step": 2850
},
{
"epoch": 0.4229651697377616,
"grad_norm": 0.0018194678705185652,
"learning_rate": 2.86349517737453e-05,
"loss": 0.0009,
"step": 2875
},
{
"epoch": 0.42664312773548124,
"grad_norm": 0.006679282058030367,
"learning_rate": 2.8594082066372406e-05,
"loss": 0.0882,
"step": 2900
},
{
"epoch": 0.43032108573320094,
"grad_norm": 0.0030163535848259926,
"learning_rate": 2.8553212358999508e-05,
"loss": 0.0005,
"step": 2925
},
{
"epoch": 0.4339990437309206,
"grad_norm": 0.2035808265209198,
"learning_rate": 2.8512342651626613e-05,
"loss": 0.0007,
"step": 2950
},
{
"epoch": 0.43767700172864027,
"grad_norm": 0.022791976109147072,
"learning_rate": 2.847147294425372e-05,
"loss": 0.0941,
"step": 2975
},
{
"epoch": 0.4413549597263599,
"grad_norm": 0.007299873046576977,
"learning_rate": 2.8430603236880824e-05,
"loss": 0.0511,
"step": 3000
},
{
"epoch": 0.4450329177240796,
"grad_norm": 0.003951882012188435,
"learning_rate": 2.838973352950793e-05,
"loss": 0.0096,
"step": 3025
},
{
"epoch": 0.44871087572179924,
"grad_norm": 0.009184204041957855,
"learning_rate": 2.8348863822135035e-05,
"loss": 0.0372,
"step": 3050
},
{
"epoch": 0.45238883371951893,
"grad_norm": 0.032343972474336624,
"learning_rate": 2.830799411476214e-05,
"loss": 0.0197,
"step": 3075
},
{
"epoch": 0.4560667917172386,
"grad_norm": 0.002111822599545121,
"learning_rate": 2.8267124407389246e-05,
"loss": 0.0004,
"step": 3100
},
{
"epoch": 0.45974474971495827,
"grad_norm": 0.0024695848114788532,
"learning_rate": 2.8226254700016348e-05,
"loss": 0.0757,
"step": 3125
},
{
"epoch": 0.4634227077126779,
"grad_norm": 0.001880451338365674,
"learning_rate": 2.8185384992643453e-05,
"loss": 0.0181,
"step": 3150
},
{
"epoch": 0.4671006657103976,
"grad_norm": 0.02519827149808407,
"learning_rate": 2.814451528527056e-05,
"loss": 0.0006,
"step": 3175
},
{
"epoch": 0.47077862370811724,
"grad_norm": 0.0016058700857684016,
"learning_rate": 2.8103645577897664e-05,
"loss": 0.0088,
"step": 3200
},
{
"epoch": 0.47445658170583693,
"grad_norm": 0.02888200432062149,
"learning_rate": 2.806277587052477e-05,
"loss": 0.0496,
"step": 3225
},
{
"epoch": 0.47813453970355657,
"grad_norm": 0.0747719258069992,
"learning_rate": 2.8021906163151874e-05,
"loss": 0.1338,
"step": 3250
},
{
"epoch": 0.48181249770127627,
"grad_norm": 4.114503860473633,
"learning_rate": 2.798103645577898e-05,
"loss": 0.063,
"step": 3275
},
{
"epoch": 0.4854904556989959,
"grad_norm": 0.10994021594524384,
"learning_rate": 2.7940166748406082e-05,
"loss": 0.0525,
"step": 3300
},
{
"epoch": 0.4891684136967156,
"grad_norm": 0.016218269243836403,
"learning_rate": 2.7899297041033187e-05,
"loss": 0.0547,
"step": 3325
},
{
"epoch": 0.49284637169443524,
"grad_norm": 0.08570988476276398,
"learning_rate": 2.7858427333660293e-05,
"loss": 0.0551,
"step": 3350
},
{
"epoch": 0.49652432969215493,
"grad_norm": 0.013475511223077774,
"learning_rate": 2.7817557626287398e-05,
"loss": 0.0445,
"step": 3375
},
{
"epoch": 0.5002022876898746,
"grad_norm": 0.006590835750102997,
"learning_rate": 2.7776687918914503e-05,
"loss": 0.0005,
"step": 3400
},
{
"epoch": 0.5038802456875943,
"grad_norm": 0.004501729272305965,
"learning_rate": 2.773581821154161e-05,
"loss": 0.0219,
"step": 3425
},
{
"epoch": 0.5075582036853139,
"grad_norm": 0.06999096274375916,
"learning_rate": 2.769494850416871e-05,
"loss": 0.0569,
"step": 3450
},
{
"epoch": 0.5112361616830335,
"grad_norm": 0.003883685451000929,
"learning_rate": 2.7654078796795813e-05,
"loss": 0.001,
"step": 3475
},
{
"epoch": 0.5149141196807533,
"grad_norm": 0.0029312793631106615,
"learning_rate": 2.7613209089422918e-05,
"loss": 0.0229,
"step": 3500
},
{
"epoch": 0.5185920776784729,
"grad_norm": 0.006315870210528374,
"learning_rate": 2.7572339382050023e-05,
"loss": 0.0731,
"step": 3525
},
{
"epoch": 0.5222700356761926,
"grad_norm": 0.0030722382944077253,
"learning_rate": 2.753146967467713e-05,
"loss": 0.0392,
"step": 3550
},
{
"epoch": 0.5259479936739122,
"grad_norm": 0.005796592216938734,
"learning_rate": 2.7490599967304234e-05,
"loss": 0.0139,
"step": 3575
},
{
"epoch": 0.529625951671632,
"grad_norm": 0.6967291831970215,
"learning_rate": 2.744973025993134e-05,
"loss": 0.0325,
"step": 3600
},
{
"epoch": 0.5333039096693516,
"grad_norm": 0.017705194652080536,
"learning_rate": 2.7408860552558445e-05,
"loss": 0.0433,
"step": 3625
},
{
"epoch": 0.5369818676670712,
"grad_norm": 0.020230021327733994,
"learning_rate": 2.7367990845185547e-05,
"loss": 0.0007,
"step": 3650
},
{
"epoch": 0.5406598256647909,
"grad_norm": 0.0023030710872262716,
"learning_rate": 2.7327121137812652e-05,
"loss": 0.0002,
"step": 3675
},
{
"epoch": 0.5443377836625106,
"grad_norm": 0.0020703673362731934,
"learning_rate": 2.7286251430439758e-05,
"loss": 0.0002,
"step": 3700
},
{
"epoch": 0.5480157416602303,
"grad_norm": 0.002691243775188923,
"learning_rate": 2.7245381723066863e-05,
"loss": 0.0005,
"step": 3725
},
{
"epoch": 0.5516936996579499,
"grad_norm": 0.002691768342629075,
"learning_rate": 2.720451201569397e-05,
"loss": 0.0957,
"step": 3750
},
{
"epoch": 0.5553716576556695,
"grad_norm": 0.05962933972477913,
"learning_rate": 2.7163642308321074e-05,
"loss": 0.1317,
"step": 3775
},
{
"epoch": 0.5590496156533893,
"grad_norm": 0.06475093215703964,
"learning_rate": 2.712277260094818e-05,
"loss": 0.0257,
"step": 3800
},
{
"epoch": 0.5627275736511089,
"grad_norm": 0.0121241370216012,
"learning_rate": 2.708190289357528e-05,
"loss": 0.0609,
"step": 3825
},
{
"epoch": 0.5664055316488286,
"grad_norm": 0.007753327488899231,
"learning_rate": 2.7041033186202387e-05,
"loss": 0.0008,
"step": 3850
},
{
"epoch": 0.5700834896465482,
"grad_norm": 0.005270315799862146,
"learning_rate": 2.7000163478829492e-05,
"loss": 0.0002,
"step": 3875
},
{
"epoch": 0.573761447644268,
"grad_norm": 0.004358434583991766,
"learning_rate": 2.6959293771456597e-05,
"loss": 0.0174,
"step": 3900
},
{
"epoch": 0.5774394056419876,
"grad_norm": 0.003769191913306713,
"learning_rate": 2.6918424064083703e-05,
"loss": 0.0513,
"step": 3925
},
{
"epoch": 0.5811173636397072,
"grad_norm": 0.0043784258887171745,
"learning_rate": 2.6877554356710808e-05,
"loss": 0.0185,
"step": 3950
},
{
"epoch": 0.5847953216374269,
"grad_norm": 0.004602793138474226,
"learning_rate": 2.6836684649337913e-05,
"loss": 0.0236,
"step": 3975
},
{
"epoch": 0.5884732796351466,
"grad_norm": 0.002638947917148471,
"learning_rate": 2.679581494196502e-05,
"loss": 0.0155,
"step": 4000
},
{
"epoch": 0.5921512376328663,
"grad_norm": 0.002830574056133628,
"learning_rate": 2.675494523459212e-05,
"loss": 0.0161,
"step": 4025
},
{
"epoch": 0.5958291956305859,
"grad_norm": 0.015412558801472187,
"learning_rate": 2.6714075527219226e-05,
"loss": 0.131,
"step": 4050
},
{
"epoch": 0.5995071536283055,
"grad_norm": 0.016349300742149353,
"learning_rate": 2.667320581984633e-05,
"loss": 0.0132,
"step": 4075
},
{
"epoch": 0.6031851116260253,
"grad_norm": 0.013236075639724731,
"learning_rate": 2.6632336112473437e-05,
"loss": 0.0005,
"step": 4100
},
{
"epoch": 0.6068630696237449,
"grad_norm": 0.007088659331202507,
"learning_rate": 2.6591466405100542e-05,
"loss": 0.0004,
"step": 4125
},
{
"epoch": 0.6105410276214646,
"grad_norm": 0.02121301181614399,
"learning_rate": 2.6550596697727648e-05,
"loss": 0.0397,
"step": 4150
},
{
"epoch": 0.6142189856191842,
"grad_norm": 0.030070917680859566,
"learning_rate": 2.650972699035475e-05,
"loss": 0.0754,
"step": 4175
},
{
"epoch": 0.617896943616904,
"grad_norm": 10.173595428466797,
"learning_rate": 2.646885728298185e-05,
"loss": 0.0336,
"step": 4200
},
{
"epoch": 0.6215749016146236,
"grad_norm": 0.014447388239204884,
"learning_rate": 2.6427987575608957e-05,
"loss": 0.003,
"step": 4225
},
{
"epoch": 0.6252528596123432,
"grad_norm": 0.012096612714231014,
"learning_rate": 2.6387117868236062e-05,
"loss": 0.0481,
"step": 4250
},
{
"epoch": 0.6289308176100629,
"grad_norm": 0.02047719806432724,
"learning_rate": 2.6346248160863168e-05,
"loss": 0.051,
"step": 4275
},
{
"epoch": 0.6326087756077826,
"grad_norm": 0.01152089238166809,
"learning_rate": 2.6305378453490273e-05,
"loss": 0.011,
"step": 4300
},
{
"epoch": 0.6362867336055023,
"grad_norm": 0.01178329810500145,
"learning_rate": 2.626450874611738e-05,
"loss": 0.0187,
"step": 4325
},
{
"epoch": 0.6399646916032219,
"grad_norm": 0.012962247245013714,
"learning_rate": 2.6223639038744484e-05,
"loss": 0.041,
"step": 4350
},
{
"epoch": 0.6436426496009415,
"grad_norm": 0.012993029318749905,
"learning_rate": 2.6182769331371586e-05,
"loss": 0.03,
"step": 4375
},
{
"epoch": 0.6473206075986612,
"grad_norm": 0.01311455201357603,
"learning_rate": 2.614189962399869e-05,
"loss": 0.0421,
"step": 4400
},
{
"epoch": 0.6509985655963809,
"grad_norm": 0.022407829761505127,
"learning_rate": 2.6101029916625797e-05,
"loss": 0.0312,
"step": 4425
},
{
"epoch": 0.6546765235941006,
"grad_norm": 0.007614122703671455,
"learning_rate": 2.6060160209252902e-05,
"loss": 0.0014,
"step": 4450
},
{
"epoch": 0.6583544815918202,
"grad_norm": 0.006891134660691023,
"learning_rate": 2.6019290501880007e-05,
"loss": 0.0966,
"step": 4475
},
{
"epoch": 0.6620324395895398,
"grad_norm": 0.026897389441728592,
"learning_rate": 2.5978420794507113e-05,
"loss": 0.0387,
"step": 4500
},
{
"epoch": 0.6657103975872596,
"grad_norm": 0.013364088721573353,
"learning_rate": 2.5937551087134218e-05,
"loss": 0.0007,
"step": 4525
},
{
"epoch": 0.6693883555849792,
"grad_norm": 0.006984102539718151,
"learning_rate": 2.589668137976132e-05,
"loss": 0.0008,
"step": 4550
},
{
"epoch": 0.6730663135826989,
"grad_norm": 0.005882107652723789,
"learning_rate": 2.5855811672388425e-05,
"loss": 0.0003,
"step": 4575
},
{
"epoch": 0.6767442715804185,
"grad_norm": 0.008882598020136356,
"learning_rate": 2.581494196501553e-05,
"loss": 0.0389,
"step": 4600
},
{
"epoch": 0.6804222295781382,
"grad_norm": 0.01086785364896059,
"learning_rate": 2.5774072257642636e-05,
"loss": 0.0305,
"step": 4625
},
{
"epoch": 0.6841001875758579,
"grad_norm": 0.005837304517626762,
"learning_rate": 2.573320255026974e-05,
"loss": 0.0277,
"step": 4650
},
{
"epoch": 0.6877781455735775,
"grad_norm": 0.006613869220018387,
"learning_rate": 2.5692332842896847e-05,
"loss": 0.0003,
"step": 4675
},
{
"epoch": 0.6914561035712972,
"grad_norm": 0.012274155393242836,
"learning_rate": 2.5651463135523952e-05,
"loss": 0.0383,
"step": 4700
},
{
"epoch": 0.6951340615690169,
"grad_norm": 0.0031378071289509535,
"learning_rate": 2.5610593428151054e-05,
"loss": 0.0065,
"step": 4725
},
{
"epoch": 0.6988120195667366,
"grad_norm": 0.12304351478815079,
"learning_rate": 2.556972372077816e-05,
"loss": 0.0103,
"step": 4750
},
{
"epoch": 0.7024899775644562,
"grad_norm": 0.005349988583475351,
"learning_rate": 2.5528854013405265e-05,
"loss": 0.0292,
"step": 4775
},
{
"epoch": 0.7061679355621758,
"grad_norm": 0.0023686892818659544,
"learning_rate": 2.548798430603237e-05,
"loss": 0.0169,
"step": 4800
},
{
"epoch": 0.7098458935598956,
"grad_norm": 0.0018137163715437055,
"learning_rate": 2.5447114598659476e-05,
"loss": 0.0444,
"step": 4825
},
{
"epoch": 0.7135238515576152,
"grad_norm": 0.0029049592558294535,
"learning_rate": 2.540624489128658e-05,
"loss": 0.0591,
"step": 4850
},
{
"epoch": 0.7172018095553349,
"grad_norm": 0.0024209930561482906,
"learning_rate": 2.5365375183913687e-05,
"loss": 0.0311,
"step": 4875
},
{
"epoch": 0.7208797675530545,
"grad_norm": 62.02742385864258,
"learning_rate": 2.532450547654079e-05,
"loss": 0.0324,
"step": 4900
},
{
"epoch": 0.7245577255507742,
"grad_norm": 0.002258418360725045,
"learning_rate": 2.528363576916789e-05,
"loss": 0.0003,
"step": 4925
},
{
"epoch": 0.7282356835484939,
"grad_norm": 0.001804179628379643,
"learning_rate": 2.5242766061794996e-05,
"loss": 0.0499,
"step": 4950
},
{
"epoch": 0.7319136415462135,
"grad_norm": 0.33615773916244507,
"learning_rate": 2.52018963544221e-05,
"loss": 0.0005,
"step": 4975
},
{
"epoch": 0.7355915995439332,
"grad_norm": 0.0010956133482977748,
"learning_rate": 2.5161026647049207e-05,
"loss": 0.0008,
"step": 5000
},
{
"epoch": 0.7392695575416529,
"grad_norm": 0.0012902173912152648,
"learning_rate": 2.5120156939676312e-05,
"loss": 0.0273,
"step": 5025
},
{
"epoch": 0.7429475155393725,
"grad_norm": 0.013881388120353222,
"learning_rate": 2.5079287232303417e-05,
"loss": 0.0488,
"step": 5050
},
{
"epoch": 0.7466254735370922,
"grad_norm": 0.011136908084154129,
"learning_rate": 2.5038417524930523e-05,
"loss": 0.0003,
"step": 5075
},
{
"epoch": 0.7503034315348118,
"grad_norm": 0.020626788958907127,
"learning_rate": 2.4997547817557625e-05,
"loss": 0.0621,
"step": 5100
},
{
"epoch": 0.7539813895325316,
"grad_norm": 0.039804015308618546,
"learning_rate": 2.495667811018473e-05,
"loss": 0.0941,
"step": 5125
},
{
"epoch": 0.7576593475302512,
"grad_norm": 0.019914086908102036,
"learning_rate": 2.4915808402811835e-05,
"loss": 0.0021,
"step": 5150
},
{
"epoch": 0.7613373055279709,
"grad_norm": 0.027103891596198082,
"learning_rate": 2.487493869543894e-05,
"loss": 0.0375,
"step": 5175
},
{
"epoch": 0.7650152635256905,
"grad_norm": 0.008572924882173538,
"learning_rate": 2.4834068988066046e-05,
"loss": 0.0007,
"step": 5200
},
{
"epoch": 0.7686932215234102,
"grad_norm": 0.011288322508335114,
"learning_rate": 2.479319928069315e-05,
"loss": 0.0339,
"step": 5225
},
{
"epoch": 0.7723711795211299,
"grad_norm": 11.5412015914917,
"learning_rate": 2.4752329573320257e-05,
"loss": 0.0414,
"step": 5250
},
{
"epoch": 0.7760491375188495,
"grad_norm": 0.016787946224212646,
"learning_rate": 2.471145986594736e-05,
"loss": 0.0817,
"step": 5275
},
{
"epoch": 0.7797270955165692,
"grad_norm": 0.3828181326389313,
"learning_rate": 2.4670590158574464e-05,
"loss": 0.0013,
"step": 5300
},
{
"epoch": 0.7834050535142889,
"grad_norm": 0.00423394562676549,
"learning_rate": 2.462972045120157e-05,
"loss": 0.0009,
"step": 5325
},
{
"epoch": 0.7870830115120085,
"grad_norm": 0.0038542733527719975,
"learning_rate": 2.4588850743828675e-05,
"loss": 0.0006,
"step": 5350
},
{
"epoch": 0.7907609695097282,
"grad_norm": 0.002444320358335972,
"learning_rate": 2.454798103645578e-05,
"loss": 0.0208,
"step": 5375
},
{
"epoch": 0.7944389275074478,
"grad_norm": 0.06983044743537903,
"learning_rate": 2.4507111329082886e-05,
"loss": 0.0611,
"step": 5400
},
{
"epoch": 0.7981168855051676,
"grad_norm": 0.0033168047666549683,
"learning_rate": 2.446624162170999e-05,
"loss": 0.0016,
"step": 5425
},
{
"epoch": 0.8017948435028872,
"grad_norm": 0.0031268312595784664,
"learning_rate": 2.4425371914337093e-05,
"loss": 0.0119,
"step": 5450
},
{
"epoch": 0.8054728015006068,
"grad_norm": 0.0019544719252735376,
"learning_rate": 2.43845022069642e-05,
"loss": 0.0337,
"step": 5475
},
{
"epoch": 0.8091507594983265,
"grad_norm": 0.017085539177060127,
"learning_rate": 2.4343632499591304e-05,
"loss": 0.0776,
"step": 5500
},
{
"epoch": 0.8128287174960462,
"grad_norm": 0.916741669178009,
"learning_rate": 2.430276279221841e-05,
"loss": 0.0009,
"step": 5525
},
{
"epoch": 0.8165066754937659,
"grad_norm": 0.0018967619398608804,
"learning_rate": 2.4261893084845515e-05,
"loss": 0.0005,
"step": 5550
},
{
"epoch": 0.8201846334914855,
"grad_norm": 0.002946459921076894,
"learning_rate": 2.422102337747262e-05,
"loss": 0.052,
"step": 5575
},
{
"epoch": 0.8238625914892052,
"grad_norm": 0.00457314308732748,
"learning_rate": 2.4180153670099725e-05,
"loss": 0.0386,
"step": 5600
},
{
"epoch": 0.8275405494869249,
"grad_norm": 0.00886601209640503,
"learning_rate": 2.4139283962726827e-05,
"loss": 0.0003,
"step": 5625
},
{
"epoch": 0.8312185074846445,
"grad_norm": 0.004110053181648254,
"learning_rate": 2.409841425535393e-05,
"loss": 0.0002,
"step": 5650
},
{
"epoch": 0.8348964654823642,
"grad_norm": 0.002550973556935787,
"learning_rate": 2.4057544547981035e-05,
"loss": 0.0893,
"step": 5675
},
{
"epoch": 0.8385744234800838,
"grad_norm": 0.0047971270978450775,
"learning_rate": 2.401667484060814e-05,
"loss": 0.0649,
"step": 5700
},
{
"epoch": 0.8422523814778036,
"grad_norm": 22.79808235168457,
"learning_rate": 2.3975805133235246e-05,
"loss": 0.0509,
"step": 5725
},
{
"epoch": 0.8459303394755232,
"grad_norm": 0.02145661599934101,
"learning_rate": 2.393493542586235e-05,
"loss": 0.0456,
"step": 5750
},
{
"epoch": 0.8496082974732428,
"grad_norm": 0.8593617081642151,
"learning_rate": 2.3894065718489456e-05,
"loss": 0.0727,
"step": 5775
},
{
"epoch": 0.8532862554709625,
"grad_norm": 0.004426372237503529,
"learning_rate": 2.385319601111656e-05,
"loss": 0.0004,
"step": 5800
},
{
"epoch": 0.8569642134686822,
"grad_norm": 0.0030661604832857847,
"learning_rate": 2.3812326303743664e-05,
"loss": 0.0001,
"step": 5825
},
{
"epoch": 0.8606421714664019,
"grad_norm": 0.0062530264258384705,
"learning_rate": 2.377145659637077e-05,
"loss": 0.044,
"step": 5850
},
{
"epoch": 0.8643201294641215,
"grad_norm": 0.023851774632930756,
"learning_rate": 2.3730586888997874e-05,
"loss": 0.0412,
"step": 5875
},
{
"epoch": 0.8679980874618412,
"grad_norm": 0.013338697142899036,
"learning_rate": 2.368971718162498e-05,
"loss": 0.0006,
"step": 5900
},
{
"epoch": 0.8716760454595609,
"grad_norm": 0.01904129609465599,
"learning_rate": 2.3648847474252085e-05,
"loss": 0.0726,
"step": 5925
},
{
"epoch": 0.8753540034572805,
"grad_norm": 0.010262302123010159,
"learning_rate": 2.360797776687919e-05,
"loss": 0.0087,
"step": 5950
},
{
"epoch": 0.8790319614550002,
"grad_norm": 0.006104280706495047,
"learning_rate": 2.3567108059506296e-05,
"loss": 0.0004,
"step": 5975
},
{
"epoch": 0.8827099194527198,
"grad_norm": 0.019870450720191002,
"learning_rate": 2.3526238352133398e-05,
"loss": 0.0795,
"step": 6000
},
{
"epoch": 0.8863878774504396,
"grad_norm": 0.021579677239060402,
"learning_rate": 2.3485368644760503e-05,
"loss": 0.0009,
"step": 6025
},
{
"epoch": 0.8900658354481592,
"grad_norm": 0.007828918285667896,
"learning_rate": 2.344449893738761e-05,
"loss": 0.0017,
"step": 6050
},
{
"epoch": 0.8937437934458788,
"grad_norm": 0.006341638043522835,
"learning_rate": 2.3403629230014714e-05,
"loss": 0.0198,
"step": 6075
},
{
"epoch": 0.8974217514435985,
"grad_norm": 0.004665954038500786,
"learning_rate": 2.336275952264182e-05,
"loss": 0.0002,
"step": 6100
},
{
"epoch": 0.9010997094413182,
"grad_norm": 0.0059740557335317135,
"learning_rate": 2.3321889815268925e-05,
"loss": 0.0398,
"step": 6125
},
{
"epoch": 0.9047776674390379,
"grad_norm": 0.09372496604919434,
"learning_rate": 2.328102010789603e-05,
"loss": 0.0596,
"step": 6150
},
{
"epoch": 0.9084556254367575,
"grad_norm": 0.06878636032342911,
"learning_rate": 2.3240150400523132e-05,
"loss": 0.0858,
"step": 6175
},
{
"epoch": 0.9121335834344771,
"grad_norm": 5.581681728363037,
"learning_rate": 2.3199280693150238e-05,
"loss": 0.0728,
"step": 6200
},
{
"epoch": 0.9158115414321969,
"grad_norm": 0.017690079286694527,
"learning_rate": 2.3158410985777343e-05,
"loss": 0.0109,
"step": 6225
},
{
"epoch": 0.9194894994299165,
"grad_norm": 0.009789933450520039,
"learning_rate": 2.3117541278404448e-05,
"loss": 0.003,
"step": 6250
},
{
"epoch": 0.9231674574276362,
"grad_norm": 0.007185524329543114,
"learning_rate": 2.3076671571031554e-05,
"loss": 0.0003,
"step": 6275
},
{
"epoch": 0.9268454154253558,
"grad_norm": 0.29879918694496155,
"learning_rate": 2.303580186365866e-05,
"loss": 0.0004,
"step": 6300
},
{
"epoch": 0.9305233734230756,
"grad_norm": 0.005276743322610855,
"learning_rate": 2.2994932156285764e-05,
"loss": 0.0209,
"step": 6325
},
{
"epoch": 0.9342013314207952,
"grad_norm": 4.756071090698242,
"learning_rate": 2.2954062448912866e-05,
"loss": 0.0696,
"step": 6350
},
{
"epoch": 0.9378792894185148,
"grad_norm": 0.005177750252187252,
"learning_rate": 2.291319274153997e-05,
"loss": 0.0294,
"step": 6375
},
{
"epoch": 0.9415572474162345,
"grad_norm": 0.005691983737051487,
"learning_rate": 2.2872323034167074e-05,
"loss": 0.0102,
"step": 6400
},
{
"epoch": 0.9452352054139542,
"grad_norm": 0.012254934757947922,
"learning_rate": 2.283145332679418e-05,
"loss": 0.0204,
"step": 6425
},
{
"epoch": 0.9489131634116739,
"grad_norm": 0.007204866968095303,
"learning_rate": 2.2790583619421284e-05,
"loss": 0.001,
"step": 6450
},
{
"epoch": 0.9525911214093935,
"grad_norm": 0.0022422156762331724,
"learning_rate": 2.274971391204839e-05,
"loss": 0.0074,
"step": 6475
},
{
"epoch": 0.9562690794071131,
"grad_norm": 0.0029815786983817816,
"learning_rate": 2.2708844204675495e-05,
"loss": 0.0001,
"step": 6500
},
{
"epoch": 0.9599470374048328,
"grad_norm": 0.0027428902685642242,
"learning_rate": 2.26679744973026e-05,
"loss": 0.0534,
"step": 6525
},
{
"epoch": 0.9636249954025525,
"grad_norm": 0.0038738884031772614,
"learning_rate": 2.2627104789929703e-05,
"loss": 0.0167,
"step": 6550
},
{
"epoch": 0.9673029534002722,
"grad_norm": 0.002053373260423541,
"learning_rate": 2.2586235082556808e-05,
"loss": 0.0119,
"step": 6575
},
{
"epoch": 0.9709809113979918,
"grad_norm": 0.015416144393384457,
"learning_rate": 2.2545365375183913e-05,
"loss": 0.0436,
"step": 6600
},
{
"epoch": 0.9746588693957114,
"grad_norm": 0.028199590742588043,
"learning_rate": 2.250449566781102e-05,
"loss": 0.06,
"step": 6625
},
{
"epoch": 0.9783368273934312,
"grad_norm": 0.00808124803006649,
"learning_rate": 2.2463625960438124e-05,
"loss": 0.0082,
"step": 6650
},
{
"epoch": 0.9820147853911508,
"grad_norm": 0.896677553653717,
"learning_rate": 2.242275625306523e-05,
"loss": 0.0004,
"step": 6675
},
{
"epoch": 0.9856927433888705,
"grad_norm": 0.014748472720384598,
"learning_rate": 2.2381886545692335e-05,
"loss": 0.0554,
"step": 6700
},
{
"epoch": 0.9893707013865901,
"grad_norm": 0.08279622346162796,
"learning_rate": 2.2341016838319437e-05,
"loss": 0.0727,
"step": 6725
},
{
"epoch": 0.9930486593843099,
"grad_norm": 0.0343361496925354,
"learning_rate": 2.2300147130946542e-05,
"loss": 0.0653,
"step": 6750
},
{
"epoch": 0.9967266173820295,
"grad_norm": 0.01778659224510193,
"learning_rate": 2.2259277423573648e-05,
"loss": 0.0468,
"step": 6775
},
{
"epoch": 0.9999632204200228,
"eval_accuracy": 0.9960279514527399,
"eval_auc": 0.9999026317054973,
"eval_f1": 0.9960253201825409,
"eval_loss": 0.020395906642079353,
"eval_precision": 0.9967589864466706,
"eval_recall": 0.9952927331568108,
"eval_runtime": 2488.2544,
"eval_samples_per_second": 5.464,
"eval_steps_per_second": 1.366,
"step": 6797
},
{
"epoch": 1.0004045753797493,
"grad_norm": 0.038309529423713684,
"learning_rate": 2.2218407716200753e-05,
"loss": 0.0867,
"step": 6800
},
{
"epoch": 1.0040825333774688,
"grad_norm": 0.03099379874765873,
"learning_rate": 2.217753800882786e-05,
"loss": 0.0251,
"step": 6825
},
{
"epoch": 1.0077604913751885,
"grad_norm": 0.014889312908053398,
"learning_rate": 2.2136668301454964e-05,
"loss": 0.0007,
"step": 6850
},
{
"epoch": 1.011438449372908,
"grad_norm": 0.011484134942293167,
"learning_rate": 2.209579859408207e-05,
"loss": 0.0042,
"step": 6875
},
{
"epoch": 1.0151164073706278,
"grad_norm": 0.008166844956576824,
"learning_rate": 2.205492888670917e-05,
"loss": 0.0003,
"step": 6900
},
{
"epoch": 1.0187943653683476,
"grad_norm": 0.006568376440554857,
"learning_rate": 2.2014059179336276e-05,
"loss": 0.0003,
"step": 6925
},
{
"epoch": 1.022472323366067,
"grad_norm": 0.0057509117759764194,
"learning_rate": 2.1973189471963382e-05,
"loss": 0.0084,
"step": 6950
},
{
"epoch": 1.0261502813637868,
"grad_norm": 0.004868589341640472,
"learning_rate": 2.1932319764590487e-05,
"loss": 0.0043,
"step": 6975
},
{
"epoch": 1.0298282393615066,
"grad_norm": 0.004712184425443411,
"learning_rate": 2.1891450057217593e-05,
"loss": 0.0029,
"step": 7000
},
{
"epoch": 1.033506197359226,
"grad_norm": 0.0035947624128311872,
"learning_rate": 2.1850580349844698e-05,
"loss": 0.0051,
"step": 7025
},
{
"epoch": 1.0371841553569459,
"grad_norm": 0.0033714687451720238,
"learning_rate": 2.1809710642471803e-05,
"loss": 0.0377,
"step": 7050
},
{
"epoch": 1.0408621133546654,
"grad_norm": 12.332621574401855,
"learning_rate": 2.1768840935098905e-05,
"loss": 0.0061,
"step": 7075
},
{
"epoch": 1.0445400713523851,
"grad_norm": 0.002749204868450761,
"learning_rate": 2.172797122772601e-05,
"loss": 0.0003,
"step": 7100
},
{
"epoch": 1.0482180293501049,
"grad_norm": 0.0026924049016088247,
"learning_rate": 2.1687101520353113e-05,
"loss": 0.0001,
"step": 7125
},
{
"epoch": 1.0518959873478244,
"grad_norm": 0.006290792487561703,
"learning_rate": 2.1646231812980218e-05,
"loss": 0.0443,
"step": 7150
},
{
"epoch": 1.0555739453455442,
"grad_norm": 0.0048763868398964405,
"learning_rate": 2.1605362105607323e-05,
"loss": 0.0002,
"step": 7175
},
{
"epoch": 1.059251903343264,
"grad_norm": 0.003825924126431346,
"learning_rate": 2.156449239823443e-05,
"loss": 0.0002,
"step": 7200
},
{
"epoch": 1.0629298613409834,
"grad_norm": 0.0068919663317501545,
"learning_rate": 2.1523622690861534e-05,
"loss": 0.0001,
"step": 7225
},
{
"epoch": 1.0666078193387032,
"grad_norm": 0.0029492308385670185,
"learning_rate": 2.1482752983488636e-05,
"loss": 0.0001,
"step": 7250
},
{
"epoch": 1.0702857773364227,
"grad_norm": 0.0031761634163558483,
"learning_rate": 2.144188327611574e-05,
"loss": 0.0001,
"step": 7275
},
{
"epoch": 1.0739637353341425,
"grad_norm": 0.004821736365556717,
"learning_rate": 2.1401013568742847e-05,
"loss": 0.0373,
"step": 7300
},
{
"epoch": 1.0776416933318622,
"grad_norm": 0.003594837849959731,
"learning_rate": 2.1360143861369952e-05,
"loss": 0.0004,
"step": 7325
},
{
"epoch": 1.0813196513295817,
"grad_norm": 0.004811630584299564,
"learning_rate": 2.1319274153997058e-05,
"loss": 0.0001,
"step": 7350
},
{
"epoch": 1.0849976093273015,
"grad_norm": 0.006440363824367523,
"learning_rate": 2.1278404446624163e-05,
"loss": 0.0453,
"step": 7375
},
{
"epoch": 1.0886755673250212,
"grad_norm": 0.007900132797658443,
"learning_rate": 2.123753473925127e-05,
"loss": 0.0003,
"step": 7400
},
{
"epoch": 1.0923535253227408,
"grad_norm": 0.00898217223584652,
"learning_rate": 2.1196665031878374e-05,
"loss": 0.0811,
"step": 7425
},
{
"epoch": 1.0960314833204605,
"grad_norm": 0.031215157359838486,
"learning_rate": 2.1155795324505476e-05,
"loss": 0.035,
"step": 7450
},
{
"epoch": 1.09970944131818,
"grad_norm": 0.022409003227949142,
"learning_rate": 2.111492561713258e-05,
"loss": 0.0014,
"step": 7475
},
{
"epoch": 1.1033873993158998,
"grad_norm": 0.0137456264346838,
"learning_rate": 2.1074055909759686e-05,
"loss": 0.0006,
"step": 7500
},
{
"epoch": 1.1070653573136195,
"grad_norm": 0.006075088866055012,
"learning_rate": 2.1033186202386792e-05,
"loss": 0.0005,
"step": 7525
},
{
"epoch": 1.110743315311339,
"grad_norm": 0.007382239680737257,
"learning_rate": 2.0992316495013897e-05,
"loss": 0.0003,
"step": 7550
},
{
"epoch": 1.1144212733090588,
"grad_norm": 0.016082163900136948,
"learning_rate": 2.0951446787641003e-05,
"loss": 0.0469,
"step": 7575
},
{
"epoch": 1.1180992313067786,
"grad_norm": 0.02028113603591919,
"learning_rate": 2.0910577080268108e-05,
"loss": 0.0398,
"step": 7600
},
{
"epoch": 1.121777189304498,
"grad_norm": 0.014643259346485138,
"learning_rate": 2.086970737289521e-05,
"loss": 0.0007,
"step": 7625
},
{
"epoch": 1.1254551473022179,
"grad_norm": 0.010461482219398022,
"learning_rate": 2.0828837665522315e-05,
"loss": 0.0004,
"step": 7650
},
{
"epoch": 1.1291331052999374,
"grad_norm": 0.009396770037710667,
"learning_rate": 2.078796795814942e-05,
"loss": 0.0004,
"step": 7675
},
{
"epoch": 1.1328110632976571,
"grad_norm": 0.007909806445240974,
"learning_rate": 2.0747098250776526e-05,
"loss": 0.016,
"step": 7700
},
{
"epoch": 1.1364890212953769,
"grad_norm": 0.006153750233352184,
"learning_rate": 2.070622854340363e-05,
"loss": 0.0055,
"step": 7725
},
{
"epoch": 1.1401669792930964,
"grad_norm": 0.006996823474764824,
"learning_rate": 2.0665358836030737e-05,
"loss": 0.0002,
"step": 7750
},
{
"epoch": 1.1438449372908162,
"grad_norm": 0.006032935809344053,
"learning_rate": 2.0624489128657842e-05,
"loss": 0.0331,
"step": 7775
},
{
"epoch": 1.1475228952885357,
"grad_norm": 0.003607578342780471,
"learning_rate": 2.0583619421284944e-05,
"loss": 0.0002,
"step": 7800
},
{
"epoch": 1.1512008532862554,
"grad_norm": 0.004726866725832224,
"learning_rate": 2.054274971391205e-05,
"loss": 0.0002,
"step": 7825
},
{
"epoch": 1.1548788112839752,
"grad_norm": 0.004033273551613092,
"learning_rate": 2.050188000653915e-05,
"loss": 0.0001,
"step": 7850
},
{
"epoch": 1.1585567692816947,
"grad_norm": 0.0035559283569455147,
"learning_rate": 2.0461010299166257e-05,
"loss": 0.0001,
"step": 7875
},
{
"epoch": 1.1622347272794145,
"grad_norm": 0.002765959594398737,
"learning_rate": 2.0420140591793362e-05,
"loss": 0.0001,
"step": 7900
},
{
"epoch": 1.1659126852771342,
"grad_norm": 0.003123935777693987,
"learning_rate": 2.0379270884420468e-05,
"loss": 0.0001,
"step": 7925
},
{
"epoch": 1.1695906432748537,
"grad_norm": 0.0030226910021156073,
"learning_rate": 2.0338401177047573e-05,
"loss": 0.0443,
"step": 7950
},
{
"epoch": 1.1732686012725735,
"grad_norm": 0.002675386844202876,
"learning_rate": 2.0297531469674675e-05,
"loss": 0.0001,
"step": 7975
},
{
"epoch": 1.1769465592702932,
"grad_norm": 0.002876314101740718,
"learning_rate": 2.025666176230178e-05,
"loss": 0.0001,
"step": 8000
},
{
"epoch": 1.1806245172680128,
"grad_norm": 0.003930400125682354,
"learning_rate": 2.0215792054928886e-05,
"loss": 0.0463,
"step": 8025
},
{
"epoch": 1.1843024752657325,
"grad_norm": 0.004908836912363768,
"learning_rate": 2.017492234755599e-05,
"loss": 0.0002,
"step": 8050
},
{
"epoch": 1.187980433263452,
"grad_norm": 0.005489639472216368,
"learning_rate": 2.0134052640183097e-05,
"loss": 0.0014,
"step": 8075
},
{
"epoch": 1.1916583912611718,
"grad_norm": 0.0054463837295770645,
"learning_rate": 2.0093182932810202e-05,
"loss": 0.0418,
"step": 8100
},
{
"epoch": 1.1953363492588915,
"grad_norm": 0.004771388601511717,
"learning_rate": 2.0052313225437307e-05,
"loss": 0.0002,
"step": 8125
},
{
"epoch": 1.199014307256611,
"grad_norm": 0.004579597618430853,
"learning_rate": 2.001144351806441e-05,
"loss": 0.0002,
"step": 8150
},
{
"epoch": 1.2026922652543308,
"grad_norm": 0.005399708636105061,
"learning_rate": 1.9970573810691515e-05,
"loss": 0.0002,
"step": 8175
},
{
"epoch": 1.2063702232520503,
"grad_norm": 0.0028218550141900778,
"learning_rate": 1.992970410331862e-05,
"loss": 0.0001,
"step": 8200
},
{
"epoch": 1.21004818124977,
"grad_norm": 0.0270390622317791,
"learning_rate": 1.9888834395945725e-05,
"loss": 0.1464,
"step": 8225
},
{
"epoch": 1.2137261392474898,
"grad_norm": 0.007817487232387066,
"learning_rate": 1.984796468857283e-05,
"loss": 0.0005,
"step": 8250
},
{
"epoch": 1.2174040972452094,
"grad_norm": 0.009673170745372772,
"learning_rate": 1.9807094981199936e-05,
"loss": 0.0003,
"step": 8275
},
{
"epoch": 1.2210820552429291,
"grad_norm": 0.006883264984935522,
"learning_rate": 1.976622527382704e-05,
"loss": 0.0364,
"step": 8300
},
{
"epoch": 1.2247600132406489,
"grad_norm": 0.038729436695575714,
"learning_rate": 1.9725355566454147e-05,
"loss": 0.0002,
"step": 8325
},
{
"epoch": 1.2284379712383684,
"grad_norm": 0.004570882301777601,
"learning_rate": 1.968448585908125e-05,
"loss": 0.0002,
"step": 8350
},
{
"epoch": 1.2321159292360881,
"grad_norm": 0.010231226682662964,
"learning_rate": 1.9643616151708354e-05,
"loss": 0.0463,
"step": 8375
},
{
"epoch": 1.235793887233808,
"grad_norm": 0.008044122718274593,
"learning_rate": 1.960274644433546e-05,
"loss": 0.0003,
"step": 8400
},
{
"epoch": 1.2394718452315274,
"grad_norm": 0.005202152766287327,
"learning_rate": 1.9561876736962565e-05,
"loss": 0.0391,
"step": 8425
},
{
"epoch": 1.2431498032292472,
"grad_norm": 0.0054007298313081264,
"learning_rate": 1.952100702958967e-05,
"loss": 0.0182,
"step": 8450
},
{
"epoch": 1.2468277612269667,
"grad_norm": 0.005195588804781437,
"learning_rate": 1.9480137322216776e-05,
"loss": 0.0392,
"step": 8475
},
{
"epoch": 1.2505057192246865,
"grad_norm": 0.00451032817363739,
"learning_rate": 1.943926761484388e-05,
"loss": 0.0002,
"step": 8500
},
{
"epoch": 1.2541836772224062,
"grad_norm": 0.00390147278085351,
"learning_rate": 1.9398397907470983e-05,
"loss": 0.0002,
"step": 8525
},
{
"epoch": 1.2578616352201257,
"grad_norm": 0.0030624952632933855,
"learning_rate": 1.935752820009809e-05,
"loss": 0.0001,
"step": 8550
},
{
"epoch": 1.2615395932178455,
"grad_norm": 0.0030448674224317074,
"learning_rate": 1.931665849272519e-05,
"loss": 0.0001,
"step": 8575
},
{
"epoch": 1.265217551215565,
"grad_norm": 0.003369387937709689,
"learning_rate": 1.9275788785352296e-05,
"loss": 0.0001,
"step": 8600
},
{
"epoch": 1.2688955092132848,
"grad_norm": 0.0026294661220163107,
"learning_rate": 1.92349190779794e-05,
"loss": 0.0001,
"step": 8625
},
{
"epoch": 1.2725734672110045,
"grad_norm": 0.002674271585419774,
"learning_rate": 1.9194049370606507e-05,
"loss": 0.0001,
"step": 8650
},
{
"epoch": 1.276251425208724,
"grad_norm": 0.016562707722187042,
"learning_rate": 1.9153179663233612e-05,
"loss": 0.0001,
"step": 8675
},
{
"epoch": 1.2799293832064438,
"grad_norm": 0.002845450770109892,
"learning_rate": 1.9112309955860714e-05,
"loss": 0.0376,
"step": 8700
},
{
"epoch": 1.2836073412041635,
"grad_norm": 0.002954358235001564,
"learning_rate": 1.907144024848782e-05,
"loss": 0.0001,
"step": 8725
},
{
"epoch": 1.287285299201883,
"grad_norm": 0.002028050599619746,
"learning_rate": 1.9030570541114925e-05,
"loss": 0.0047,
"step": 8750
},
{
"epoch": 1.2909632571996028,
"grad_norm": 0.002608607057482004,
"learning_rate": 1.898970083374203e-05,
"loss": 0.0001,
"step": 8775
},
{
"epoch": 1.2946412151973226,
"grad_norm": 0.0024424525909125805,
"learning_rate": 1.8948831126369135e-05,
"loss": 0.0001,
"step": 8800
},
{
"epoch": 1.298319173195042,
"grad_norm": 0.001993270590901375,
"learning_rate": 1.890796141899624e-05,
"loss": 0.0001,
"step": 8825
},
{
"epoch": 1.3019971311927618,
"grad_norm": 0.009992810897529125,
"learning_rate": 1.8867091711623346e-05,
"loss": 0.0001,
"step": 8850
},
{
"epoch": 1.3056750891904814,
"grad_norm": 0.003959705121815205,
"learning_rate": 1.8826222004250448e-05,
"loss": 0.0336,
"step": 8875
},
{
"epoch": 1.3093530471882011,
"grad_norm": 0.002648918190971017,
"learning_rate": 1.8785352296877554e-05,
"loss": 0.0002,
"step": 8900
},
{
"epoch": 1.3130310051859206,
"grad_norm": 0.001997936749830842,
"learning_rate": 1.874448258950466e-05,
"loss": 0.0001,
"step": 8925
},
{
"epoch": 1.3167089631836404,
"grad_norm": 0.0019702455028891563,
"learning_rate": 1.8703612882131764e-05,
"loss": 0.0001,
"step": 8950
},
{
"epoch": 1.3203869211813601,
"grad_norm": 0.0019666815642267466,
"learning_rate": 1.866274317475887e-05,
"loss": 0.015,
"step": 8975
},
{
"epoch": 1.3240648791790797,
"grad_norm": 0.016209330409765244,
"learning_rate": 1.8621873467385975e-05,
"loss": 0.0499,
"step": 9000
},
{
"epoch": 1.3277428371767994,
"grad_norm": 0.002770668361335993,
"learning_rate": 1.858100376001308e-05,
"loss": 0.0001,
"step": 9025
},
{
"epoch": 1.3314207951745192,
"grad_norm": 0.0025566229596734047,
"learning_rate": 1.8540134052640182e-05,
"loss": 0.0429,
"step": 9050
},
{
"epoch": 1.3350987531722387,
"grad_norm": 0.00490075396373868,
"learning_rate": 1.8499264345267288e-05,
"loss": 0.0391,
"step": 9075
},
{
"epoch": 1.3387767111699584,
"grad_norm": 0.002448379760608077,
"learning_rate": 1.8458394637894393e-05,
"loss": 0.0002,
"step": 9100
},
{
"epoch": 1.3424546691676782,
"grad_norm": 0.0027882566209882498,
"learning_rate": 1.84175249305215e-05,
"loss": 0.0001,
"step": 9125
},
{
"epoch": 1.3461326271653977,
"grad_norm": 0.0021890706848353148,
"learning_rate": 1.8376655223148604e-05,
"loss": 0.0001,
"step": 9150
},
{
"epoch": 1.3498105851631175,
"grad_norm": 0.002767590805888176,
"learning_rate": 1.833578551577571e-05,
"loss": 0.0001,
"step": 9175
},
{
"epoch": 1.3534885431608372,
"grad_norm": 0.0018375491490587592,
"learning_rate": 1.8294915808402815e-05,
"loss": 0.0003,
"step": 9200
},
{
"epoch": 1.3571665011585567,
"grad_norm": 0.0020680581219494343,
"learning_rate": 1.825404610102992e-05,
"loss": 0.0001,
"step": 9225
},
{
"epoch": 1.3608444591562765,
"grad_norm": 0.001452911994419992,
"learning_rate": 1.8213176393657022e-05,
"loss": 0.0001,
"step": 9250
},
{
"epoch": 1.364522417153996,
"grad_norm": 0.011856326833367348,
"learning_rate": 1.8172306686284127e-05,
"loss": 0.0498,
"step": 9275
},
{
"epoch": 1.3682003751517158,
"grad_norm": 0.005070924758911133,
"learning_rate": 1.813143697891123e-05,
"loss": 0.0003,
"step": 9300
},
{
"epoch": 1.3718783331494353,
"grad_norm": 0.003941578324884176,
"learning_rate": 1.8090567271538335e-05,
"loss": 0.0001,
"step": 9325
},
{
"epoch": 1.375556291147155,
"grad_norm": 0.0044369762763381,
"learning_rate": 1.804969756416544e-05,
"loss": 0.0395,
"step": 9350
},
{
"epoch": 1.3792342491448748,
"grad_norm": 0.003973621409386396,
"learning_rate": 1.8008827856792546e-05,
"loss": 0.0002,
"step": 9375
},
{
"epoch": 1.3829122071425943,
"grad_norm": 0.00455184280872345,
"learning_rate": 1.796795814941965e-05,
"loss": 0.0001,
"step": 9400
},
{
"epoch": 1.386590165140314,
"grad_norm": 0.0031091428827494383,
"learning_rate": 1.7927088442046753e-05,
"loss": 0.0001,
"step": 9425
},
{
"epoch": 1.3902681231380338,
"grad_norm": 0.0024325144477188587,
"learning_rate": 1.7886218734673858e-05,
"loss": 0.0001,
"step": 9450
},
{
"epoch": 1.3939460811357534,
"grad_norm": 0.0036399061791598797,
"learning_rate": 1.7845349027300964e-05,
"loss": 0.0001,
"step": 9475
},
{
"epoch": 1.397624039133473,
"grad_norm": 0.0023723021149635315,
"learning_rate": 1.780447931992807e-05,
"loss": 0.0001,
"step": 9500
},
{
"epoch": 1.4013019971311929,
"grad_norm": 0.0027509965002536774,
"learning_rate": 1.7763609612555174e-05,
"loss": 0.0001,
"step": 9525
},
{
"epoch": 1.4049799551289124,
"grad_norm": 0.0033826676663011312,
"learning_rate": 1.772273990518228e-05,
"loss": 0.0001,
"step": 9550
},
{
"epoch": 1.4086579131266321,
"grad_norm": 0.011138912290334702,
"learning_rate": 1.7681870197809385e-05,
"loss": 0.0398,
"step": 9575
},
{
"epoch": 1.4123358711243519,
"grad_norm": 0.023271048441529274,
"learning_rate": 1.7641000490436487e-05,
"loss": 0.0747,
"step": 9600
},
{
"epoch": 1.4160138291220714,
"grad_norm": 0.18063010275363922,
"learning_rate": 1.7600130783063593e-05,
"loss": 0.0009,
"step": 9625
},
{
"epoch": 1.4196917871197912,
"grad_norm": 0.012859140522778034,
"learning_rate": 1.7559261075690698e-05,
"loss": 0.0444,
"step": 9650
},
{
"epoch": 1.4233697451175107,
"grad_norm": 0.003733620513230562,
"learning_rate": 1.7518391368317803e-05,
"loss": 0.0219,
"step": 9675
},
{
"epoch": 1.4270477031152304,
"grad_norm": 4.048089504241943,
"learning_rate": 1.747752166094491e-05,
"loss": 0.052,
"step": 9700
},
{
"epoch": 1.43072566111295,
"grad_norm": 0.02329842559993267,
"learning_rate": 1.7436651953572014e-05,
"loss": 0.0033,
"step": 9725
},
{
"epoch": 1.4344036191106697,
"grad_norm": 0.5609085559844971,
"learning_rate": 1.739578224619912e-05,
"loss": 0.0468,
"step": 9750
},
{
"epoch": 1.4380815771083895,
"grad_norm": 0.010268951766192913,
"learning_rate": 1.735491253882622e-05,
"loss": 0.0004,
"step": 9775
},
{
"epoch": 1.441759535106109,
"grad_norm": 0.005183890461921692,
"learning_rate": 1.7314042831453327e-05,
"loss": 0.0002,
"step": 9800
},
{
"epoch": 1.4454374931038287,
"grad_norm": 0.006362477317452431,
"learning_rate": 1.7273173124080432e-05,
"loss": 0.0623,
"step": 9825
},
{
"epoch": 1.4491154511015485,
"grad_norm": 0.004158661235123873,
"learning_rate": 1.7232303416707537e-05,
"loss": 0.0002,
"step": 9850
},
{
"epoch": 1.452793409099268,
"grad_norm": 0.003037210088223219,
"learning_rate": 1.7191433709334643e-05,
"loss": 0.0001,
"step": 9875
},
{
"epoch": 1.4564713670969878,
"grad_norm": 0.006479774601757526,
"learning_rate": 1.7150564001961748e-05,
"loss": 0.0562,
"step": 9900
},
{
"epoch": 1.4601493250947075,
"grad_norm": 34.625465393066406,
"learning_rate": 1.7109694294588854e-05,
"loss": 0.0423,
"step": 9925
},
{
"epoch": 1.463827283092427,
"grad_norm": 0.003740801243111491,
"learning_rate": 1.706882458721596e-05,
"loss": 0.0001,
"step": 9950
},
{
"epoch": 1.4675052410901468,
"grad_norm": 0.06391607969999313,
"learning_rate": 1.702795487984306e-05,
"loss": 0.0211,
"step": 9975
},
{
"epoch": 1.4711831990878665,
"grad_norm": 0.0029998337849974632,
"learning_rate": 1.6987085172470166e-05,
"loss": 0.0012,
"step": 10000
},
{
"epoch": 1.474861157085586,
"grad_norm": 0.002598424442112446,
"learning_rate": 1.6946215465097272e-05,
"loss": 0.0056,
"step": 10025
},
{
"epoch": 1.4785391150833058,
"grad_norm": 0.0026498546358197927,
"learning_rate": 1.6905345757724374e-05,
"loss": 0.0003,
"step": 10050
},
{
"epoch": 1.4822170730810253,
"grad_norm": 0.002896289573982358,
"learning_rate": 1.686447605035148e-05,
"loss": 0.0244,
"step": 10075
},
{
"epoch": 1.485895031078745,
"grad_norm": 0.002737634815275669,
"learning_rate": 1.6823606342978584e-05,
"loss": 0.0002,
"step": 10100
},
{
"epoch": 1.4895729890764646,
"grad_norm": 0.002295145532116294,
"learning_rate": 1.678273663560569e-05,
"loss": 0.0001,
"step": 10125
},
{
"epoch": 1.4932509470741844,
"grad_norm": 0.0018749627051874995,
"learning_rate": 1.6741866928232792e-05,
"loss": 0.0001,
"step": 10150
},
{
"epoch": 1.4969289050719041,
"grad_norm": 0.002252426231279969,
"learning_rate": 1.6700997220859897e-05,
"loss": 0.0091,
"step": 10175
},
{
"epoch": 1.5006068630696237,
"grad_norm": 0.001987684750929475,
"learning_rate": 1.6660127513487003e-05,
"loss": 0.0059,
"step": 10200
},
{
"epoch": 1.5042848210673434,
"grad_norm": 0.0018681609071791172,
"learning_rate": 1.6619257806114108e-05,
"loss": 0.0036,
"step": 10225
},
{
"epoch": 1.5079627790650632,
"grad_norm": 0.002243634080514312,
"learning_rate": 1.6578388098741213e-05,
"loss": 0.0001,
"step": 10250
},
{
"epoch": 1.5116407370627827,
"grad_norm": 0.005282828118652105,
"learning_rate": 1.653751839136832e-05,
"loss": 0.0508,
"step": 10275
},
{
"epoch": 1.5153186950605024,
"grad_norm": 0.0033266160171478987,
"learning_rate": 1.6496648683995424e-05,
"loss": 0.0036,
"step": 10300
},
{
"epoch": 1.5189966530582222,
"grad_norm": 0.0024327326100319624,
"learning_rate": 1.6455778976622526e-05,
"loss": 0.0001,
"step": 10325
},
{
"epoch": 1.5226746110559417,
"grad_norm": 0.0037725428119301796,
"learning_rate": 1.641490926924963e-05,
"loss": 0.0859,
"step": 10350
},
{
"epoch": 1.5263525690536615,
"grad_norm": 0.01479677390307188,
"learning_rate": 1.6374039561876737e-05,
"loss": 0.0002,
"step": 10375
},
{
"epoch": 1.5300305270513812,
"grad_norm": 0.002465145429596305,
"learning_rate": 1.6333169854503842e-05,
"loss": 0.0009,
"step": 10400
},
{
"epoch": 1.5337084850491007,
"grad_norm": 0.002028359565883875,
"learning_rate": 1.6292300147130948e-05,
"loss": 0.0001,
"step": 10425
},
{
"epoch": 1.5373864430468203,
"grad_norm": 0.0017766653327271342,
"learning_rate": 1.6251430439758053e-05,
"loss": 0.0001,
"step": 10450
},
{
"epoch": 1.5410644010445402,
"grad_norm": 0.002013767370954156,
"learning_rate": 1.6210560732385158e-05,
"loss": 0.0255,
"step": 10475
},
{
"epoch": 1.5447423590422598,
"grad_norm": 0.0019861028995364904,
"learning_rate": 1.616969102501226e-05,
"loss": 0.0109,
"step": 10500
},
{
"epoch": 1.5484203170399793,
"grad_norm": 0.0017919589299708605,
"learning_rate": 1.6128821317639366e-05,
"loss": 0.0063,
"step": 10525
},
{
"epoch": 1.552098275037699,
"grad_norm": 0.001575242611579597,
"learning_rate": 1.608795161026647e-05,
"loss": 0.0001,
"step": 10550
},
{
"epoch": 1.5557762330354188,
"grad_norm": 0.0017625424079596996,
"learning_rate": 1.6047081902893576e-05,
"loss": 0.0001,
"step": 10575
},
{
"epoch": 1.5594541910331383,
"grad_norm": 0.0014293509302660823,
"learning_rate": 1.6006212195520682e-05,
"loss": 0.0001,
"step": 10600
},
{
"epoch": 1.563132149030858,
"grad_norm": 3.637284994125366,
"learning_rate": 1.5965342488147787e-05,
"loss": 0.0319,
"step": 10625
},
{
"epoch": 1.5668101070285778,
"grad_norm": 0.0015190584817901254,
"learning_rate": 1.5924472780774893e-05,
"loss": 0.1112,
"step": 10650
},
{
"epoch": 1.5704880650262973,
"grad_norm": 0.0019073854200541973,
"learning_rate": 1.5883603073401995e-05,
"loss": 0.0001,
"step": 10675
},
{
"epoch": 1.574166023024017,
"grad_norm": 0.15334878861904144,
"learning_rate": 1.58427333660291e-05,
"loss": 0.0001,
"step": 10700
},
{
"epoch": 1.5778439810217368,
"grad_norm": 0.0013233659556135535,
"learning_rate": 1.5801863658656205e-05,
"loss": 0.0006,
"step": 10725
},
{
"epoch": 1.5815219390194564,
"grad_norm": 60.88636779785156,
"learning_rate": 1.576099395128331e-05,
"loss": 0.0794,
"step": 10750
},
{
"epoch": 1.5851998970171761,
"grad_norm": 0.006810314953327179,
"learning_rate": 1.5720124243910413e-05,
"loss": 0.0272,
"step": 10775
},
{
"epoch": 1.5888778550148959,
"grad_norm": 0.006012595724314451,
"learning_rate": 1.5679254536537518e-05,
"loss": 0.0177,
"step": 10800
},
{
"epoch": 1.5925558130126154,
"grad_norm": 0.0041669500060379505,
"learning_rate": 1.5638384829164623e-05,
"loss": 0.0007,
"step": 10825
},
{
"epoch": 1.596233771010335,
"grad_norm": 0.0024410944897681475,
"learning_rate": 1.559751512179173e-05,
"loss": 0.0001,
"step": 10850
},
{
"epoch": 1.5999117290080547,
"grad_norm": 0.002287843730300665,
"learning_rate": 1.555664541441883e-05,
"loss": 0.0001,
"step": 10875
},
{
"epoch": 1.6035896870057744,
"grad_norm": 0.002450288040563464,
"learning_rate": 1.5515775707045936e-05,
"loss": 0.0001,
"step": 10900
},
{
"epoch": 1.607267645003494,
"grad_norm": 0.0017540917033329606,
"learning_rate": 1.547490599967304e-05,
"loss": 0.0001,
"step": 10925
},
{
"epoch": 1.6109456030012137,
"grad_norm": 0.0018945990595966578,
"learning_rate": 1.5434036292300147e-05,
"loss": 0.0001,
"step": 10950
},
{
"epoch": 1.6146235609989334,
"grad_norm": 0.38427916169166565,
"learning_rate": 1.5393166584927252e-05,
"loss": 0.0478,
"step": 10975
},
{
"epoch": 1.618301518996653,
"grad_norm": 0.005249540787190199,
"learning_rate": 1.5352296877554358e-05,
"loss": 0.0005,
"step": 11000
},
{
"epoch": 1.6219794769943727,
"grad_norm": 0.049626559019088745,
"learning_rate": 1.5311427170181463e-05,
"loss": 0.0803,
"step": 11025
},
{
"epoch": 1.6256574349920925,
"grad_norm": 0.006765100173652172,
"learning_rate": 1.5270557462808565e-05,
"loss": 0.021,
"step": 11050
},
{
"epoch": 1.629335392989812,
"grad_norm": 0.012057892046868801,
"learning_rate": 1.522968775543567e-05,
"loss": 0.0005,
"step": 11075
},
{
"epoch": 1.6330133509875318,
"grad_norm": 0.012171362526714802,
"learning_rate": 1.5188818048062776e-05,
"loss": 0.0171,
"step": 11100
},
{
"epoch": 1.6366913089852515,
"grad_norm": 0.006173169240355492,
"learning_rate": 1.5147948340689881e-05,
"loss": 0.0183,
"step": 11125
},
{
"epoch": 1.640369266982971,
"grad_norm": 0.025982793420553207,
"learning_rate": 1.5107078633316986e-05,
"loss": 0.041,
"step": 11150
},
{
"epoch": 1.6440472249806906,
"grad_norm": 0.0121184466406703,
"learning_rate": 1.5066208925944092e-05,
"loss": 0.0066,
"step": 11175
},
{
"epoch": 1.6477251829784105,
"grad_norm": 0.008928947150707245,
"learning_rate": 1.5025339218571197e-05,
"loss": 0.0013,
"step": 11200
},
{
"epoch": 1.65140314097613,
"grad_norm": 0.003572331042960286,
"learning_rate": 1.4984469511198301e-05,
"loss": 0.0448,
"step": 11225
},
{
"epoch": 1.6550810989738496,
"grad_norm": 0.012093408964574337,
"learning_rate": 1.4943599803825406e-05,
"loss": 0.0003,
"step": 11250
},
{
"epoch": 1.6587590569715693,
"grad_norm": 0.005746824201196432,
"learning_rate": 1.490273009645251e-05,
"loss": 0.0002,
"step": 11275
},
{
"epoch": 1.662437014969289,
"grad_norm": 0.005075458902865648,
"learning_rate": 1.4861860389079615e-05,
"loss": 0.0431,
"step": 11300
},
{
"epoch": 1.6661149729670086,
"grad_norm": 0.006644480861723423,
"learning_rate": 1.4820990681706719e-05,
"loss": 0.0003,
"step": 11325
},
{
"epoch": 1.6697929309647284,
"grad_norm": 0.016171354800462723,
"learning_rate": 1.4780120974333823e-05,
"loss": 0.0163,
"step": 11350
},
{
"epoch": 1.673470888962448,
"grad_norm": 0.005658384878188372,
"learning_rate": 1.4739251266960928e-05,
"loss": 0.0022,
"step": 11375
},
{
"epoch": 1.6771488469601676,
"grad_norm": 0.010968804359436035,
"learning_rate": 1.4698381559588033e-05,
"loss": 0.0804,
"step": 11400
},
{
"epoch": 1.6808268049578874,
"grad_norm": 0.029876096174120903,
"learning_rate": 1.4657511852215139e-05,
"loss": 0.067,
"step": 11425
},
{
"epoch": 1.6845047629556071,
"grad_norm": 0.03841656073927879,
"learning_rate": 1.4616642144842242e-05,
"loss": 0.0349,
"step": 11450
},
{
"epoch": 1.6881827209533267,
"grad_norm": 0.017025554552674294,
"learning_rate": 1.4575772437469348e-05,
"loss": 0.001,
"step": 11475
},
{
"epoch": 1.6918606789510464,
"grad_norm": 0.024776197969913483,
"learning_rate": 1.4534902730096453e-05,
"loss": 0.0356,
"step": 11500
},
{
"epoch": 1.6955386369487662,
"grad_norm": 0.018094466999173164,
"learning_rate": 1.4494033022723559e-05,
"loss": 0.0006,
"step": 11525
},
{
"epoch": 1.6992165949464857,
"grad_norm": 0.010948434472084045,
"learning_rate": 1.4453163315350662e-05,
"loss": 0.0566,
"step": 11550
},
{
"epoch": 1.7028945529442052,
"grad_norm": 0.06060256063938141,
"learning_rate": 1.4412293607977768e-05,
"loss": 0.087,
"step": 11575
},
{
"epoch": 1.7065725109419252,
"grad_norm": 0.0425218902528286,
"learning_rate": 1.4371423900604873e-05,
"loss": 0.0014,
"step": 11600
},
{
"epoch": 1.7102504689396447,
"grad_norm": 0.03931298479437828,
"learning_rate": 1.4330554193231977e-05,
"loss": 0.0329,
"step": 11625
},
{
"epoch": 1.7139284269373642,
"grad_norm": 0.05203554406762123,
"learning_rate": 1.4289684485859082e-05,
"loss": 0.0667,
"step": 11650
},
{
"epoch": 1.717606384935084,
"grad_norm": 0.059145841747522354,
"learning_rate": 1.4248814778486187e-05,
"loss": 0.0464,
"step": 11675
},
{
"epoch": 1.7212843429328037,
"grad_norm": 0.053441960364580154,
"learning_rate": 1.4207945071113291e-05,
"loss": 0.0598,
"step": 11700
},
{
"epoch": 1.7249623009305233,
"grad_norm": 0.0338728241622448,
"learning_rate": 1.4167075363740395e-05,
"loss": 0.0014,
"step": 11725
},
{
"epoch": 1.728640258928243,
"grad_norm": 0.03298606723546982,
"learning_rate": 1.41262056563675e-05,
"loss": 0.0011,
"step": 11750
},
{
"epoch": 1.7323182169259628,
"grad_norm": 0.007968394085764885,
"learning_rate": 1.4085335948994606e-05,
"loss": 0.0332,
"step": 11775
},
{
"epoch": 1.7359961749236823,
"grad_norm": 0.033015619963407516,
"learning_rate": 1.404446624162171e-05,
"loss": 0.0471,
"step": 11800
},
{
"epoch": 1.739674132921402,
"grad_norm": 0.03123684972524643,
"learning_rate": 1.4003596534248815e-05,
"loss": 0.0008,
"step": 11825
},
{
"epoch": 1.7433520909191218,
"grad_norm": 0.026270106434822083,
"learning_rate": 1.396272682687592e-05,
"loss": 0.027,
"step": 11850
},
{
"epoch": 1.7470300489168413,
"grad_norm": 0.025614146143198013,
"learning_rate": 1.3921857119503025e-05,
"loss": 0.0006,
"step": 11875
},
{
"epoch": 1.750708006914561,
"grad_norm": 0.011196363717317581,
"learning_rate": 1.3880987412130129e-05,
"loss": 0.0004,
"step": 11900
},
{
"epoch": 1.7543859649122808,
"grad_norm": 0.014085380360484123,
"learning_rate": 1.3840117704757234e-05,
"loss": 0.0007,
"step": 11925
},
{
"epoch": 1.7580639229100004,
"grad_norm": 0.2520334720611572,
"learning_rate": 1.379924799738434e-05,
"loss": 0.0012,
"step": 11950
},
{
"epoch": 1.7617418809077199,
"grad_norm": 0.0027042387519031763,
"learning_rate": 1.3758378290011445e-05,
"loss": 0.0657,
"step": 11975
},
{
"epoch": 1.7654198389054399,
"grad_norm": 0.007959190756082535,
"learning_rate": 1.3717508582638549e-05,
"loss": 0.0009,
"step": 12000
},
{
"epoch": 1.7690977969031594,
"grad_norm": 0.006802896503359079,
"learning_rate": 1.3676638875265654e-05,
"loss": 0.0002,
"step": 12025
},
{
"epoch": 1.772775754900879,
"grad_norm": 0.0037322076968848705,
"learning_rate": 1.3635769167892758e-05,
"loss": 0.0002,
"step": 12050
},
{
"epoch": 1.7764537128985987,
"grad_norm": 0.004444212652742863,
"learning_rate": 1.3594899460519862e-05,
"loss": 0.0113,
"step": 12075
},
{
"epoch": 1.7801316708963184,
"grad_norm": 0.0029294530395418406,
"learning_rate": 1.3554029753146967e-05,
"loss": 0.0024,
"step": 12100
},
{
"epoch": 1.783809628894038,
"grad_norm": 0.006351064890623093,
"learning_rate": 1.3513160045774072e-05,
"loss": 0.0339,
"step": 12125
},
{
"epoch": 1.7874875868917577,
"grad_norm": 0.0033591645769774914,
"learning_rate": 1.3472290338401178e-05,
"loss": 0.003,
"step": 12150
},
{
"epoch": 1.7911655448894774,
"grad_norm": 0.003340468741953373,
"learning_rate": 1.3431420631028281e-05,
"loss": 0.0002,
"step": 12175
},
{
"epoch": 1.794843502887197,
"grad_norm": 0.12212031334638596,
"learning_rate": 1.3390550923655387e-05,
"loss": 0.0836,
"step": 12200
},
{
"epoch": 1.7985214608849167,
"grad_norm": 0.014243889600038528,
"learning_rate": 1.3349681216282492e-05,
"loss": 0.0318,
"step": 12225
},
{
"epoch": 1.8021994188826365,
"grad_norm": 0.016160359606146812,
"learning_rate": 1.3308811508909596e-05,
"loss": 0.0003,
"step": 12250
},
{
"epoch": 1.805877376880356,
"grad_norm": 0.011376752518117428,
"learning_rate": 1.3267941801536701e-05,
"loss": 0.0003,
"step": 12275
},
{
"epoch": 1.8095553348780757,
"grad_norm": 0.00865715742111206,
"learning_rate": 1.3227072094163807e-05,
"loss": 0.0133,
"step": 12300
},
{
"epoch": 1.8132332928757955,
"grad_norm": 0.007116909604519606,
"learning_rate": 1.3186202386790912e-05,
"loss": 0.0003,
"step": 12325
},
{
"epoch": 1.816911250873515,
"grad_norm": 0.008155121468007565,
"learning_rate": 1.3145332679418016e-05,
"loss": 0.0385,
"step": 12350
},
{
"epoch": 1.8205892088712345,
"grad_norm": 0.013204419054090977,
"learning_rate": 1.3104462972045121e-05,
"loss": 0.0356,
"step": 12375
},
{
"epoch": 1.8242671668689545,
"grad_norm": 0.013173281215131283,
"learning_rate": 1.3063593264672226e-05,
"loss": 0.0004,
"step": 12400
},
{
"epoch": 1.827945124866674,
"grad_norm": 0.010820701718330383,
"learning_rate": 1.302272355729933e-05,
"loss": 0.0003,
"step": 12425
},
{
"epoch": 1.8316230828643936,
"grad_norm": 0.00571137759834528,
"learning_rate": 1.2981853849926434e-05,
"loss": 0.0011,
"step": 12450
},
{
"epoch": 1.8353010408621133,
"grad_norm": 0.007815693505108356,
"learning_rate": 1.2940984142553539e-05,
"loss": 0.0002,
"step": 12475
},
{
"epoch": 1.838978998859833,
"grad_norm": 0.04561807960271835,
"learning_rate": 1.2900114435180645e-05,
"loss": 0.0002,
"step": 12500
},
{
"epoch": 1.8426569568575526,
"grad_norm": 0.007523215841501951,
"learning_rate": 1.2859244727807748e-05,
"loss": 0.0203,
"step": 12525
},
{
"epoch": 1.8463349148552723,
"grad_norm": 0.007975575514137745,
"learning_rate": 1.2818375020434854e-05,
"loss": 0.0002,
"step": 12550
},
{
"epoch": 1.850012872852992,
"grad_norm": 0.007269065361469984,
"learning_rate": 1.2777505313061959e-05,
"loss": 0.0002,
"step": 12575
},
{
"epoch": 1.8536908308507116,
"grad_norm": 0.004501336719840765,
"learning_rate": 1.2736635605689064e-05,
"loss": 0.0001,
"step": 12600
},
{
"epoch": 1.8573687888484314,
"grad_norm": 0.004011464770883322,
"learning_rate": 1.2695765898316168e-05,
"loss": 0.0003,
"step": 12625
},
{
"epoch": 1.8610467468461511,
"grad_norm": 0.002334051998332143,
"learning_rate": 1.2654896190943273e-05,
"loss": 0.0234,
"step": 12650
},
{
"epoch": 1.8647247048438707,
"grad_norm": 0.004475513007491827,
"learning_rate": 1.2614026483570379e-05,
"loss": 0.0002,
"step": 12675
},
{
"epoch": 1.8684026628415904,
"grad_norm": 0.003851409535855055,
"learning_rate": 1.2573156776197482e-05,
"loss": 0.0001,
"step": 12700
},
{
"epoch": 1.8720806208393101,
"grad_norm": 0.0028481779154390097,
"learning_rate": 1.2532287068824588e-05,
"loss": 0.0255,
"step": 12725
},
{
"epoch": 1.8757585788370297,
"grad_norm": 0.0030939916614443064,
"learning_rate": 1.2491417361451693e-05,
"loss": 0.0332,
"step": 12750
},
{
"epoch": 1.8794365368347492,
"grad_norm": 0.0065445504151284695,
"learning_rate": 1.2450547654078797e-05,
"loss": 0.0422,
"step": 12775
},
{
"epoch": 1.8831144948324692,
"grad_norm": 0.005459626670926809,
"learning_rate": 1.24096779467059e-05,
"loss": 0.0113,
"step": 12800
},
{
"epoch": 1.8867924528301887,
"grad_norm": 0.002942801220342517,
"learning_rate": 1.2368808239333006e-05,
"loss": 0.0002,
"step": 12825
},
{
"epoch": 1.8904704108279082,
"grad_norm": 0.0067766509018838406,
"learning_rate": 1.2327938531960111e-05,
"loss": 0.018,
"step": 12850
},
{
"epoch": 1.894148368825628,
"grad_norm": 0.005411918740719557,
"learning_rate": 1.2287068824587217e-05,
"loss": 0.0486,
"step": 12875
},
{
"epoch": 1.8978263268233477,
"grad_norm": 0.006009817123413086,
"learning_rate": 1.224619911721432e-05,
"loss": 0.0002,
"step": 12900
},
{
"epoch": 1.9015042848210673,
"grad_norm": 0.005595459137111902,
"learning_rate": 1.2205329409841426e-05,
"loss": 0.0408,
"step": 12925
},
{
"epoch": 1.905182242818787,
"grad_norm": 0.012987248599529266,
"learning_rate": 1.2164459702468531e-05,
"loss": 0.0823,
"step": 12950
},
{
"epoch": 1.9088602008165068,
"grad_norm": 0.16368244588375092,
"learning_rate": 1.2123589995095635e-05,
"loss": 0.0018,
"step": 12975
},
{
"epoch": 1.9125381588142263,
"grad_norm": 0.00949876382946968,
"learning_rate": 1.208272028772274e-05,
"loss": 0.0163,
"step": 13000
},
{
"epoch": 1.916216116811946,
"grad_norm": 14.246623039245605,
"learning_rate": 1.2041850580349846e-05,
"loss": 0.0342,
"step": 13025
},
{
"epoch": 1.9198940748096658,
"grad_norm": 0.00826562475413084,
"learning_rate": 1.2000980872976951e-05,
"loss": 0.0003,
"step": 13050
},
{
"epoch": 1.9235720328073853,
"grad_norm": 0.006868135649710894,
"learning_rate": 1.1960111165604055e-05,
"loss": 0.0003,
"step": 13075
},
{
"epoch": 1.9272499908051048,
"grad_norm": 0.00808362290263176,
"learning_rate": 1.191924145823116e-05,
"loss": 0.0402,
"step": 13100
},
{
"epoch": 1.9309279488028248,
"grad_norm": 0.010807299055159092,
"learning_rate": 1.1878371750858265e-05,
"loss": 0.078,
"step": 13125
},
{
"epoch": 1.9346059068005443,
"grad_norm": 0.01139509491622448,
"learning_rate": 1.1837502043485367e-05,
"loss": 0.0007,
"step": 13150
},
{
"epoch": 1.9382838647982639,
"grad_norm": 0.00977110955864191,
"learning_rate": 1.1796632336112473e-05,
"loss": 0.0004,
"step": 13175
},
{
"epoch": 1.9419618227959836,
"grad_norm": 0.006910570897161961,
"learning_rate": 1.1755762628739578e-05,
"loss": 0.0003,
"step": 13200
},
{
"epoch": 1.9456397807937034,
"grad_norm": 4.1620564460754395,
"learning_rate": 1.1714892921366683e-05,
"loss": 0.0667,
"step": 13225
},
{
"epoch": 1.949317738791423,
"grad_norm": 0.015238853171467781,
"learning_rate": 1.1674023213993787e-05,
"loss": 0.0015,
"step": 13250
},
{
"epoch": 1.9529956967891426,
"grad_norm": 0.007931707426905632,
"learning_rate": 1.1633153506620892e-05,
"loss": 0.0003,
"step": 13275
},
{
"epoch": 1.9566736547868624,
"grad_norm": 0.009560568258166313,
"learning_rate": 1.1592283799247998e-05,
"loss": 0.0003,
"step": 13300
},
{
"epoch": 1.960351612784582,
"grad_norm": 0.008578946813941002,
"learning_rate": 1.1551414091875103e-05,
"loss": 0.0008,
"step": 13325
},
{
"epoch": 1.9640295707823017,
"grad_norm": 0.011748207733035088,
"learning_rate": 1.1510544384502207e-05,
"loss": 0.0002,
"step": 13350
},
{
"epoch": 1.9677075287800214,
"grad_norm": 0.007073475047945976,
"learning_rate": 1.1469674677129312e-05,
"loss": 0.0002,
"step": 13375
},
{
"epoch": 1.971385486777741,
"grad_norm": 0.003219211706891656,
"learning_rate": 1.1428804969756418e-05,
"loss": 0.0323,
"step": 13400
},
{
"epoch": 1.9750634447754607,
"grad_norm": 0.0061137378215789795,
"learning_rate": 1.1387935262383521e-05,
"loss": 0.0002,
"step": 13425
},
{
"epoch": 1.9787414027731804,
"grad_norm": 0.006435078103095293,
"learning_rate": 1.1347065555010627e-05,
"loss": 0.0409,
"step": 13450
},
{
"epoch": 1.9824193607709,
"grad_norm": 0.002217411994934082,
"learning_rate": 1.1306195847637732e-05,
"loss": 0.0002,
"step": 13475
},
{
"epoch": 1.9860973187686195,
"grad_norm": 0.009155605919659138,
"learning_rate": 1.1265326140264837e-05,
"loss": 0.0487,
"step": 13500
},
{
"epoch": 1.9897752767663395,
"grad_norm": 0.011870177462697029,
"learning_rate": 1.122445643289194e-05,
"loss": 0.0004,
"step": 13525
},
{
"epoch": 1.993453234764059,
"grad_norm": 0.008746917359530926,
"learning_rate": 1.1183586725519045e-05,
"loss": 0.0411,
"step": 13550
},
{
"epoch": 1.9971311927617785,
"grad_norm": 0.005829541012644768,
"learning_rate": 1.114271701814615e-05,
"loss": 0.0003,
"step": 13575
},
{
"epoch": 1.9999264408400457,
"eval_accuracy": 0.9963221772710555,
"eval_auc": 0.9999360039904769,
"eval_f1": 0.9963186570460905,
"eval_loss": 0.023916827514767647,
"eval_precision": 0.9973466981132075,
"eval_recall": 0.9952927331568108,
"eval_runtime": 2353.5774,
"eval_samples_per_second": 5.776,
"eval_steps_per_second": 1.444,
"step": 13594
},
{
"epoch": 2.0008091507594985,
"grad_norm": 0.010372490622103214,
"learning_rate": 1.1101847310773254e-05,
"loss": 0.0003,
"step": 13600
},
{
"epoch": 2.004487108757218,
"grad_norm": 0.014902903698384762,
"learning_rate": 1.106097760340036e-05,
"loss": 0.0673,
"step": 13625
},
{
"epoch": 2.0081650667549376,
"grad_norm": 0.005416123196482658,
"learning_rate": 1.1020107896027465e-05,
"loss": 0.0003,
"step": 13650
},
{
"epoch": 2.0118430247526575,
"grad_norm": 0.007089643273502588,
"learning_rate": 1.097923818865457e-05,
"loss": 0.0003,
"step": 13675
},
{
"epoch": 2.015520982750377,
"grad_norm": 0.005935342982411385,
"learning_rate": 1.0938368481281674e-05,
"loss": 0.0004,
"step": 13700
},
{
"epoch": 2.0191989407480966,
"grad_norm": 0.004356461577117443,
"learning_rate": 1.0897498773908779e-05,
"loss": 0.0003,
"step": 13725
},
{
"epoch": 2.022876898745816,
"grad_norm": 0.010521539486944675,
"learning_rate": 1.0856629066535884e-05,
"loss": 0.0839,
"step": 13750
},
{
"epoch": 2.026554856743536,
"grad_norm": 0.007211623247712851,
"learning_rate": 1.081575935916299e-05,
"loss": 0.0215,
"step": 13775
},
{
"epoch": 2.0302328147412556,
"grad_norm": 0.008732822723686695,
"learning_rate": 1.0774889651790093e-05,
"loss": 0.0002,
"step": 13800
},
{
"epoch": 2.033910772738975,
"grad_norm": 0.005103670991957188,
"learning_rate": 1.0734019944417199e-05,
"loss": 0.0002,
"step": 13825
},
{
"epoch": 2.037588730736695,
"grad_norm": 0.00569286709651351,
"learning_rate": 1.0693150237044304e-05,
"loss": 0.0002,
"step": 13850
},
{
"epoch": 2.0412666887344146,
"grad_norm": 0.004690663423389196,
"learning_rate": 1.0652280529671408e-05,
"loss": 0.0002,
"step": 13875
},
{
"epoch": 2.044944646732134,
"grad_norm": 0.003813117044046521,
"learning_rate": 1.0611410822298512e-05,
"loss": 0.0001,
"step": 13900
},
{
"epoch": 2.048622604729854,
"grad_norm": 0.0031241225078701973,
"learning_rate": 1.0570541114925617e-05,
"loss": 0.0001,
"step": 13925
},
{
"epoch": 2.0523005627275737,
"grad_norm": 0.001760639250278473,
"learning_rate": 1.0529671407552722e-05,
"loss": 0.0003,
"step": 13950
},
{
"epoch": 2.055978520725293,
"grad_norm": 0.00507943844422698,
"learning_rate": 1.0488801700179826e-05,
"loss": 0.0465,
"step": 13975
},
{
"epoch": 2.059656478723013,
"grad_norm": 0.005704312119632959,
"learning_rate": 1.0447931992806931e-05,
"loss": 0.0002,
"step": 14000
},
{
"epoch": 2.0633344367207327,
"grad_norm": 0.0037137740291655064,
"learning_rate": 1.0407062285434037e-05,
"loss": 0.0002,
"step": 14025
},
{
"epoch": 2.067012394718452,
"grad_norm": 0.004969414323568344,
"learning_rate": 1.036619257806114e-05,
"loss": 0.0002,
"step": 14050
},
{
"epoch": 2.070690352716172,
"grad_norm": 0.002151261083781719,
"learning_rate": 1.0325322870688246e-05,
"loss": 0.0001,
"step": 14075
},
{
"epoch": 2.0743683107138917,
"grad_norm": 0.004214055370539427,
"learning_rate": 1.0284453163315351e-05,
"loss": 0.0001,
"step": 14100
},
{
"epoch": 2.0780462687116112,
"grad_norm": 0.004696809686720371,
"learning_rate": 1.0243583455942457e-05,
"loss": 0.0001,
"step": 14125
},
{
"epoch": 2.0817242267093308,
"grad_norm": 8.668023109436035,
"learning_rate": 1.020271374856956e-05,
"loss": 0.0642,
"step": 14150
},
{
"epoch": 2.0854021847070507,
"grad_norm": 0.00823593232780695,
"learning_rate": 1.0161844041196666e-05,
"loss": 0.0004,
"step": 14175
},
{
"epoch": 2.0890801427047703,
"grad_norm": 0.006173284724354744,
"learning_rate": 1.0120974333823771e-05,
"loss": 0.0002,
"step": 14200
},
{
"epoch": 2.09275810070249,
"grad_norm": 0.004422744270414114,
"learning_rate": 1.0080104626450876e-05,
"loss": 0.0002,
"step": 14225
},
{
"epoch": 2.0964360587002098,
"grad_norm": 0.0038796046283096075,
"learning_rate": 1.0039234919077978e-05,
"loss": 0.0002,
"step": 14250
},
{
"epoch": 2.1001140166979293,
"grad_norm": 0.003889993764460087,
"learning_rate": 9.998365211705084e-06,
"loss": 0.0008,
"step": 14275
},
{
"epoch": 2.103791974695649,
"grad_norm": 0.0035641242284327745,
"learning_rate": 9.957495504332189e-06,
"loss": 0.0001,
"step": 14300
},
{
"epoch": 2.107469932693369,
"grad_norm": 0.0037507452070713043,
"learning_rate": 9.916625796959293e-06,
"loss": 0.0001,
"step": 14325
},
{
"epoch": 2.1111478906910883,
"grad_norm": 0.002810309175401926,
"learning_rate": 9.875756089586398e-06,
"loss": 0.0001,
"step": 14350
},
{
"epoch": 2.114825848688808,
"grad_norm": 0.0030445558950304985,
"learning_rate": 9.834886382213504e-06,
"loss": 0.0001,
"step": 14375
},
{
"epoch": 2.118503806686528,
"grad_norm": 0.0025213556364178658,
"learning_rate": 9.794016674840609e-06,
"loss": 0.0001,
"step": 14400
},
{
"epoch": 2.1221817646842474,
"grad_norm": 0.0027236223686486483,
"learning_rate": 9.753146967467713e-06,
"loss": 0.0001,
"step": 14425
},
{
"epoch": 2.125859722681967,
"grad_norm": 0.002416795352473855,
"learning_rate": 9.712277260094818e-06,
"loss": 0.0004,
"step": 14450
},
{
"epoch": 2.129537680679687,
"grad_norm": 0.0019158340292051435,
"learning_rate": 9.671407552721923e-06,
"loss": 0.0001,
"step": 14475
},
{
"epoch": 2.1332156386774064,
"grad_norm": 0.002519650151953101,
"learning_rate": 9.630537845349029e-06,
"loss": 0.0001,
"step": 14500
},
{
"epoch": 2.136893596675126,
"grad_norm": 0.002294061239808798,
"learning_rate": 9.589668137976132e-06,
"loss": 0.0001,
"step": 14525
},
{
"epoch": 2.1405715546728454,
"grad_norm": 0.0021358055528253317,
"learning_rate": 9.548798430603238e-06,
"loss": 0.0471,
"step": 14550
},
{
"epoch": 2.1442495126705654,
"grad_norm": 0.001824073726311326,
"learning_rate": 9.507928723230343e-06,
"loss": 0.0001,
"step": 14575
},
{
"epoch": 2.147927470668285,
"grad_norm": 0.001960406079888344,
"learning_rate": 9.467059015857447e-06,
"loss": 0.0001,
"step": 14600
},
{
"epoch": 2.1516054286660045,
"grad_norm": 0.0018290438456460834,
"learning_rate": 9.42618930848455e-06,
"loss": 0.0001,
"step": 14625
},
{
"epoch": 2.1552833866637244,
"grad_norm": 0.0019052918069064617,
"learning_rate": 9.385319601111656e-06,
"loss": 0.0001,
"step": 14650
},
{
"epoch": 2.158961344661444,
"grad_norm": 0.0018661071080714464,
"learning_rate": 9.344449893738761e-06,
"loss": 0.0001,
"step": 14675
},
{
"epoch": 2.1626393026591635,
"grad_norm": 0.0031746248714625835,
"learning_rate": 9.303580186365865e-06,
"loss": 0.049,
"step": 14700
},
{
"epoch": 2.1663172606568835,
"grad_norm": 0.003573804395273328,
"learning_rate": 9.26271047899297e-06,
"loss": 0.0001,
"step": 14725
},
{
"epoch": 2.169995218654603,
"grad_norm": 0.003289070213213563,
"learning_rate": 9.221840771620076e-06,
"loss": 0.0113,
"step": 14750
},
{
"epoch": 2.1736731766523225,
"grad_norm": 0.00257130921818316,
"learning_rate": 9.18097106424718e-06,
"loss": 0.0483,
"step": 14775
},
{
"epoch": 2.1773511346500425,
"grad_norm": 0.005980730522423983,
"learning_rate": 9.140101356874285e-06,
"loss": 0.0002,
"step": 14800
},
{
"epoch": 2.181029092647762,
"grad_norm": 0.005953842308372259,
"learning_rate": 9.09923164950139e-06,
"loss": 0.0002,
"step": 14825
},
{
"epoch": 2.1847070506454815,
"grad_norm": 0.037090156227350235,
"learning_rate": 9.058361942128496e-06,
"loss": 0.0785,
"step": 14850
},
{
"epoch": 2.188385008643201,
"grad_norm": 0.007919345051050186,
"learning_rate": 9.0174922347556e-06,
"loss": 0.0006,
"step": 14875
},
{
"epoch": 2.192062966640921,
"grad_norm": 0.021819893270730972,
"learning_rate": 8.976622527382705e-06,
"loss": 0.0376,
"step": 14900
},
{
"epoch": 2.1957409246386406,
"grad_norm": 0.024493372067809105,
"learning_rate": 8.93575282000981e-06,
"loss": 0.0439,
"step": 14925
},
{
"epoch": 2.19941888263636,
"grad_norm": 0.038370776921510696,
"learning_rate": 8.894883112636915e-06,
"loss": 0.0802,
"step": 14950
},
{
"epoch": 2.20309684063408,
"grad_norm": 0.019332151859998703,
"learning_rate": 8.854013405264019e-06,
"loss": 0.0012,
"step": 14975
},
{
"epoch": 2.2067747986317996,
"grad_norm": 0.03362823650240898,
"learning_rate": 8.813143697891123e-06,
"loss": 0.0369,
"step": 15000
},
{
"epoch": 2.210452756629519,
"grad_norm": 0.024772603064775467,
"learning_rate": 8.772273990518228e-06,
"loss": 0.0008,
"step": 15025
},
{
"epoch": 2.214130714627239,
"grad_norm": 0.02276591770350933,
"learning_rate": 8.731404283145332e-06,
"loss": 0.1007,
"step": 15050
},
{
"epoch": 2.2178086726249586,
"grad_norm": 0.016099456697702408,
"learning_rate": 8.690534575772437e-06,
"loss": 0.0009,
"step": 15075
},
{
"epoch": 2.221486630622678,
"grad_norm": 0.003277967683970928,
"learning_rate": 8.649664868399542e-06,
"loss": 0.0069,
"step": 15100
},
{
"epoch": 2.225164588620398,
"grad_norm": 0.011233772151172161,
"learning_rate": 8.608795161026648e-06,
"loss": 0.0386,
"step": 15125
},
{
"epoch": 2.2288425466181176,
"grad_norm": 0.007455474231392145,
"learning_rate": 8.567925453653752e-06,
"loss": 0.0003,
"step": 15150
},
{
"epoch": 2.232520504615837,
"grad_norm": 0.011497107334434986,
"learning_rate": 8.527055746280857e-06,
"loss": 0.0004,
"step": 15175
},
{
"epoch": 2.236198462613557,
"grad_norm": 0.003145186696201563,
"learning_rate": 8.486186038907962e-06,
"loss": 0.0003,
"step": 15200
},
{
"epoch": 2.2398764206112767,
"grad_norm": 0.00954380352050066,
"learning_rate": 8.445316331535066e-06,
"loss": 0.0595,
"step": 15225
},
{
"epoch": 2.243554378608996,
"grad_norm": 0.007323611527681351,
"learning_rate": 8.404446624162171e-06,
"loss": 0.0004,
"step": 15250
},
{
"epoch": 2.247232336606716,
"grad_norm": 0.011944909580051899,
"learning_rate": 8.363576916789277e-06,
"loss": 0.0003,
"step": 15275
},
{
"epoch": 2.2509102946044357,
"grad_norm": 0.01304931566119194,
"learning_rate": 8.322707209416382e-06,
"loss": 0.0389,
"step": 15300
},
{
"epoch": 2.2545882526021552,
"grad_norm": 0.008787041530013084,
"learning_rate": 8.281837502043486e-06,
"loss": 0.0004,
"step": 15325
},
{
"epoch": 2.2582662105998748,
"grad_norm": 0.011969480663537979,
"learning_rate": 8.24096779467059e-06,
"loss": 0.0004,
"step": 15350
},
{
"epoch": 2.2619441685975947,
"grad_norm": 0.011229045689105988,
"learning_rate": 8.200098087297695e-06,
"loss": 0.0003,
"step": 15375
},
{
"epoch": 2.2656221265953143,
"grad_norm": 0.00922977551817894,
"learning_rate": 8.1592283799248e-06,
"loss": 0.0004,
"step": 15400
},
{
"epoch": 2.269300084593034,
"grad_norm": 0.008094431832432747,
"learning_rate": 8.118358672551904e-06,
"loss": 0.0003,
"step": 15425
},
{
"epoch": 2.2729780425907538,
"grad_norm": 0.0032492594327777624,
"learning_rate": 8.07748896517901e-06,
"loss": 0.0002,
"step": 15450
},
{
"epoch": 2.2766560005884733,
"grad_norm": 0.004196746740490198,
"learning_rate": 8.036619257806115e-06,
"loss": 0.0002,
"step": 15475
},
{
"epoch": 2.280333958586193,
"grad_norm": 0.005214506760239601,
"learning_rate": 7.995749550433218e-06,
"loss": 0.0002,
"step": 15500
},
{
"epoch": 2.284011916583913,
"grad_norm": 0.0034893976990133524,
"learning_rate": 7.954879843060324e-06,
"loss": 0.0002,
"step": 15525
},
{
"epoch": 2.2876898745816323,
"grad_norm": 0.0036745897959917784,
"learning_rate": 7.914010135687429e-06,
"loss": 0.0002,
"step": 15550
},
{
"epoch": 2.291367832579352,
"grad_norm": 0.0020664865151047707,
"learning_rate": 7.873140428314534e-06,
"loss": 0.0001,
"step": 15575
},
{
"epoch": 2.2950457905770714,
"grad_norm": 0.005072563886642456,
"learning_rate": 7.832270720941638e-06,
"loss": 0.0417,
"step": 15600
},
{
"epoch": 2.2987237485747913,
"grad_norm": 0.004465815611183643,
"learning_rate": 7.791401013568743e-06,
"loss": 0.0002,
"step": 15625
},
{
"epoch": 2.302401706572511,
"grad_norm": 0.005166616756469011,
"learning_rate": 7.750531306195849e-06,
"loss": 0.016,
"step": 15650
},
{
"epoch": 2.3060796645702304,
"grad_norm": 0.0010274857049807906,
"learning_rate": 7.709661598822953e-06,
"loss": 0.0002,
"step": 15675
},
{
"epoch": 2.3097576225679504,
"grad_norm": 0.006900500506162643,
"learning_rate": 7.668791891450058e-06,
"loss": 0.0002,
"step": 15700
},
{
"epoch": 2.31343558056567,
"grad_norm": 0.004663816653192043,
"learning_rate": 7.6279221840771624e-06,
"loss": 0.0001,
"step": 15725
},
{
"epoch": 2.3171135385633894,
"grad_norm": 0.006946474779397249,
"learning_rate": 7.587052476704268e-06,
"loss": 0.0001,
"step": 15750
},
{
"epoch": 2.3207914965611094,
"grad_norm": 0.003868917003273964,
"learning_rate": 7.5461827693313715e-06,
"loss": 0.0342,
"step": 15775
},
{
"epoch": 2.324469454558829,
"grad_norm": 0.0028817090205848217,
"learning_rate": 7.505313061958477e-06,
"loss": 0.0138,
"step": 15800
},
{
"epoch": 2.3281474125565484,
"grad_norm": 0.0059151784516870975,
"learning_rate": 7.464443354585581e-06,
"loss": 0.0733,
"step": 15825
},
{
"epoch": 2.3318253705542684,
"grad_norm": 0.004359770100563765,
"learning_rate": 7.423573647212686e-06,
"loss": 0.0421,
"step": 15850
},
{
"epoch": 2.335503328551988,
"grad_norm": 0.011809108778834343,
"learning_rate": 7.3827039398397904e-06,
"loss": 0.0003,
"step": 15875
},
{
"epoch": 2.3391812865497075,
"grad_norm": 0.005823772866278887,
"learning_rate": 7.341834232466896e-06,
"loss": 0.0003,
"step": 15900
},
{
"epoch": 2.3428592445474274,
"grad_norm": 0.003460386535152793,
"learning_rate": 7.300964525094e-06,
"loss": 0.0002,
"step": 15925
},
{
"epoch": 2.346537202545147,
"grad_norm": 0.008056416176259518,
"learning_rate": 7.260094817721106e-06,
"loss": 0.0381,
"step": 15950
},
{
"epoch": 2.3502151605428665,
"grad_norm": 0.007788171526044607,
"learning_rate": 7.21922511034821e-06,
"loss": 0.0002,
"step": 15975
},
{
"epoch": 2.3538931185405865,
"grad_norm": 0.0066045369021594524,
"learning_rate": 7.178355402975315e-06,
"loss": 0.0002,
"step": 16000
},
{
"epoch": 2.357571076538306,
"grad_norm": 0.004805906675755978,
"learning_rate": 7.137485695602419e-06,
"loss": 0.0053,
"step": 16025
},
{
"epoch": 2.3612490345360255,
"grad_norm": 0.010813217610120773,
"learning_rate": 7.096615988229525e-06,
"loss": 0.0381,
"step": 16050
},
{
"epoch": 2.3649269925337455,
"grad_norm": 0.009302555583417416,
"learning_rate": 7.055746280856629e-06,
"loss": 0.0393,
"step": 16075
},
{
"epoch": 2.368604950531465,
"grad_norm": 0.011496507562696934,
"learning_rate": 7.014876573483734e-06,
"loss": 0.0386,
"step": 16100
},
{
"epoch": 2.3722829085291846,
"grad_norm": 0.025231193751096725,
"learning_rate": 6.974006866110839e-06,
"loss": 0.0367,
"step": 16125
},
{
"epoch": 2.375960866526904,
"grad_norm": 0.020235830917954445,
"learning_rate": 6.933137158737944e-06,
"loss": 0.0006,
"step": 16150
},
{
"epoch": 2.379638824524624,
"grad_norm": 0.006687480956315994,
"learning_rate": 6.892267451365048e-06,
"loss": 0.0004,
"step": 16175
},
{
"epoch": 2.3833167825223436,
"grad_norm": 0.003918817732483149,
"learning_rate": 6.851397743992153e-06,
"loss": 0.0003,
"step": 16200
},
{
"epoch": 2.386994740520063,
"grad_norm": 0.011175381019711494,
"learning_rate": 6.810528036619258e-06,
"loss": 0.0003,
"step": 16225
},
{
"epoch": 2.390672698517783,
"grad_norm": 0.007755937986075878,
"learning_rate": 6.769658329246363e-06,
"loss": 0.0002,
"step": 16250
},
{
"epoch": 2.3943506565155026,
"grad_norm": 0.004887331277132034,
"learning_rate": 6.728788621873468e-06,
"loss": 0.0002,
"step": 16275
},
{
"epoch": 2.398028614513222,
"grad_norm": 0.0048552751541137695,
"learning_rate": 6.6879189145005725e-06,
"loss": 0.0002,
"step": 16300
},
{
"epoch": 2.401706572510942,
"grad_norm": 0.011255592107772827,
"learning_rate": 6.647049207127677e-06,
"loss": 0.0002,
"step": 16325
},
{
"epoch": 2.4053845305086616,
"grad_norm": 0.009114415384829044,
"learning_rate": 6.6061794997547816e-06,
"loss": 0.0002,
"step": 16350
},
{
"epoch": 2.409062488506381,
"grad_norm": 0.009386932477355003,
"learning_rate": 6.565309792381886e-06,
"loss": 0.0395,
"step": 16375
},
{
"epoch": 2.4127404465041007,
"grad_norm": 0.005927698221057653,
"learning_rate": 6.5244400850089915e-06,
"loss": 0.0002,
"step": 16400
},
{
"epoch": 2.4164184045018207,
"grad_norm": 0.0084453159943223,
"learning_rate": 6.483570377636096e-06,
"loss": 0.0506,
"step": 16425
},
{
"epoch": 2.42009636249954,
"grad_norm": 0.008083072490990162,
"learning_rate": 6.442700670263201e-06,
"loss": 0.0003,
"step": 16450
},
{
"epoch": 2.4237743204972597,
"grad_norm": 0.00735598336905241,
"learning_rate": 6.401830962890306e-06,
"loss": 0.0003,
"step": 16475
},
{
"epoch": 2.4274522784949797,
"grad_norm": 0.007824303582310677,
"learning_rate": 6.360961255517411e-06,
"loss": 0.0398,
"step": 16500
},
{
"epoch": 2.431130236492699,
"grad_norm": 0.009155460633337498,
"learning_rate": 6.320091548144516e-06,
"loss": 0.0003,
"step": 16525
},
{
"epoch": 2.4348081944904187,
"grad_norm": 0.005739257670938969,
"learning_rate": 6.27922184077162e-06,
"loss": 0.0003,
"step": 16550
},
{
"epoch": 2.4384861524881387,
"grad_norm": 0.006940542254596949,
"learning_rate": 6.238352133398725e-06,
"loss": 0.0003,
"step": 16575
},
{
"epoch": 2.4421641104858582,
"grad_norm": 0.0053449515253305435,
"learning_rate": 6.197482426025829e-06,
"loss": 0.0002,
"step": 16600
},
{
"epoch": 2.4458420684835778,
"grad_norm": 0.005325790494680405,
"learning_rate": 6.156612718652935e-06,
"loss": 0.0002,
"step": 16625
},
{
"epoch": 2.4495200264812977,
"grad_norm": 0.006259521469473839,
"learning_rate": 6.115743011280039e-06,
"loss": 0.0002,
"step": 16650
},
{
"epoch": 2.4531979844790173,
"grad_norm": 0.006854058708995581,
"learning_rate": 6.074873303907145e-06,
"loss": 0.0002,
"step": 16675
},
{
"epoch": 2.456875942476737,
"grad_norm": 0.004361658822745085,
"learning_rate": 6.034003596534249e-06,
"loss": 0.0002,
"step": 16700
},
{
"epoch": 2.4605539004744568,
"grad_norm": 0.0055083055049180984,
"learning_rate": 5.993133889161354e-06,
"loss": 0.0002,
"step": 16725
},
{
"epoch": 2.4642318584721763,
"grad_norm": 0.0033617918379604816,
"learning_rate": 5.952264181788458e-06,
"loss": 0.0002,
"step": 16750
},
{
"epoch": 2.467909816469896,
"grad_norm": 0.0048737069591879845,
"learning_rate": 5.911394474415564e-06,
"loss": 0.0001,
"step": 16775
},
{
"epoch": 2.471587774467616,
"grad_norm": 0.0036280914209783077,
"learning_rate": 5.870524767042668e-06,
"loss": 0.0001,
"step": 16800
},
{
"epoch": 2.4752657324653353,
"grad_norm": 0.003542742459103465,
"learning_rate": 5.829655059669773e-06,
"loss": 0.0001,
"step": 16825
},
{
"epoch": 2.478943690463055,
"grad_norm": 0.004226271994411945,
"learning_rate": 5.788785352296878e-06,
"loss": 0.0001,
"step": 16850
},
{
"epoch": 2.482621648460775,
"grad_norm": 0.0033333373721688986,
"learning_rate": 5.7479156449239826e-06,
"loss": 0.0001,
"step": 16875
},
{
"epoch": 2.4862996064584943,
"grad_norm": 0.003888545325025916,
"learning_rate": 5.707045937551087e-06,
"loss": 0.0001,
"step": 16900
},
{
"epoch": 2.489977564456214,
"grad_norm": 0.0031992702279239893,
"learning_rate": 5.666176230178192e-06,
"loss": 0.0001,
"step": 16925
},
{
"epoch": 2.4936555224539334,
"grad_norm": 0.0026705926284193993,
"learning_rate": 5.625306522805297e-06,
"loss": 0.0001,
"step": 16950
},
{
"epoch": 2.4973334804516534,
"grad_norm": 0.001754347002133727,
"learning_rate": 5.5844368154324015e-06,
"loss": 0.0001,
"step": 16975
},
{
"epoch": 2.501011438449373,
"grad_norm": 0.0018643263028934598,
"learning_rate": 5.543567108059507e-06,
"loss": 0.0006,
"step": 17000
},
{
"epoch": 2.5046893964470924,
"grad_norm": 0.002491478342562914,
"learning_rate": 5.502697400686611e-06,
"loss": 0.0001,
"step": 17025
},
{
"epoch": 2.5083673544448124,
"grad_norm": 0.002735487651079893,
"learning_rate": 5.461827693313716e-06,
"loss": 0.0001,
"step": 17050
},
{
"epoch": 2.512045312442532,
"grad_norm": 0.002121156081557274,
"learning_rate": 5.420957985940821e-06,
"loss": 0.0013,
"step": 17075
},
{
"epoch": 2.5157232704402515,
"grad_norm": 0.001368986559100449,
"learning_rate": 5.380088278567925e-06,
"loss": 0.0001,
"step": 17100
},
{
"epoch": 2.519401228437971,
"grad_norm": 0.0018654069863259792,
"learning_rate": 5.33921857119503e-06,
"loss": 0.0001,
"step": 17125
},
{
"epoch": 2.523079186435691,
"grad_norm": 0.0008688032394275069,
"learning_rate": 5.298348863822135e-06,
"loss": 0.0001,
"step": 17150
},
{
"epoch": 2.5267571444334105,
"grad_norm": 0.0014730022521689534,
"learning_rate": 5.25747915644924e-06,
"loss": 0.0001,
"step": 17175
},
{
"epoch": 2.53043510243113,
"grad_norm": 589.290283203125,
"learning_rate": 5.216609449076345e-06,
"loss": 0.0295,
"step": 17200
},
{
"epoch": 2.53411306042885,
"grad_norm": 0.0014689558884128928,
"learning_rate": 5.17573974170345e-06,
"loss": 0.0,
"step": 17225
},
{
"epoch": 2.5377910184265695,
"grad_norm": 0.001330269267782569,
"learning_rate": 5.134870034330555e-06,
"loss": 0.0,
"step": 17250
},
{
"epoch": 2.541468976424289,
"grad_norm": 0.001491030678153038,
"learning_rate": 5.094000326957658e-06,
"loss": 0.0,
"step": 17275
},
{
"epoch": 2.545146934422009,
"grad_norm": 0.002089619869366288,
"learning_rate": 5.053130619584764e-06,
"loss": 0.0778,
"step": 17300
},
{
"epoch": 2.5488248924197285,
"grad_norm": 0.0015247270930558443,
"learning_rate": 5.012260912211868e-06,
"loss": 0.0188,
"step": 17325
},
{
"epoch": 2.552502850417448,
"grad_norm": 0.002242110203951597,
"learning_rate": 4.971391204838974e-06,
"loss": 0.0179,
"step": 17350
},
{
"epoch": 2.556180808415168,
"grad_norm": 0.0018629367696121335,
"learning_rate": 4.930521497466078e-06,
"loss": 0.0181,
"step": 17375
},
{
"epoch": 2.5598587664128876,
"grad_norm": 0.0014634733088314533,
"learning_rate": 4.8896517900931836e-06,
"loss": 0.0328,
"step": 17400
},
{
"epoch": 2.563536724410607,
"grad_norm": 0.001321232644841075,
"learning_rate": 4.848782082720288e-06,
"loss": 0.0,
"step": 17425
},
{
"epoch": 2.567214682408327,
"grad_norm": 0.0012456915574148297,
"learning_rate": 4.807912375347393e-06,
"loss": 0.0003,
"step": 17450
},
{
"epoch": 2.5708926404060466,
"grad_norm": 0.0009979073656722903,
"learning_rate": 4.767042667974497e-06,
"loss": 0.0001,
"step": 17475
},
{
"epoch": 2.574570598403766,
"grad_norm": 0.001377744134515524,
"learning_rate": 4.726172960601602e-06,
"loss": 0.0,
"step": 17500
},
{
"epoch": 2.578248556401486,
"grad_norm": 0.0022715404629707336,
"learning_rate": 4.685303253228707e-06,
"loss": 0.0498,
"step": 17525
},
{
"epoch": 2.5819265143992056,
"grad_norm": 0.002307375194504857,
"learning_rate": 4.644433545855812e-06,
"loss": 0.0001,
"step": 17550
},
{
"epoch": 2.585604472396925,
"grad_norm": 0.002744297729805112,
"learning_rate": 4.603563838482917e-06,
"loss": 0.0444,
"step": 17575
},
{
"epoch": 2.589282430394645,
"grad_norm": 0.004225959535688162,
"learning_rate": 4.5626941311100215e-06,
"loss": 0.0148,
"step": 17600
},
{
"epoch": 2.5929603883923646,
"grad_norm": 0.0028173536993563175,
"learning_rate": 4.521824423737127e-06,
"loss": 0.0033,
"step": 17625
},
{
"epoch": 2.596638346390084,
"grad_norm": 0.00215067807585001,
"learning_rate": 4.4809547163642305e-06,
"loss": 0.0001,
"step": 17650
},
{
"epoch": 2.600316304387804,
"grad_norm": 0.004402931313961744,
"learning_rate": 4.440085008991336e-06,
"loss": 0.0001,
"step": 17675
},
{
"epoch": 2.6039942623855237,
"grad_norm": 0.0019863785710185766,
"learning_rate": 4.3992153016184404e-06,
"loss": 0.0001,
"step": 17700
},
{
"epoch": 2.607672220383243,
"grad_norm": 0.0032948977313935757,
"learning_rate": 4.358345594245545e-06,
"loss": 0.0001,
"step": 17725
},
{
"epoch": 2.6113501783809627,
"grad_norm": 0.0017591605428606272,
"learning_rate": 4.31747588687265e-06,
"loss": 0.0001,
"step": 17750
},
{
"epoch": 2.6150281363786827,
"grad_norm": 0.5669000148773193,
"learning_rate": 4.276606179499755e-06,
"loss": 0.0002,
"step": 17775
},
{
"epoch": 2.6187060943764022,
"grad_norm": 0.0018617259338498116,
"learning_rate": 4.23573647212686e-06,
"loss": 0.044,
"step": 17800
},
{
"epoch": 2.6223840523741218,
"grad_norm": 0.004173843190073967,
"learning_rate": 4.194866764753964e-06,
"loss": 0.0001,
"step": 17825
},
{
"epoch": 2.6260620103718413,
"grad_norm": 0.005529914982616901,
"learning_rate": 4.153997057381069e-06,
"loss": 0.0001,
"step": 17850
},
{
"epoch": 2.6297399683695613,
"grad_norm": 0.003100366098806262,
"learning_rate": 4.113127350008174e-06,
"loss": 0.0001,
"step": 17875
},
{
"epoch": 2.633417926367281,
"grad_norm": 0.0017961232224479318,
"learning_rate": 4.072257642635279e-06,
"loss": 0.012,
"step": 17900
},
{
"epoch": 2.6370958843650003,
"grad_norm": 0.0022237550001591444,
"learning_rate": 4.031387935262384e-06,
"loss": 0.0001,
"step": 17925
},
{
"epoch": 2.6407738423627203,
"grad_norm": 0.002973005408421159,
"learning_rate": 3.990518227889488e-06,
"loss": 0.0438,
"step": 17950
},
{
"epoch": 2.64445180036044,
"grad_norm": 0.003434759797528386,
"learning_rate": 3.949648520516594e-06,
"loss": 0.0003,
"step": 17975
},
{
"epoch": 2.6481297583581593,
"grad_norm": 0.003463399363681674,
"learning_rate": 3.908778813143697e-06,
"loss": 0.0001,
"step": 18000
},
{
"epoch": 2.6518077163558793,
"grad_norm": 0.003393635619431734,
"learning_rate": 3.867909105770803e-06,
"loss": 0.0002,
"step": 18025
},
{
"epoch": 2.655485674353599,
"grad_norm": 0.0027733049355447292,
"learning_rate": 3.827039398397907e-06,
"loss": 0.0001,
"step": 18050
},
{
"epoch": 2.6591636323513184,
"grad_norm": 0.0038054571487009525,
"learning_rate": 3.7861696910250126e-06,
"loss": 0.0001,
"step": 18075
},
{
"epoch": 2.6628415903490383,
"grad_norm": 0.0029823731165379286,
"learning_rate": 3.745299983652117e-06,
"loss": 0.0001,
"step": 18100
},
{
"epoch": 2.666519548346758,
"grad_norm": 0.0019862265326082706,
"learning_rate": 3.704430276279222e-06,
"loss": 0.0001,
"step": 18125
},
{
"epoch": 2.6701975063444774,
"grad_norm": 0.003500757971778512,
"learning_rate": 3.6635605689063266e-06,
"loss": 0.0001,
"step": 18150
},
{
"epoch": 2.6738754643421974,
"grad_norm": 0.002085187705233693,
"learning_rate": 3.6226908615334315e-06,
"loss": 0.0001,
"step": 18175
},
{
"epoch": 2.677553422339917,
"grad_norm": 0.0023257972206920385,
"learning_rate": 3.5818211541605365e-06,
"loss": 0.0001,
"step": 18200
},
{
"epoch": 2.6812313803376364,
"grad_norm": 0.0022203666158020496,
"learning_rate": 3.5409514467876414e-06,
"loss": 0.0001,
"step": 18225
},
{
"epoch": 2.6849093383353564,
"grad_norm": 0.0012388962786644697,
"learning_rate": 3.500081739414746e-06,
"loss": 0.0,
"step": 18250
},
{
"epoch": 2.688587296333076,
"grad_norm": 0.0008910479955375195,
"learning_rate": 3.4592120320418505e-06,
"loss": 0.0003,
"step": 18275
},
{
"epoch": 2.6922652543307954,
"grad_norm": 0.0010503758676350117,
"learning_rate": 3.4183423246689554e-06,
"loss": 0.0,
"step": 18300
},
{
"epoch": 2.6959432123285154,
"grad_norm": 0.000730241066776216,
"learning_rate": 3.37747261729606e-06,
"loss": 0.0001,
"step": 18325
},
{
"epoch": 2.699621170326235,
"grad_norm": 0.000822307774797082,
"learning_rate": 3.336602909923165e-06,
"loss": 0.0,
"step": 18350
},
{
"epoch": 2.7032991283239545,
"grad_norm": 1.4722820520401,
"learning_rate": 3.29573320255027e-06,
"loss": 0.1083,
"step": 18375
},
{
"epoch": 2.7069770863216744,
"grad_norm": 0.004885438829660416,
"learning_rate": 3.254863495177375e-06,
"loss": 0.0002,
"step": 18400
},
{
"epoch": 2.710655044319394,
"grad_norm": 0.0033965399488806725,
"learning_rate": 3.2139937878044794e-06,
"loss": 0.0001,
"step": 18425
},
{
"epoch": 2.7143330023171135,
"grad_norm": 0.004250906407833099,
"learning_rate": 3.1731240804315843e-06,
"loss": 0.0231,
"step": 18450
},
{
"epoch": 2.7180109603148335,
"grad_norm": 0.003409018972888589,
"learning_rate": 3.1322543730586893e-06,
"loss": 0.0002,
"step": 18475
},
{
"epoch": 2.721688918312553,
"grad_norm": 0.0036356241907924414,
"learning_rate": 3.0913846656857938e-06,
"loss": 0.0409,
"step": 18500
},
{
"epoch": 2.7253668763102725,
"grad_norm": 0.006237304303795099,
"learning_rate": 3.0505149583128983e-06,
"loss": 0.0386,
"step": 18525
},
{
"epoch": 2.729044834307992,
"grad_norm": 0.006783687509596348,
"learning_rate": 3.0096452509400033e-06,
"loss": 0.0002,
"step": 18550
},
{
"epoch": 2.732722792305712,
"grad_norm": 0.04287054389715195,
"learning_rate": 2.9687755435671082e-06,
"loss": 0.0321,
"step": 18575
},
{
"epoch": 2.7364007503034316,
"grad_norm": 0.0038001120556145906,
"learning_rate": 2.9279058361942127e-06,
"loss": 0.0003,
"step": 18600
},
{
"epoch": 2.740078708301151,
"grad_norm": 0.003841620171442628,
"learning_rate": 2.8870361288213177e-06,
"loss": 0.0001,
"step": 18625
},
{
"epoch": 2.7437566662988706,
"grad_norm": 0.002676568925380707,
"learning_rate": 2.8461664214484226e-06,
"loss": 0.0001,
"step": 18650
},
{
"epoch": 2.7474346242965906,
"grad_norm": 0.008307211101055145,
"learning_rate": 2.8052967140755276e-06,
"loss": 0.0001,
"step": 18675
},
{
"epoch": 2.75111258229431,
"grad_norm": 0.0034743708092719316,
"learning_rate": 2.764427006702632e-06,
"loss": 0.0001,
"step": 18700
},
{
"epoch": 2.7547905402920296,
"grad_norm": 0.0020617684349417686,
"learning_rate": 2.7235572993297367e-06,
"loss": 0.0001,
"step": 18725
},
{
"epoch": 2.7584684982897496,
"grad_norm": 0.0017286173533648252,
"learning_rate": 2.6826875919568416e-06,
"loss": 0.0001,
"step": 18750
},
{
"epoch": 2.762146456287469,
"grad_norm": 0.001774169155396521,
"learning_rate": 2.6418178845839466e-06,
"loss": 0.0001,
"step": 18775
},
{
"epoch": 2.7658244142851887,
"grad_norm": 0.003061393741518259,
"learning_rate": 2.600948177211051e-06,
"loss": 0.0298,
"step": 18800
},
{
"epoch": 2.7695023722829086,
"grad_norm": 0.00195386353880167,
"learning_rate": 2.560078469838156e-06,
"loss": 0.0001,
"step": 18825
},
{
"epoch": 2.773180330280628,
"grad_norm": 0.0015053004026412964,
"learning_rate": 2.519208762465261e-06,
"loss": 0.0001,
"step": 18850
},
{
"epoch": 2.7768582882783477,
"grad_norm": 0.002827111864462495,
"learning_rate": 2.4783390550923655e-06,
"loss": 0.0001,
"step": 18875
},
{
"epoch": 2.7805362462760677,
"grad_norm": 0.0010932940058410168,
"learning_rate": 2.4374693477194705e-06,
"loss": 0.0001,
"step": 18900
},
{
"epoch": 2.784214204273787,
"grad_norm": 7.858973026275635,
"learning_rate": 2.3965996403465754e-06,
"loss": 0.0468,
"step": 18925
},
{
"epoch": 2.7878921622715067,
"grad_norm": 0.002107949461787939,
"learning_rate": 2.35572993297368e-06,
"loss": 0.0001,
"step": 18950
},
{
"epoch": 2.7915701202692267,
"grad_norm": 0.001860212185420096,
"learning_rate": 2.3148602256007845e-06,
"loss": 0.0001,
"step": 18975
},
{
"epoch": 2.795248078266946,
"grad_norm": 0.002180658746510744,
"learning_rate": 2.2739905182278894e-06,
"loss": 0.0001,
"step": 19000
},
{
"epoch": 2.7989260362646657,
"grad_norm": 0.001684672199189663,
"learning_rate": 2.2331208108549944e-06,
"loss": 0.0001,
"step": 19025
},
{
"epoch": 2.8026039942623857,
"grad_norm": 0.0015821090200915933,
"learning_rate": 2.1922511034820993e-06,
"loss": 0.0001,
"step": 19050
},
{
"epoch": 2.8062819522601052,
"grad_norm": 0.0031413165852427483,
"learning_rate": 2.151381396109204e-06,
"loss": 0.0222,
"step": 19075
},
{
"epoch": 2.8099599102578248,
"grad_norm": 0.001654456602409482,
"learning_rate": 2.110511688736309e-06,
"loss": 0.0001,
"step": 19100
},
{
"epoch": 2.8136378682555447,
"grad_norm": 0.0025208396837115288,
"learning_rate": 2.0696419813634138e-06,
"loss": 0.0297,
"step": 19125
},
{
"epoch": 2.8173158262532643,
"grad_norm": 0.0016039038309827447,
"learning_rate": 2.0287722739905183e-06,
"loss": 0.0001,
"step": 19150
},
{
"epoch": 2.820993784250984,
"grad_norm": 0.0015692878514528275,
"learning_rate": 1.987902566617623e-06,
"loss": 0.0001,
"step": 19175
},
{
"epoch": 2.8246717422487038,
"grad_norm": 0.0014573705848306417,
"learning_rate": 1.9470328592447278e-06,
"loss": 0.0001,
"step": 19200
},
{
"epoch": 2.8283497002464233,
"grad_norm": 0.005317123141139746,
"learning_rate": 1.9061631518718325e-06,
"loss": 0.0001,
"step": 19225
},
{
"epoch": 2.832027658244143,
"grad_norm": 0.0014695243444293737,
"learning_rate": 1.8652934444989374e-06,
"loss": 0.0312,
"step": 19250
},
{
"epoch": 2.835705616241863,
"grad_norm": 0.04826376587152481,
"learning_rate": 1.8244237371260422e-06,
"loss": 0.0263,
"step": 19275
},
{
"epoch": 2.8393835742395823,
"grad_norm": 0.0012747733853757381,
"learning_rate": 1.7835540297531471e-06,
"loss": 0.0001,
"step": 19300
},
{
"epoch": 2.843061532237302,
"grad_norm": 0.0011536297388374805,
"learning_rate": 1.7426843223802519e-06,
"loss": 0.025,
"step": 19325
},
{
"epoch": 2.8467394902350214,
"grad_norm": 0.00559173384681344,
"learning_rate": 1.7018146150073564e-06,
"loss": 0.0001,
"step": 19350
},
{
"epoch": 2.8504174482327413,
"grad_norm": 0.0011801973450928926,
"learning_rate": 1.6609449076344614e-06,
"loss": 0.0001,
"step": 19375
},
{
"epoch": 2.854095406230461,
"grad_norm": 0.020327366888523102,
"learning_rate": 1.620075200261566e-06,
"loss": 0.0001,
"step": 19400
},
{
"epoch": 2.8577733642281804,
"grad_norm": 0.0012536696158349514,
"learning_rate": 1.579205492888671e-06,
"loss": 0.0001,
"step": 19425
},
{
"epoch": 2.8614513222259,
"grad_norm": 0.0010541353840380907,
"learning_rate": 1.5383357855157758e-06,
"loss": 0.0,
"step": 19450
},
{
"epoch": 2.86512928022362,
"grad_norm": 0.0011492278426885605,
"learning_rate": 1.4974660781428805e-06,
"loss": 0.0001,
"step": 19475
},
{
"epoch": 2.8688072382213394,
"grad_norm": 0.002121875062584877,
"learning_rate": 1.4565963707699853e-06,
"loss": 0.0339,
"step": 19500
},
{
"epoch": 2.872485196219059,
"grad_norm": 0.0013062648940831423,
"learning_rate": 1.4157266633970902e-06,
"loss": 0.0001,
"step": 19525
},
{
"epoch": 2.876163154216779,
"grad_norm": 0.0012365735601633787,
"learning_rate": 1.374856956024195e-06,
"loss": 0.0001,
"step": 19550
},
{
"epoch": 2.8798411122144985,
"grad_norm": 0.001490547088906169,
"learning_rate": 1.3339872486512997e-06,
"loss": 0.0389,
"step": 19575
},
{
"epoch": 2.883519070212218,
"grad_norm": 0.0010857345769181848,
"learning_rate": 1.2931175412784044e-06,
"loss": 0.0002,
"step": 19600
},
{
"epoch": 2.887197028209938,
"grad_norm": 0.0016767021734267473,
"learning_rate": 1.2522478339055092e-06,
"loss": 0.0001,
"step": 19625
},
{
"epoch": 2.8908749862076575,
"grad_norm": 0.004218839108943939,
"learning_rate": 1.2113781265326141e-06,
"loss": 0.0001,
"step": 19650
},
{
"epoch": 2.894552944205377,
"grad_norm": 0.0010596220381557941,
"learning_rate": 1.1705084191597189e-06,
"loss": 0.0001,
"step": 19675
},
{
"epoch": 2.898230902203097,
"grad_norm": 0.005758639425039291,
"learning_rate": 1.1296387117868236e-06,
"loss": 0.0001,
"step": 19700
},
{
"epoch": 2.9019088602008165,
"grad_norm": 0.004077006597071886,
"learning_rate": 1.0887690044139283e-06,
"loss": 0.0001,
"step": 19725
},
{
"epoch": 2.905586818198536,
"grad_norm": 0.023057300597429276,
"learning_rate": 1.0478992970410333e-06,
"loss": 0.0001,
"step": 19750
},
{
"epoch": 2.909264776196256,
"grad_norm": 0.0010171595495194197,
"learning_rate": 1.007029589668138e-06,
"loss": 0.0002,
"step": 19775
},
{
"epoch": 2.9129427341939755,
"grad_norm": 0.0021811590995639563,
"learning_rate": 9.661598822952428e-07,
"loss": 0.0018,
"step": 19800
},
{
"epoch": 2.916620692191695,
"grad_norm": 0.0007530258735641837,
"learning_rate": 9.252901749223475e-07,
"loss": 0.0,
"step": 19825
},
{
"epoch": 2.920298650189415,
"grad_norm": 0.0008248965605162084,
"learning_rate": 8.844204675494524e-07,
"loss": 0.0,
"step": 19850
},
{
"epoch": 2.9239766081871346,
"grad_norm": 0.0008437008364126086,
"learning_rate": 8.435507601765572e-07,
"loss": 0.0001,
"step": 19875
},
{
"epoch": 2.927654566184854,
"grad_norm": 0.0011598097626119852,
"learning_rate": 8.026810528036619e-07,
"loss": 0.029,
"step": 19900
},
{
"epoch": 2.931332524182574,
"grad_norm": 0.000989201944321394,
"learning_rate": 7.618113454307668e-07,
"loss": 0.0001,
"step": 19925
},
{
"epoch": 2.9350104821802936,
"grad_norm": 0.0009332878980785608,
"learning_rate": 7.209416380578715e-07,
"loss": 0.0001,
"step": 19950
},
{
"epoch": 2.938688440178013,
"grad_norm": 0.0010302929440513253,
"learning_rate": 6.800719306849764e-07,
"loss": 0.0316,
"step": 19975
},
{
"epoch": 2.942366398175733,
"grad_norm": 0.0011053696507588029,
"learning_rate": 6.392022233120811e-07,
"loss": 0.0001,
"step": 20000
},
{
"epoch": 2.9460443561734526,
"grad_norm": 0.001087658922187984,
"learning_rate": 5.983325159391858e-07,
"loss": 0.0,
"step": 20025
},
{
"epoch": 2.949722314171172,
"grad_norm": 0.0008900929242372513,
"learning_rate": 5.574628085662906e-07,
"loss": 0.0001,
"step": 20050
},
{
"epoch": 2.9534002721688917,
"grad_norm": 0.001053415471687913,
"learning_rate": 5.165931011933954e-07,
"loss": 0.0,
"step": 20075
},
{
"epoch": 2.9570782301666116,
"grad_norm": 0.0008429349982179701,
"learning_rate": 4.757233938205003e-07,
"loss": 0.0,
"step": 20100
},
{
"epoch": 2.960756188164331,
"grad_norm": 0.0009649925632402301,
"learning_rate": 4.34853686447605e-07,
"loss": 0.0,
"step": 20125
},
{
"epoch": 2.9644341461620507,
"grad_norm": 0.0009367198217660189,
"learning_rate": 3.939839790747098e-07,
"loss": 0.0002,
"step": 20150
},
{
"epoch": 2.9681121041597702,
"grad_norm": 0.0008432368049398065,
"learning_rate": 3.5311427170181465e-07,
"loss": 0.0,
"step": 20175
},
{
"epoch": 2.97179006215749,
"grad_norm": 2.9367611408233643,
"learning_rate": 3.1224456432891944e-07,
"loss": 0.0002,
"step": 20200
},
{
"epoch": 2.9754680201552097,
"grad_norm": 0.0008842748356983066,
"learning_rate": 2.7137485695602424e-07,
"loss": 0.0001,
"step": 20225
},
{
"epoch": 2.9791459781529293,
"grad_norm": 0.3803035616874695,
"learning_rate": 2.30505149583129e-07,
"loss": 0.0303,
"step": 20250
},
{
"epoch": 2.9828239361506492,
"grad_norm": 0.001255788840353489,
"learning_rate": 1.8963544221023377e-07,
"loss": 0.0001,
"step": 20275
},
{
"epoch": 2.9865018941483688,
"grad_norm": 0.0012517735594883561,
"learning_rate": 1.4876573483733856e-07,
"loss": 0.0001,
"step": 20300
},
{
"epoch": 2.9901798521460883,
"grad_norm": 0.0008377633057534695,
"learning_rate": 1.0789602746444335e-07,
"loss": 0.0001,
"step": 20325
},
{
"epoch": 2.9938578101438083,
"grad_norm": 0.0008699085447005928,
"learning_rate": 6.702632009154815e-08,
"loss": 0.0001,
"step": 20350
},
{
"epoch": 2.997535768141528,
"grad_norm": 0.000927777262404561,
"learning_rate": 2.6156612718652934e-08,
"loss": 0.023,
"step": 20375
},
{
"epoch": 2.9998896612600685,
"eval_accuracy": 0.9969841853622655,
"eval_auc": 0.9999289486306174,
"eval_f1": 0.9969837416317222,
"eval_loss": 0.01777876727283001,
"eval_precision": 0.9972038263428992,
"eval_recall": 0.9967637540453075,
"eval_runtime": 2385.463,
"eval_samples_per_second": 5.699,
"eval_steps_per_second": 1.425,
"step": 20391
}
],
"logging_steps": 25,
"max_steps": 20391,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.073019505969152e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}