{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 2000, "global_step": 22065, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 6.5194, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 6.4472, "step": 20 }, { "epoch": 0.0, "learning_rate": 6e-06, "loss": 6.313, "step": 30 }, { "epoch": 0.0, "learning_rate": 8.000000000000001e-06, "loss": 6.188, "step": 40 }, { "epoch": 0.0, "learning_rate": 1e-05, "loss": 6.0095, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.2e-05, "loss": 5.8336, "step": 60 }, { "epoch": 0.0, "learning_rate": 1.4e-05, "loss": 5.4948, "step": 70 }, { "epoch": 0.0, "learning_rate": 1.6000000000000003e-05, "loss": 5.2933, "step": 80 }, { "epoch": 0.0, "learning_rate": 1.8e-05, "loss": 4.908, "step": 90 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 4.6039, "step": 100 }, { "epoch": 0.0, "learning_rate": 1.9999989771610435e-05, "loss": 4.37, "step": 110 }, { "epoch": 0.01, "learning_rate": 1.9999959086462666e-05, "loss": 4.2062, "step": 120 }, { "epoch": 0.01, "learning_rate": 1.999990794461946e-05, "loss": 4.0018, "step": 130 }, { "epoch": 0.01, "learning_rate": 1.999983634618544e-05, "loss": 3.914, "step": 140 }, { "epoch": 0.01, "learning_rate": 1.9999744291307077e-05, "loss": 3.7417, "step": 150 }, { "epoch": 0.01, "learning_rate": 1.9999631780172678e-05, "loss": 3.652, "step": 160 }, { "epoch": 0.01, "learning_rate": 1.999949881301241e-05, "loss": 3.6293, "step": 170 }, { "epoch": 0.01, "learning_rate": 1.9999345390098276e-05, "loss": 3.5554, "step": 180 }, { "epoch": 0.01, "learning_rate": 1.9999171511744134e-05, "loss": 3.4717, "step": 190 }, { "epoch": 0.01, "learning_rate": 1.9998977178305683e-05, "loss": 3.3474, "step": 200 }, { "epoch": 0.01, "learning_rate": 1.9998762390180467e-05, "loss": 3.3879, "step": 210 }, { "epoch": 0.01, "learning_rate": 1.999852714780787e-05, "loss": 3.3087, "step": 220 }, { "epoch": 0.01, "learning_rate": 1.9998271451669127e-05, "loss": 3.2531, "step": 230 }, { "epoch": 0.01, "learning_rate": 1.9997995302287306e-05, "loss": 3.2259, "step": 240 }, { "epoch": 0.01, "learning_rate": 1.999769870022732e-05, "loss": 3.1982, "step": 250 }, { "epoch": 0.01, "learning_rate": 1.9997381646095923e-05, "loss": 3.2029, "step": 260 }, { "epoch": 0.01, "learning_rate": 1.9997044140541704e-05, "loss": 3.1327, "step": 270 }, { "epoch": 0.01, "learning_rate": 1.9996686184255093e-05, "loss": 3.1122, "step": 280 }, { "epoch": 0.01, "learning_rate": 1.999630777796835e-05, "loss": 3.0607, "step": 290 }, { "epoch": 0.01, "learning_rate": 1.9995908922455574e-05, "loss": 3.0865, "step": 300 }, { "epoch": 0.01, "learning_rate": 1.9995489618532696e-05, "loss": 3.0345, "step": 310 }, { "epoch": 0.01, "learning_rate": 1.9995049867057475e-05, "loss": 3.066, "step": 320 }, { "epoch": 0.01, "learning_rate": 1.99945896689295e-05, "loss": 3.0605, "step": 330 }, { "epoch": 0.02, "learning_rate": 1.9994109025090193e-05, "loss": 2.9867, "step": 340 }, { "epoch": 0.02, "learning_rate": 1.9993607936522795e-05, "loss": 2.9954, "step": 350 }, { "epoch": 0.02, "learning_rate": 1.9993086404252368e-05, "loss": 2.9902, "step": 360 }, { "epoch": 0.02, "learning_rate": 1.99925444293458e-05, "loss": 2.967, "step": 370 }, { "epoch": 0.02, "learning_rate": 1.9991982012911796e-05, "loss": 2.9373, "step": 380 }, { "epoch": 0.02, "learning_rate": 1.9991399156100888e-05, "loss": 2.9242, "step": 390 }, { "epoch": 0.02, "learning_rate": 1.99907958601054e-05, "loss": 2.8879, "step": 400 }, { "epoch": 0.02, "learning_rate": 1.9990172126159493e-05, "loss": 2.8951, "step": 410 }, { "epoch": 0.02, "learning_rate": 1.9989527955539117e-05, "loss": 2.9223, "step": 420 }, { "epoch": 0.02, "learning_rate": 1.998886334956204e-05, "loss": 2.9071, "step": 430 }, { "epoch": 0.02, "learning_rate": 1.9988178309587834e-05, "loss": 2.8767, "step": 440 }, { "epoch": 0.02, "learning_rate": 1.9987472837017875e-05, "loss": 2.8798, "step": 450 }, { "epoch": 0.02, "learning_rate": 1.998674693329532e-05, "loss": 2.8751, "step": 460 }, { "epoch": 0.02, "learning_rate": 1.9986000599905144e-05, "loss": 2.8694, "step": 470 }, { "epoch": 0.02, "learning_rate": 1.9985233838374095e-05, "loss": 2.8434, "step": 480 }, { "epoch": 0.02, "learning_rate": 1.9984446650270735e-05, "loss": 2.8669, "step": 490 }, { "epoch": 0.02, "learning_rate": 1.9983639037205383e-05, "loss": 2.8698, "step": 500 }, { "epoch": 0.02, "learning_rate": 1.9982811000830168e-05, "loss": 2.7942, "step": 510 }, { "epoch": 0.02, "learning_rate": 1.9981962542838973e-05, "loss": 2.8396, "step": 520 }, { "epoch": 0.02, "learning_rate": 1.998109366496748e-05, "loss": 2.8324, "step": 530 }, { "epoch": 0.02, "learning_rate": 1.9980204368993125e-05, "loss": 2.8112, "step": 540 }, { "epoch": 0.02, "learning_rate": 1.9979294656735128e-05, "loss": 2.789, "step": 550 }, { "epoch": 0.03, "learning_rate": 1.9978364530054465e-05, "loss": 2.7973, "step": 560 }, { "epoch": 0.03, "learning_rate": 1.997741399085387e-05, "loss": 2.7944, "step": 570 }, { "epoch": 0.03, "learning_rate": 1.997644304107785e-05, "loss": 2.8026, "step": 580 }, { "epoch": 0.03, "learning_rate": 1.9975451682712653e-05, "loss": 2.7717, "step": 590 }, { "epoch": 0.03, "learning_rate": 1.997443991778627e-05, "loss": 2.7765, "step": 600 }, { "epoch": 0.03, "learning_rate": 1.9973407748368456e-05, "loss": 2.7911, "step": 610 }, { "epoch": 0.03, "learning_rate": 1.9972355176570695e-05, "loss": 2.7744, "step": 620 }, { "epoch": 0.03, "learning_rate": 1.9971282204546206e-05, "loss": 2.7749, "step": 630 }, { "epoch": 0.03, "learning_rate": 1.997018883448995e-05, "loss": 2.7949, "step": 640 }, { "epoch": 0.03, "learning_rate": 1.9969075068638603e-05, "loss": 2.73, "step": 650 }, { "epoch": 0.03, "learning_rate": 1.996794090927058e-05, "loss": 2.7475, "step": 660 }, { "epoch": 0.03, "learning_rate": 1.9966786358705997e-05, "loss": 2.7215, "step": 670 }, { "epoch": 0.03, "learning_rate": 1.9965611419306697e-05, "loss": 2.7395, "step": 680 }, { "epoch": 0.03, "learning_rate": 1.9964416093476227e-05, "loss": 2.7544, "step": 690 }, { "epoch": 0.03, "learning_rate": 1.996320038365984e-05, "loss": 2.7504, "step": 700 }, { "epoch": 0.03, "learning_rate": 1.9961964292344485e-05, "loss": 2.7467, "step": 710 }, { "epoch": 0.03, "learning_rate": 1.9960707822058807e-05, "loss": 2.7522, "step": 720 }, { "epoch": 0.03, "learning_rate": 1.995943097537314e-05, "loss": 2.7601, "step": 730 }, { "epoch": 0.03, "learning_rate": 1.99581337548995e-05, "loss": 2.7437, "step": 740 }, { "epoch": 0.03, "learning_rate": 1.9956816163291583e-05, "loss": 2.7516, "step": 750 }, { "epoch": 0.03, "learning_rate": 1.9955478203244758e-05, "loss": 2.7282, "step": 760 }, { "epoch": 0.03, "learning_rate": 1.9954119877496058e-05, "loss": 2.7259, "step": 770 }, { "epoch": 0.04, "learning_rate": 1.995274118882418e-05, "loss": 2.7284, "step": 780 }, { "epoch": 0.04, "learning_rate": 1.9951342140049483e-05, "loss": 2.723, "step": 790 }, { "epoch": 0.04, "learning_rate": 1.9949922734033958e-05, "loss": 2.7367, "step": 800 }, { "epoch": 0.04, "learning_rate": 1.9948482973681267e-05, "loss": 2.7136, "step": 810 }, { "epoch": 0.04, "learning_rate": 1.9947022861936686e-05, "loss": 2.7344, "step": 820 }, { "epoch": 0.04, "learning_rate": 1.9945542401787137e-05, "loss": 2.7196, "step": 830 }, { "epoch": 0.04, "learning_rate": 1.9944041596261162e-05, "loss": 2.7228, "step": 840 }, { "epoch": 0.04, "learning_rate": 1.994252044842893e-05, "loss": 2.6978, "step": 850 }, { "epoch": 0.04, "learning_rate": 1.9940978961402217e-05, "loss": 2.7169, "step": 860 }, { "epoch": 0.04, "learning_rate": 1.993941713833441e-05, "loss": 2.7058, "step": 870 }, { "epoch": 0.04, "learning_rate": 1.9937834982420492e-05, "loss": 2.7083, "step": 880 }, { "epoch": 0.04, "learning_rate": 1.993623249689705e-05, "loss": 2.7348, "step": 890 }, { "epoch": 0.04, "learning_rate": 1.9934609685042248e-05, "loss": 2.7023, "step": 900 }, { "epoch": 0.04, "learning_rate": 1.9932966550175842e-05, "loss": 2.69, "step": 910 }, { "epoch": 0.04, "learning_rate": 1.9931303095659153e-05, "loss": 2.6818, "step": 920 }, { "epoch": 0.04, "learning_rate": 1.9929619324895074e-05, "loss": 2.724, "step": 930 }, { "epoch": 0.04, "learning_rate": 1.9927915241328056e-05, "loss": 2.6865, "step": 940 }, { "epoch": 0.04, "learning_rate": 1.992619084844411e-05, "loss": 2.6784, "step": 950 }, { "epoch": 0.04, "learning_rate": 1.992444614977078e-05, "loss": 2.6982, "step": 960 }, { "epoch": 0.04, "learning_rate": 1.992268114887717e-05, "loss": 2.698, "step": 970 }, { "epoch": 0.04, "learning_rate": 1.992089584937389e-05, "loss": 2.6747, "step": 980 }, { "epoch": 0.04, "learning_rate": 1.9919090254913095e-05, "loss": 2.6793, "step": 990 }, { "epoch": 0.05, "learning_rate": 1.9917264369188453e-05, "loss": 2.685, "step": 1000 }, { "epoch": 0.05, "learning_rate": 1.9915418195935128e-05, "loss": 2.6685, "step": 1010 }, { "epoch": 0.05, "learning_rate": 1.9913551738929803e-05, "loss": 2.672, "step": 1020 }, { "epoch": 0.05, "learning_rate": 1.9911665001990648e-05, "loss": 2.7005, "step": 1030 }, { "epoch": 0.05, "learning_rate": 1.9909757988977315e-05, "loss": 2.678, "step": 1040 }, { "epoch": 0.05, "learning_rate": 1.9907830703790943e-05, "loss": 2.6533, "step": 1050 }, { "epoch": 0.05, "learning_rate": 1.9905883150374134e-05, "loss": 2.6703, "step": 1060 }, { "epoch": 0.05, "learning_rate": 1.9903915332710952e-05, "loss": 2.6778, "step": 1070 }, { "epoch": 0.05, "learning_rate": 1.990192725482693e-05, "loss": 2.6672, "step": 1080 }, { "epoch": 0.05, "learning_rate": 1.9899918920789017e-05, "loss": 2.6442, "step": 1090 }, { "epoch": 0.05, "learning_rate": 1.9897890334705636e-05, "loss": 2.662, "step": 1100 }, { "epoch": 0.05, "learning_rate": 1.9895841500726606e-05, "loss": 2.6608, "step": 1110 }, { "epoch": 0.05, "learning_rate": 1.9893772423043188e-05, "loss": 2.6562, "step": 1120 }, { "epoch": 0.05, "learning_rate": 1.9891683105888048e-05, "loss": 2.6866, "step": 1130 }, { "epoch": 0.05, "learning_rate": 1.9889573553535258e-05, "loss": 2.6592, "step": 1140 }, { "epoch": 0.05, "learning_rate": 1.988744377030028e-05, "loss": 2.6526, "step": 1150 }, { "epoch": 0.05, "learning_rate": 1.988529376053997e-05, "loss": 2.644, "step": 1160 }, { "epoch": 0.05, "learning_rate": 1.988312352865254e-05, "loss": 2.6692, "step": 1170 }, { "epoch": 0.05, "learning_rate": 1.9880933079077605e-05, "loss": 2.663, "step": 1180 }, { "epoch": 0.05, "learning_rate": 1.9878722416296104e-05, "loss": 2.6357, "step": 1190 }, { "epoch": 0.05, "learning_rate": 1.9876491544830352e-05, "loss": 2.6277, "step": 1200 }, { "epoch": 0.05, "learning_rate": 1.9874240469243987e-05, "loss": 2.6691, "step": 1210 }, { "epoch": 0.06, "learning_rate": 1.9871969194141985e-05, "loss": 2.6788, "step": 1220 }, { "epoch": 0.06, "learning_rate": 1.986967772417065e-05, "loss": 2.6312, "step": 1230 }, { "epoch": 0.06, "learning_rate": 1.9867366064017584e-05, "loss": 2.6423, "step": 1240 }, { "epoch": 0.06, "learning_rate": 1.9865034218411698e-05, "loss": 2.6013, "step": 1250 }, { "epoch": 0.06, "learning_rate": 1.9862682192123203e-05, "loss": 2.6355, "step": 1260 }, { "epoch": 0.06, "learning_rate": 1.9860309989963585e-05, "loss": 2.6463, "step": 1270 }, { "epoch": 0.06, "learning_rate": 1.9857917616785606e-05, "loss": 2.6404, "step": 1280 }, { "epoch": 0.06, "learning_rate": 1.9855505077483288e-05, "loss": 2.6601, "step": 1290 }, { "epoch": 0.06, "learning_rate": 1.9853072376991913e-05, "loss": 2.6469, "step": 1300 }, { "epoch": 0.06, "learning_rate": 1.9850619520288e-05, "loss": 2.6393, "step": 1310 }, { "epoch": 0.06, "learning_rate": 1.9848146512389305e-05, "loss": 2.6231, "step": 1320 }, { "epoch": 0.06, "learning_rate": 1.984565335835481e-05, "loss": 2.6386, "step": 1330 }, { "epoch": 0.06, "learning_rate": 1.9843140063284694e-05, "loss": 2.6451, "step": 1340 }, { "epoch": 0.06, "learning_rate": 1.9840606632320362e-05, "loss": 2.6009, "step": 1350 }, { "epoch": 0.06, "learning_rate": 1.9838053070644388e-05, "loss": 2.621, "step": 1360 }, { "epoch": 0.06, "learning_rate": 1.983547938348054e-05, "loss": 2.633, "step": 1370 }, { "epoch": 0.06, "learning_rate": 1.9832885576093755e-05, "loss": 2.6157, "step": 1380 }, { "epoch": 0.06, "learning_rate": 1.983027165379013e-05, "loss": 2.6275, "step": 1390 }, { "epoch": 0.06, "learning_rate": 1.9827637621916897e-05, "loss": 2.6063, "step": 1400 }, { "epoch": 0.06, "learning_rate": 1.9824983485862446e-05, "loss": 2.6351, "step": 1410 }, { "epoch": 0.06, "learning_rate": 1.9822309251056286e-05, "loss": 2.6623, "step": 1420 }, { "epoch": 0.06, "learning_rate": 1.981961492296903e-05, "loss": 2.6489, "step": 1430 }, { "epoch": 0.07, "learning_rate": 1.981690050711242e-05, "loss": 2.6471, "step": 1440 }, { "epoch": 0.07, "learning_rate": 1.9814166009039266e-05, "loss": 2.6274, "step": 1450 }, { "epoch": 0.07, "learning_rate": 1.9811411434343467e-05, "loss": 2.633, "step": 1460 }, { "epoch": 0.07, "learning_rate": 1.9808636788660008e-05, "loss": 2.6094, "step": 1470 }, { "epoch": 0.07, "learning_rate": 1.9805842077664913e-05, "loss": 2.6053, "step": 1480 }, { "epoch": 0.07, "learning_rate": 1.9803027307075258e-05, "loss": 2.6303, "step": 1490 }, { "epoch": 0.07, "learning_rate": 1.9800192482649164e-05, "loss": 2.6214, "step": 1500 }, { "epoch": 0.07, "learning_rate": 1.9797337610185763e-05, "loss": 2.6238, "step": 1510 }, { "epoch": 0.07, "learning_rate": 1.979446269552521e-05, "loss": 2.6193, "step": 1520 }, { "epoch": 0.07, "learning_rate": 1.9791567744548644e-05, "loss": 2.6114, "step": 1530 }, { "epoch": 0.07, "learning_rate": 1.9788652763178215e-05, "loss": 2.6314, "step": 1540 }, { "epoch": 0.07, "learning_rate": 1.9785717757377033e-05, "loss": 2.6047, "step": 1550 }, { "epoch": 0.07, "learning_rate": 1.9782762733149167e-05, "loss": 2.6028, "step": 1560 }, { "epoch": 0.07, "learning_rate": 1.977978769653965e-05, "loss": 2.6117, "step": 1570 }, { "epoch": 0.07, "learning_rate": 1.977679265363445e-05, "loss": 2.603, "step": 1580 }, { "epoch": 0.07, "learning_rate": 1.9773777610560458e-05, "loss": 2.5901, "step": 1590 }, { "epoch": 0.07, "learning_rate": 1.977074257348548e-05, "loss": 2.6346, "step": 1600 }, { "epoch": 0.07, "learning_rate": 1.9767687548618227e-05, "loss": 2.5964, "step": 1610 }, { "epoch": 0.07, "learning_rate": 1.9764612542208297e-05, "loss": 2.6068, "step": 1620 }, { "epoch": 0.07, "learning_rate": 1.976151756054616e-05, "loss": 2.5897, "step": 1630 }, { "epoch": 0.07, "learning_rate": 1.975840260996315e-05, "loss": 2.6059, "step": 1640 }, { "epoch": 0.07, "learning_rate": 1.9755267696831454e-05, "loss": 2.6095, "step": 1650 }, { "epoch": 0.08, "learning_rate": 1.9752112827564098e-05, "loss": 2.6109, "step": 1660 }, { "epoch": 0.08, "learning_rate": 1.9748938008614925e-05, "loss": 2.6161, "step": 1670 }, { "epoch": 0.08, "learning_rate": 1.974574324647859e-05, "loss": 2.6231, "step": 1680 }, { "epoch": 0.08, "learning_rate": 1.9742528547690554e-05, "loss": 2.6016, "step": 1690 }, { "epoch": 0.08, "learning_rate": 1.9739293918827047e-05, "loss": 2.5802, "step": 1700 }, { "epoch": 0.08, "learning_rate": 1.9736039366505087e-05, "loss": 2.5746, "step": 1710 }, { "epoch": 0.08, "learning_rate": 1.973276489738243e-05, "loss": 2.6299, "step": 1720 }, { "epoch": 0.08, "learning_rate": 1.9729470518157595e-05, "loss": 2.5615, "step": 1730 }, { "epoch": 0.08, "learning_rate": 1.972615623556981e-05, "loss": 2.6017, "step": 1740 }, { "epoch": 0.08, "learning_rate": 1.972282205639904e-05, "loss": 2.5797, "step": 1750 }, { "epoch": 0.08, "learning_rate": 1.9719467987465934e-05, "loss": 2.5667, "step": 1760 }, { "epoch": 0.08, "learning_rate": 1.9716094035631843e-05, "loss": 2.5938, "step": 1770 }, { "epoch": 0.08, "learning_rate": 1.9712700207798783e-05, "loss": 2.5997, "step": 1780 }, { "epoch": 0.08, "learning_rate": 1.970928651090943e-05, "loss": 2.6088, "step": 1790 }, { "epoch": 0.08, "learning_rate": 1.9705852951947115e-05, "loss": 2.5769, "step": 1800 }, { "epoch": 0.08, "learning_rate": 1.9702399537935785e-05, "loss": 2.5784, "step": 1810 }, { "epoch": 0.08, "learning_rate": 1.9698926275940017e-05, "loss": 2.6015, "step": 1820 }, { "epoch": 0.08, "learning_rate": 1.9695433173064987e-05, "loss": 2.5902, "step": 1830 }, { "epoch": 0.08, "learning_rate": 1.9691920236456462e-05, "loss": 2.5692, "step": 1840 }, { "epoch": 0.08, "learning_rate": 1.968838747330077e-05, "loss": 2.6207, "step": 1850 }, { "epoch": 0.08, "learning_rate": 1.9684834890824813e-05, "loss": 2.5922, "step": 1860 }, { "epoch": 0.08, "learning_rate": 1.9681262496296028e-05, "loss": 2.5989, "step": 1870 }, { "epoch": 0.09, "learning_rate": 1.9677670297022383e-05, "loss": 2.5859, "step": 1880 }, { "epoch": 0.09, "learning_rate": 1.9674058300352364e-05, "loss": 2.5726, "step": 1890 }, { "epoch": 0.09, "learning_rate": 1.9670426513674954e-05, "loss": 2.6198, "step": 1900 }, { "epoch": 0.09, "learning_rate": 1.9666774944419613e-05, "loss": 2.5914, "step": 1910 }, { "epoch": 0.09, "learning_rate": 1.9663103600056276e-05, "loss": 2.6187, "step": 1920 }, { "epoch": 0.09, "learning_rate": 1.965941248809534e-05, "loss": 2.5771, "step": 1930 }, { "epoch": 0.09, "learning_rate": 1.965570161608762e-05, "loss": 2.5923, "step": 1940 }, { "epoch": 0.09, "learning_rate": 1.965197099162437e-05, "loss": 2.6079, "step": 1950 }, { "epoch": 0.09, "learning_rate": 1.9648220622337245e-05, "loss": 2.6138, "step": 1960 }, { "epoch": 0.09, "learning_rate": 1.9644450515898293e-05, "loss": 2.5875, "step": 1970 }, { "epoch": 0.09, "learning_rate": 1.9640660680019938e-05, "loss": 2.5786, "step": 1980 }, { "epoch": 0.09, "learning_rate": 1.9636851122454965e-05, "loss": 2.5824, "step": 1990 }, { "epoch": 0.09, "learning_rate": 1.9633021850996496e-05, "loss": 2.6021, "step": 2000 }, { "epoch": 0.09, "eval_accuracy": 0.5305171973647798, "eval_loss": 2.3284668922424316, "eval_runtime": 9.9238, "eval_samples_per_second": 137.749, "eval_steps_per_second": 1.108, "step": 2000 }, { "epoch": 0.09, "learning_rate": 1.9629172873477995e-05, "loss": 2.5867, "step": 2010 }, { "epoch": 0.09, "learning_rate": 1.9625304197773226e-05, "loss": 2.5904, "step": 2020 }, { "epoch": 0.09, "learning_rate": 1.962141583179625e-05, "loss": 2.571, "step": 2030 }, { "epoch": 0.09, "learning_rate": 1.9617507783501416e-05, "loss": 2.5682, "step": 2040 }, { "epoch": 0.09, "learning_rate": 1.9613580060883336e-05, "loss": 2.5904, "step": 2050 }, { "epoch": 0.09, "learning_rate": 1.960963267197686e-05, "loss": 2.5614, "step": 2060 }, { "epoch": 0.09, "learning_rate": 1.960566562485707e-05, "loss": 2.5949, "step": 2070 }, { "epoch": 0.09, "learning_rate": 1.9601678927639275e-05, "loss": 2.555, "step": 2080 }, { "epoch": 0.09, "learning_rate": 1.959767258847897e-05, "loss": 2.5861, "step": 2090 }, { "epoch": 0.1, "learning_rate": 1.9593646615571834e-05, "loss": 2.5877, "step": 2100 }, { "epoch": 0.1, "learning_rate": 1.958960101715371e-05, "loss": 2.5815, "step": 2110 }, { "epoch": 0.1, "learning_rate": 1.9585535801500593e-05, "loss": 2.5512, "step": 2120 }, { "epoch": 0.1, "learning_rate": 1.9581450976928603e-05, "loss": 2.5319, "step": 2130 }, { "epoch": 0.1, "learning_rate": 1.957734655179397e-05, "loss": 2.5692, "step": 2140 }, { "epoch": 0.1, "learning_rate": 1.9573222534493034e-05, "loss": 2.5709, "step": 2150 }, { "epoch": 0.1, "learning_rate": 1.9569078933462206e-05, "loss": 2.5987, "step": 2160 }, { "epoch": 0.1, "learning_rate": 1.9564915757177955e-05, "loss": 2.5647, "step": 2170 }, { "epoch": 0.1, "learning_rate": 1.9560733014156798e-05, "loss": 2.5883, "step": 2180 }, { "epoch": 0.1, "learning_rate": 1.9556530712955282e-05, "loss": 2.5858, "step": 2190 }, { "epoch": 0.1, "learning_rate": 1.9552308862169962e-05, "loss": 2.5903, "step": 2200 }, { "epoch": 0.1, "learning_rate": 1.9548067470437386e-05, "loss": 2.5687, "step": 2210 }, { "epoch": 0.1, "learning_rate": 1.9543806546434074e-05, "loss": 2.5815, "step": 2220 }, { "epoch": 0.1, "learning_rate": 1.95395260988765e-05, "loss": 2.5657, "step": 2230 }, { "epoch": 0.1, "learning_rate": 1.9535226136521088e-05, "loss": 2.5691, "step": 2240 }, { "epoch": 0.1, "learning_rate": 1.9530906668164172e-05, "loss": 2.5632, "step": 2250 }, { "epoch": 0.1, "learning_rate": 1.9526567702642e-05, "loss": 2.5308, "step": 2260 }, { "epoch": 0.1, "learning_rate": 1.9522209248830686e-05, "loss": 2.564, "step": 2270 }, { "epoch": 0.1, "learning_rate": 1.951783131564623e-05, "loss": 2.5459, "step": 2280 }, { "epoch": 0.1, "learning_rate": 1.9513433912044473e-05, "loss": 2.587, "step": 2290 }, { "epoch": 0.1, "learning_rate": 1.9509017047021085e-05, "loss": 2.5739, "step": 2300 }, { "epoch": 0.1, "learning_rate": 1.950458072961155e-05, "loss": 2.5777, "step": 2310 }, { "epoch": 0.11, "learning_rate": 1.950012496889114e-05, "loss": 2.5725, "step": 2320 }, { "epoch": 0.11, "learning_rate": 1.9495649773974913e-05, "loss": 2.5745, "step": 2330 }, { "epoch": 0.11, "learning_rate": 1.9491155154017675e-05, "loss": 2.5686, "step": 2340 }, { "epoch": 0.11, "learning_rate": 1.948664111821397e-05, "loss": 2.5904, "step": 2350 }, { "epoch": 0.11, "learning_rate": 1.948210767579806e-05, "loss": 2.5503, "step": 2360 }, { "epoch": 0.11, "learning_rate": 1.9477554836043907e-05, "loss": 2.5743, "step": 2370 }, { "epoch": 0.11, "learning_rate": 1.947298260826516e-05, "loss": 2.5746, "step": 2380 }, { "epoch": 0.11, "learning_rate": 1.946839100181512e-05, "loss": 2.5761, "step": 2390 }, { "epoch": 0.11, "learning_rate": 1.9463780026086735e-05, "loss": 2.5883, "step": 2400 }, { "epoch": 0.11, "learning_rate": 1.945914969051258e-05, "loss": 2.5673, "step": 2410 }, { "epoch": 0.11, "learning_rate": 1.9454500004564825e-05, "loss": 2.5564, "step": 2420 }, { "epoch": 0.11, "learning_rate": 1.9449830977755234e-05, "loss": 2.5568, "step": 2430 }, { "epoch": 0.11, "learning_rate": 1.9445142619635127e-05, "loss": 2.5582, "step": 2440 }, { "epoch": 0.11, "learning_rate": 1.9440434939795382e-05, "loss": 2.5595, "step": 2450 }, { "epoch": 0.11, "learning_rate": 1.943570794786639e-05, "loss": 2.5521, "step": 2460 }, { "epoch": 0.11, "learning_rate": 1.9430961653518056e-05, "loss": 2.5493, "step": 2470 }, { "epoch": 0.11, "learning_rate": 1.9426196066459766e-05, "loss": 2.5564, "step": 2480 }, { "epoch": 0.11, "learning_rate": 1.942141119644038e-05, "loss": 2.5652, "step": 2490 }, { "epoch": 0.11, "learning_rate": 1.9416607053248203e-05, "loss": 2.5486, "step": 2500 }, { "epoch": 0.11, "learning_rate": 1.941178364671096e-05, "loss": 2.5633, "step": 2510 }, { "epoch": 0.11, "learning_rate": 1.940694098669579e-05, "loss": 2.5736, "step": 2520 }, { "epoch": 0.11, "learning_rate": 1.9402079083109217e-05, "loss": 2.5312, "step": 2530 }, { "epoch": 0.12, "learning_rate": 1.939719794589712e-05, "loss": 2.5523, "step": 2540 }, { "epoch": 0.12, "learning_rate": 1.9392297585044748e-05, "loss": 2.5435, "step": 2550 }, { "epoch": 0.12, "learning_rate": 1.9387378010576653e-05, "loss": 2.5447, "step": 2560 }, { "epoch": 0.12, "learning_rate": 1.9382439232556696e-05, "loss": 2.5613, "step": 2570 }, { "epoch": 0.12, "learning_rate": 1.9377481261088036e-05, "loss": 2.5827, "step": 2580 }, { "epoch": 0.12, "learning_rate": 1.937250410631308e-05, "loss": 2.5472, "step": 2590 }, { "epoch": 0.12, "learning_rate": 1.9367507778413483e-05, "loss": 2.5242, "step": 2600 }, { "epoch": 0.12, "learning_rate": 1.9362492287610126e-05, "loss": 2.5497, "step": 2610 }, { "epoch": 0.12, "learning_rate": 1.9357457644163086e-05, "loss": 2.5683, "step": 2620 }, { "epoch": 0.12, "learning_rate": 1.9352403858371618e-05, "loss": 2.5524, "step": 2630 }, { "epoch": 0.12, "learning_rate": 1.9347330940574145e-05, "loss": 2.5521, "step": 2640 }, { "epoch": 0.12, "learning_rate": 1.9342238901148222e-05, "loss": 2.5512, "step": 2650 }, { "epoch": 0.12, "learning_rate": 1.933712775051052e-05, "loss": 2.5571, "step": 2660 }, { "epoch": 0.12, "learning_rate": 1.933199749911681e-05, "loss": 2.5624, "step": 2670 }, { "epoch": 0.12, "learning_rate": 1.932684815746193e-05, "loss": 2.5375, "step": 2680 }, { "epoch": 0.12, "learning_rate": 1.9321679736079777e-05, "loss": 2.5468, "step": 2690 }, { "epoch": 0.12, "learning_rate": 1.9316492245543276e-05, "loss": 2.5677, "step": 2700 }, { "epoch": 0.12, "learning_rate": 1.9311285696464362e-05, "loss": 2.5439, "step": 2710 }, { "epoch": 0.12, "learning_rate": 1.9306060099493956e-05, "loss": 2.5453, "step": 2720 }, { "epoch": 0.12, "learning_rate": 1.930081546532195e-05, "loss": 2.5596, "step": 2730 }, { "epoch": 0.12, "learning_rate": 1.929555180467717e-05, "loss": 2.5487, "step": 2740 }, { "epoch": 0.12, "learning_rate": 1.9290269128327374e-05, "loss": 2.567, "step": 2750 }, { "epoch": 0.13, "learning_rate": 1.9284967447079218e-05, "loss": 2.5375, "step": 2760 }, { "epoch": 0.13, "learning_rate": 1.9279646771778234e-05, "loss": 2.5614, "step": 2770 }, { "epoch": 0.13, "learning_rate": 1.927430711330881e-05, "loss": 2.5561, "step": 2780 }, { "epoch": 0.13, "learning_rate": 1.926894848259416e-05, "loss": 2.557, "step": 2790 }, { "epoch": 0.13, "learning_rate": 1.9263570890596327e-05, "loss": 2.5625, "step": 2800 }, { "epoch": 0.13, "learning_rate": 1.9258174348316126e-05, "loss": 2.514, "step": 2810 }, { "epoch": 0.13, "learning_rate": 1.925275886679315e-05, "loss": 2.5545, "step": 2820 }, { "epoch": 0.13, "learning_rate": 1.924732445710572e-05, "loss": 2.5612, "step": 2830 }, { "epoch": 0.13, "learning_rate": 1.92418711303709e-05, "loss": 2.5577, "step": 2840 }, { "epoch": 0.13, "learning_rate": 1.923639889774443e-05, "loss": 2.516, "step": 2850 }, { "epoch": 0.13, "learning_rate": 1.9230907770420737e-05, "loss": 2.536, "step": 2860 }, { "epoch": 0.13, "learning_rate": 1.9225397759632905e-05, "loss": 2.5547, "step": 2870 }, { "epoch": 0.13, "learning_rate": 1.9219868876652637e-05, "loss": 2.5592, "step": 2880 }, { "epoch": 0.13, "learning_rate": 1.9214321132790247e-05, "loss": 2.5764, "step": 2890 }, { "epoch": 0.13, "learning_rate": 1.9208754539394632e-05, "loss": 2.537, "step": 2900 }, { "epoch": 0.13, "learning_rate": 1.920316910785325e-05, "loss": 2.5297, "step": 2910 }, { "epoch": 0.13, "learning_rate": 1.9197564849592093e-05, "loss": 2.5446, "step": 2920 }, { "epoch": 0.13, "learning_rate": 1.919194177607567e-05, "loss": 2.5511, "step": 2930 }, { "epoch": 0.13, "learning_rate": 1.9186299898806983e-05, "loss": 2.5469, "step": 2940 }, { "epoch": 0.13, "learning_rate": 1.9180639229327483e-05, "loss": 2.5429, "step": 2950 }, { "epoch": 0.13, "learning_rate": 1.9174959779217087e-05, "loss": 2.5394, "step": 2960 }, { "epoch": 0.13, "learning_rate": 1.9169261560094122e-05, "loss": 2.5503, "step": 2970 }, { "epoch": 0.14, "learning_rate": 1.9163544583615303e-05, "loss": 2.5425, "step": 2980 }, { "epoch": 0.14, "learning_rate": 1.9157808861475723e-05, "loss": 2.5261, "step": 2990 }, { "epoch": 0.14, "learning_rate": 1.9152054405408823e-05, "loss": 2.5452, "step": 3000 }, { "epoch": 0.14, "learning_rate": 1.914628122718637e-05, "loss": 2.5342, "step": 3010 }, { "epoch": 0.14, "learning_rate": 1.9140489338618423e-05, "loss": 2.5413, "step": 3020 }, { "epoch": 0.14, "learning_rate": 1.9134678751553325e-05, "loss": 2.5623, "step": 3030 }, { "epoch": 0.14, "learning_rate": 1.912884947787766e-05, "loss": 2.5459, "step": 3040 }, { "epoch": 0.14, "learning_rate": 1.9123001529516246e-05, "loss": 2.524, "step": 3050 }, { "epoch": 0.14, "learning_rate": 1.9117134918432106e-05, "loss": 2.5516, "step": 3060 }, { "epoch": 0.14, "learning_rate": 1.911124965662643e-05, "loss": 2.5663, "step": 3070 }, { "epoch": 0.14, "learning_rate": 1.9105345756138575e-05, "loss": 2.5605, "step": 3080 }, { "epoch": 0.14, "learning_rate": 1.9099423229046015e-05, "loss": 2.5279, "step": 3090 }, { "epoch": 0.14, "learning_rate": 1.909348208746433e-05, "loss": 2.5512, "step": 3100 }, { "epoch": 0.14, "learning_rate": 1.9087522343547195e-05, "loss": 2.523, "step": 3110 }, { "epoch": 0.14, "learning_rate": 1.9081544009486316e-05, "loss": 2.5283, "step": 3120 }, { "epoch": 0.14, "learning_rate": 1.9075547097511435e-05, "loss": 2.5236, "step": 3130 }, { "epoch": 0.14, "learning_rate": 1.9069531619890314e-05, "loss": 2.5417, "step": 3140 }, { "epoch": 0.14, "learning_rate": 1.9063497588928676e-05, "loss": 2.5548, "step": 3150 }, { "epoch": 0.14, "learning_rate": 1.90574450169702e-05, "loss": 2.5413, "step": 3160 }, { "epoch": 0.14, "learning_rate": 1.905137391639651e-05, "loss": 2.5521, "step": 3170 }, { "epoch": 0.14, "learning_rate": 1.904528429962712e-05, "loss": 2.5547, "step": 3180 }, { "epoch": 0.14, "learning_rate": 1.9039176179119418e-05, "loss": 2.528, "step": 3190 }, { "epoch": 0.15, "learning_rate": 1.9033049567368655e-05, "loss": 2.5322, "step": 3200 }, { "epoch": 0.15, "learning_rate": 1.90269044769079e-05, "loss": 2.533, "step": 3210 }, { "epoch": 0.15, "learning_rate": 1.902074092030804e-05, "loss": 2.5438, "step": 3220 }, { "epoch": 0.15, "learning_rate": 1.901455891017772e-05, "loss": 2.5053, "step": 3230 }, { "epoch": 0.15, "learning_rate": 1.900835845916334e-05, "loss": 2.5348, "step": 3240 }, { "epoch": 0.15, "learning_rate": 1.900213957994903e-05, "loss": 2.5244, "step": 3250 }, { "epoch": 0.15, "learning_rate": 1.899590228525661e-05, "loss": 2.5434, "step": 3260 }, { "epoch": 0.15, "learning_rate": 1.898964658784558e-05, "loss": 2.5258, "step": 3270 }, { "epoch": 0.15, "learning_rate": 1.8983372500513075e-05, "loss": 2.5363, "step": 3280 }, { "epoch": 0.15, "learning_rate": 1.8977080036093863e-05, "loss": 2.5525, "step": 3290 }, { "epoch": 0.15, "learning_rate": 1.89707692074603e-05, "loss": 2.5336, "step": 3300 }, { "epoch": 0.15, "learning_rate": 1.8964440027522306e-05, "loss": 2.5109, "step": 3310 }, { "epoch": 0.15, "learning_rate": 1.8958092509227347e-05, "loss": 2.5359, "step": 3320 }, { "epoch": 0.15, "learning_rate": 1.8951726665560395e-05, "loss": 2.536, "step": 3330 }, { "epoch": 0.15, "learning_rate": 1.8945342509543923e-05, "loss": 2.5076, "step": 3340 }, { "epoch": 0.15, "learning_rate": 1.8938940054237856e-05, "loss": 2.5351, "step": 3350 }, { "epoch": 0.15, "learning_rate": 1.8932519312739554e-05, "loss": 2.5101, "step": 3360 }, { "epoch": 0.15, "learning_rate": 1.8926080298183784e-05, "loss": 2.5267, "step": 3370 }, { "epoch": 0.15, "learning_rate": 1.89196230237427e-05, "loss": 2.5512, "step": 3380 }, { "epoch": 0.15, "learning_rate": 1.8913147502625802e-05, "loss": 2.5042, "step": 3390 }, { "epoch": 0.15, "learning_rate": 1.8906653748079927e-05, "loss": 2.5479, "step": 3400 }, { "epoch": 0.15, "learning_rate": 1.89001417733892e-05, "loss": 2.5267, "step": 3410 }, { "epoch": 0.15, "learning_rate": 1.889361159187502e-05, "loss": 2.5475, "step": 3420 }, { "epoch": 0.16, "learning_rate": 1.8887063216896043e-05, "loss": 2.5353, "step": 3430 }, { "epoch": 0.16, "learning_rate": 1.8880496661848132e-05, "loss": 2.551, "step": 3440 }, { "epoch": 0.16, "learning_rate": 1.8873911940164344e-05, "loss": 2.5408, "step": 3450 }, { "epoch": 0.16, "learning_rate": 1.88673090653149e-05, "loss": 2.528, "step": 3460 }, { "epoch": 0.16, "learning_rate": 1.886068805080715e-05, "loss": 2.5241, "step": 3470 }, { "epoch": 0.16, "learning_rate": 1.885404891018556e-05, "loss": 2.5586, "step": 3480 }, { "epoch": 0.16, "learning_rate": 1.8847391657031674e-05, "loss": 2.5153, "step": 3490 }, { "epoch": 0.16, "learning_rate": 1.884071630496409e-05, "loss": 2.5022, "step": 3500 }, { "epoch": 0.16, "learning_rate": 1.8834022867638423e-05, "loss": 2.527, "step": 3510 }, { "epoch": 0.16, "learning_rate": 1.8827311358747295e-05, "loss": 2.512, "step": 3520 }, { "epoch": 0.16, "learning_rate": 1.882058179202029e-05, "loss": 2.5374, "step": 3530 }, { "epoch": 0.16, "learning_rate": 1.8813834181223935e-05, "loss": 2.5514, "step": 3540 }, { "epoch": 0.16, "learning_rate": 1.880706854016166e-05, "loss": 2.5169, "step": 3550 }, { "epoch": 0.16, "learning_rate": 1.88002848826738e-05, "loss": 2.4966, "step": 3560 }, { "epoch": 0.16, "learning_rate": 1.8793483222637524e-05, "loss": 2.5155, "step": 3570 }, { "epoch": 0.16, "learning_rate": 1.8786663573966845e-05, "loss": 2.509, "step": 3580 }, { "epoch": 0.16, "learning_rate": 1.877982595061256e-05, "loss": 2.5304, "step": 3590 }, { "epoch": 0.16, "learning_rate": 1.877297036656225e-05, "loss": 2.5268, "step": 3600 }, { "epoch": 0.16, "learning_rate": 1.8766096835840226e-05, "loss": 2.5605, "step": 3610 }, { "epoch": 0.16, "learning_rate": 1.8759205372507528e-05, "loss": 2.5112, "step": 3620 }, { "epoch": 0.16, "learning_rate": 1.8752295990661857e-05, "loss": 2.5627, "step": 3630 }, { "epoch": 0.16, "learning_rate": 1.8745368704437594e-05, "loss": 2.5212, "step": 3640 }, { "epoch": 0.17, "learning_rate": 1.8738423528005733e-05, "loss": 2.5379, "step": 3650 }, { "epoch": 0.17, "learning_rate": 1.8731460475573864e-05, "loss": 2.5574, "step": 3660 }, { "epoch": 0.17, "learning_rate": 1.8724479561386152e-05, "loss": 2.5381, "step": 3670 }, { "epoch": 0.17, "learning_rate": 1.87174807997233e-05, "loss": 2.5339, "step": 3680 }, { "epoch": 0.17, "learning_rate": 1.871046420490252e-05, "loss": 2.5476, "step": 3690 }, { "epoch": 0.17, "learning_rate": 1.8703429791277502e-05, "loss": 2.5304, "step": 3700 }, { "epoch": 0.17, "learning_rate": 1.8696377573238393e-05, "loss": 2.5252, "step": 3710 }, { "epoch": 0.17, "learning_rate": 1.8689307565211757e-05, "loss": 2.5276, "step": 3720 }, { "epoch": 0.17, "learning_rate": 1.868221978166056e-05, "loss": 2.5171, "step": 3730 }, { "epoch": 0.17, "learning_rate": 1.8675114237084117e-05, "loss": 2.5023, "step": 3740 }, { "epoch": 0.17, "learning_rate": 1.8667990946018086e-05, "loss": 2.5093, "step": 3750 }, { "epoch": 0.17, "learning_rate": 1.866084992303443e-05, "loss": 2.5555, "step": 3760 }, { "epoch": 0.17, "learning_rate": 1.8653691182741374e-05, "loss": 2.5282, "step": 3770 }, { "epoch": 0.17, "learning_rate": 1.8646514739783404e-05, "loss": 2.5215, "step": 3780 }, { "epoch": 0.17, "learning_rate": 1.863932060884121e-05, "loss": 2.5001, "step": 3790 }, { "epoch": 0.17, "learning_rate": 1.8632108804631653e-05, "loss": 2.4973, "step": 3800 }, { "epoch": 0.17, "learning_rate": 1.862487934190778e-05, "loss": 2.5067, "step": 3810 }, { "epoch": 0.17, "learning_rate": 1.861763223545873e-05, "loss": 2.517, "step": 3820 }, { "epoch": 0.17, "learning_rate": 1.8610367500109757e-05, "loss": 2.529, "step": 3830 }, { "epoch": 0.17, "learning_rate": 1.8603085150722165e-05, "loss": 2.5225, "step": 3840 }, { "epoch": 0.17, "learning_rate": 1.8595785202193295e-05, "loss": 2.5309, "step": 3850 }, { "epoch": 0.17, "learning_rate": 1.8588467669456494e-05, "loss": 2.5283, "step": 3860 }, { "epoch": 0.18, "learning_rate": 1.8581132567481075e-05, "loss": 2.5573, "step": 3870 }, { "epoch": 0.18, "learning_rate": 1.8573779911272293e-05, "loss": 2.5339, "step": 3880 }, { "epoch": 0.18, "learning_rate": 1.856640971587132e-05, "loss": 2.5111, "step": 3890 }, { "epoch": 0.18, "learning_rate": 1.855902199635519e-05, "loss": 2.5247, "step": 3900 }, { "epoch": 0.18, "learning_rate": 1.855161676783681e-05, "loss": 2.529, "step": 3910 }, { "epoch": 0.18, "learning_rate": 1.8544194045464888e-05, "loss": 2.5396, "step": 3920 }, { "epoch": 0.18, "learning_rate": 1.8536753844423923e-05, "loss": 2.5122, "step": 3930 }, { "epoch": 0.18, "learning_rate": 1.8529296179934167e-05, "loss": 2.5119, "step": 3940 }, { "epoch": 0.18, "learning_rate": 1.8521821067251604e-05, "loss": 2.5135, "step": 3950 }, { "epoch": 0.18, "learning_rate": 1.8514328521667904e-05, "loss": 2.5477, "step": 3960 }, { "epoch": 0.18, "learning_rate": 1.8506818558510404e-05, "loss": 2.5173, "step": 3970 }, { "epoch": 0.18, "learning_rate": 1.8499291193142072e-05, "loss": 2.5303, "step": 3980 }, { "epoch": 0.18, "learning_rate": 1.8491746440961463e-05, "loss": 2.5448, "step": 3990 }, { "epoch": 0.18, "learning_rate": 1.8484184317402725e-05, "loss": 2.5355, "step": 4000 }, { "epoch": 0.18, "eval_accuracy": 0.5364902773874621, "eval_loss": 2.274355888366699, "eval_runtime": 9.8852, "eval_samples_per_second": 138.288, "eval_steps_per_second": 1.113, "step": 4000 }, { "epoch": 0.18, "learning_rate": 1.8476604837935515e-05, "loss": 2.5192, "step": 4010 }, { "epoch": 0.18, "learning_rate": 1.846900801806501e-05, "loss": 2.5216, "step": 4020 }, { "epoch": 0.18, "learning_rate": 1.846139387333186e-05, "loss": 2.5109, "step": 4030 }, { "epoch": 0.18, "learning_rate": 1.845376241931215e-05, "loss": 2.492, "step": 4040 }, { "epoch": 0.18, "learning_rate": 1.8446113671617376e-05, "loss": 2.4837, "step": 4050 }, { "epoch": 0.18, "learning_rate": 1.843844764589441e-05, "loss": 2.5299, "step": 4060 }, { "epoch": 0.18, "learning_rate": 1.8430764357825482e-05, "loss": 2.5008, "step": 4070 }, { "epoch": 0.18, "learning_rate": 1.842306382312811e-05, "loss": 2.4974, "step": 4080 }, { "epoch": 0.19, "learning_rate": 1.8415346057555122e-05, "loss": 2.5336, "step": 4090 }, { "epoch": 0.19, "learning_rate": 1.8407611076894572e-05, "loss": 2.524, "step": 4100 }, { "epoch": 0.19, "learning_rate": 1.839985889696974e-05, "loss": 2.5193, "step": 4110 }, { "epoch": 0.19, "learning_rate": 1.839208953363909e-05, "loss": 2.4849, "step": 4120 }, { "epoch": 0.19, "learning_rate": 1.8384303002796236e-05, "loss": 2.5555, "step": 4130 }, { "epoch": 0.19, "learning_rate": 1.8376499320369917e-05, "loss": 2.5041, "step": 4140 }, { "epoch": 0.19, "learning_rate": 1.8368678502323947e-05, "loss": 2.4856, "step": 4150 }, { "epoch": 0.19, "learning_rate": 1.8360840564657202e-05, "loss": 2.5083, "step": 4160 }, { "epoch": 0.19, "learning_rate": 1.8352985523403584e-05, "loss": 2.5207, "step": 4170 }, { "epoch": 0.19, "learning_rate": 1.8345113394631973e-05, "loss": 2.5169, "step": 4180 }, { "epoch": 0.19, "learning_rate": 1.8337224194446205e-05, "loss": 2.5269, "step": 4190 }, { "epoch": 0.19, "learning_rate": 1.8329317938985052e-05, "loss": 2.5056, "step": 4200 }, { "epoch": 0.19, "learning_rate": 1.8321394644422155e-05, "loss": 2.5142, "step": 4210 }, { "epoch": 0.19, "learning_rate": 1.8313454326966035e-05, "loss": 2.5418, "step": 4220 }, { "epoch": 0.19, "learning_rate": 1.8305497002860015e-05, "loss": 2.5119, "step": 4230 }, { "epoch": 0.19, "learning_rate": 1.829752268838222e-05, "loss": 2.5243, "step": 4240 }, { "epoch": 0.19, "learning_rate": 1.8289531399845528e-05, "loss": 2.4917, "step": 4250 }, { "epoch": 0.19, "learning_rate": 1.8281523153597543e-05, "loss": 2.4768, "step": 4260 }, { "epoch": 0.19, "learning_rate": 1.8273497966020553e-05, "loss": 2.4815, "step": 4270 }, { "epoch": 0.19, "learning_rate": 1.8265455853531516e-05, "loss": 2.5219, "step": 4280 }, { "epoch": 0.19, "learning_rate": 1.8257396832581997e-05, "loss": 2.4825, "step": 4290 }, { "epoch": 0.19, "learning_rate": 1.8249320919658158e-05, "loss": 2.5093, "step": 4300 }, { "epoch": 0.2, "learning_rate": 1.8241228131280716e-05, "loss": 2.5584, "step": 4310 }, { "epoch": 0.2, "learning_rate": 1.823311848400491e-05, "loss": 2.5098, "step": 4320 }, { "epoch": 0.2, "learning_rate": 1.8224991994420466e-05, "loss": 2.5519, "step": 4330 }, { "epoch": 0.2, "learning_rate": 1.8216848679151565e-05, "loss": 2.4879, "step": 4340 }, { "epoch": 0.2, "learning_rate": 1.820868855485681e-05, "loss": 2.5185, "step": 4350 }, { "epoch": 0.2, "learning_rate": 1.8200511638229178e-05, "loss": 2.5258, "step": 4360 }, { "epoch": 0.2, "learning_rate": 1.8192317945996016e-05, "loss": 2.5009, "step": 4370 }, { "epoch": 0.2, "learning_rate": 1.8184107494918976e-05, "loss": 2.517, "step": 4380 }, { "epoch": 0.2, "learning_rate": 1.8175880301793995e-05, "loss": 2.4987, "step": 4390 }, { "epoch": 0.2, "learning_rate": 1.816763638345126e-05, "loss": 2.498, "step": 4400 }, { "epoch": 0.2, "learning_rate": 1.815937575675518e-05, "loss": 2.5115, "step": 4410 }, { "epoch": 0.2, "learning_rate": 1.8151098438604323e-05, "loss": 2.5069, "step": 4420 }, { "epoch": 0.2, "learning_rate": 1.8142804445931425e-05, "loss": 2.5153, "step": 4430 }, { "epoch": 0.2, "learning_rate": 1.8134493795703325e-05, "loss": 2.5119, "step": 4440 }, { "epoch": 0.2, "learning_rate": 1.812616650492093e-05, "loss": 2.5026, "step": 4450 }, { "epoch": 0.2, "learning_rate": 1.81178225906192e-05, "loss": 2.5119, "step": 4460 }, { "epoch": 0.2, "learning_rate": 1.81094620698671e-05, "loss": 2.5089, "step": 4470 }, { "epoch": 0.2, "learning_rate": 1.810108495976755e-05, "loss": 2.527, "step": 4480 }, { "epoch": 0.2, "learning_rate": 1.8092691277457432e-05, "loss": 2.4969, "step": 4490 }, { "epoch": 0.2, "learning_rate": 1.8084281040107505e-05, "loss": 2.5204, "step": 4500 }, { "epoch": 0.2, "learning_rate": 1.8075854264922417e-05, "loss": 2.5072, "step": 4510 }, { "epoch": 0.2, "learning_rate": 1.8067410969140627e-05, "loss": 2.524, "step": 4520 }, { "epoch": 0.21, "learning_rate": 1.8058951170034407e-05, "loss": 2.5161, "step": 4530 }, { "epoch": 0.21, "learning_rate": 1.8050474884909772e-05, "loss": 2.5365, "step": 4540 }, { "epoch": 0.21, "learning_rate": 1.804198213110648e-05, "loss": 2.4983, "step": 4550 }, { "epoch": 0.21, "learning_rate": 1.8033472925997963e-05, "loss": 2.498, "step": 4560 }, { "epoch": 0.21, "learning_rate": 1.8024947286991316e-05, "loss": 2.4998, "step": 4570 }, { "epoch": 0.21, "learning_rate": 1.8016405231527253e-05, "loss": 2.4966, "step": 4580 }, { "epoch": 0.21, "learning_rate": 1.8007846777080064e-05, "loss": 2.5147, "step": 4590 }, { "epoch": 0.21, "learning_rate": 1.7999271941157596e-05, "loss": 2.5035, "step": 4600 }, { "epoch": 0.21, "learning_rate": 1.7990680741301195e-05, "loss": 2.499, "step": 4610 }, { "epoch": 0.21, "learning_rate": 1.7982073195085692e-05, "loss": 2.4885, "step": 4620 }, { "epoch": 0.21, "learning_rate": 1.7973449320119355e-05, "loss": 2.5061, "step": 4630 }, { "epoch": 0.21, "learning_rate": 1.796480913404385e-05, "loss": 2.4925, "step": 4640 }, { "epoch": 0.21, "learning_rate": 1.795615265453422e-05, "loss": 2.4839, "step": 4650 }, { "epoch": 0.21, "learning_rate": 1.7947479899298837e-05, "loss": 2.5002, "step": 4660 }, { "epoch": 0.21, "learning_rate": 1.7938790886079355e-05, "loss": 2.5347, "step": 4670 }, { "epoch": 0.21, "learning_rate": 1.7930085632650706e-05, "loss": 2.5222, "step": 4680 }, { "epoch": 0.21, "learning_rate": 1.7921364156821024e-05, "loss": 2.5128, "step": 4690 }, { "epoch": 0.21, "learning_rate": 1.7912626476431648e-05, "loss": 2.5115, "step": 4700 }, { "epoch": 0.21, "learning_rate": 1.7903872609357057e-05, "loss": 2.5147, "step": 4710 }, { "epoch": 0.21, "learning_rate": 1.7895102573504843e-05, "loss": 2.5246, "step": 4720 }, { "epoch": 0.21, "learning_rate": 1.788631638681567e-05, "loss": 2.5282, "step": 4730 }, { "epoch": 0.21, "learning_rate": 1.787751406726325e-05, "loss": 2.4971, "step": 4740 }, { "epoch": 0.22, "learning_rate": 1.7868695632854294e-05, "loss": 2.4834, "step": 4750 }, { "epoch": 0.22, "learning_rate": 1.785986110162848e-05, "loss": 2.4973, "step": 4760 }, { "epoch": 0.22, "learning_rate": 1.785101049165841e-05, "loss": 2.5288, "step": 4770 }, { "epoch": 0.22, "learning_rate": 1.784214382104958e-05, "loss": 2.4988, "step": 4780 }, { "epoch": 0.22, "learning_rate": 1.7833261107940347e-05, "loss": 2.4955, "step": 4790 }, { "epoch": 0.22, "learning_rate": 1.782436237050188e-05, "loss": 2.5097, "step": 4800 }, { "epoch": 0.22, "learning_rate": 1.7815447626938125e-05, "loss": 2.5047, "step": 4810 }, { "epoch": 0.22, "learning_rate": 1.780651689548578e-05, "loss": 2.512, "step": 4820 }, { "epoch": 0.22, "learning_rate": 1.7797570194414246e-05, "loss": 2.4946, "step": 4830 }, { "epoch": 0.22, "learning_rate": 1.7788607542025592e-05, "loss": 2.476, "step": 4840 }, { "epoch": 0.22, "learning_rate": 1.7779628956654513e-05, "loss": 2.5146, "step": 4850 }, { "epoch": 0.22, "learning_rate": 1.777063445666831e-05, "loss": 2.5086, "step": 4860 }, { "epoch": 0.22, "learning_rate": 1.7761624060466824e-05, "loss": 2.4862, "step": 4870 }, { "epoch": 0.22, "learning_rate": 1.775259778648243e-05, "loss": 2.5173, "step": 4880 }, { "epoch": 0.22, "learning_rate": 1.7743555653179978e-05, "loss": 2.5241, "step": 4890 }, { "epoch": 0.22, "learning_rate": 1.773449767905676e-05, "loss": 2.5003, "step": 4900 }, { "epoch": 0.22, "learning_rate": 1.7725423882642467e-05, "loss": 2.5295, "step": 4910 }, { "epoch": 0.22, "learning_rate": 1.771633428249917e-05, "loss": 2.4967, "step": 4920 }, { "epoch": 0.22, "learning_rate": 1.770722889722126e-05, "loss": 2.5178, "step": 4930 }, { "epoch": 0.22, "learning_rate": 1.769810774543543e-05, "loss": 2.4951, "step": 4940 }, { "epoch": 0.22, "learning_rate": 1.768897084580061e-05, "loss": 2.4986, "step": 4950 }, { "epoch": 0.22, "learning_rate": 1.767981821700796e-05, "loss": 2.5109, "step": 4960 }, { "epoch": 0.23, "learning_rate": 1.7670649877780803e-05, "loss": 2.5234, "step": 4970 }, { "epoch": 0.23, "learning_rate": 1.7661465846874618e-05, "loss": 2.5029, "step": 4980 }, { "epoch": 0.23, "learning_rate": 1.7652266143076964e-05, "loss": 2.5181, "step": 4990 }, { "epoch": 0.23, "learning_rate": 1.7643050785207478e-05, "loss": 2.5092, "step": 5000 }, { "epoch": 0.23, "learning_rate": 1.7633819792117815e-05, "loss": 2.5061, "step": 5010 }, { "epoch": 0.23, "learning_rate": 1.762457318269161e-05, "loss": 2.5022, "step": 5020 }, { "epoch": 0.23, "learning_rate": 1.7615310975844447e-05, "loss": 2.5213, "step": 5030 }, { "epoch": 0.23, "learning_rate": 1.7606033190523823e-05, "loss": 2.507, "step": 5040 }, { "epoch": 0.23, "learning_rate": 1.7596739845709096e-05, "loss": 2.519, "step": 5050 }, { "epoch": 0.23, "learning_rate": 1.7587430960411454e-05, "loss": 2.5171, "step": 5060 }, { "epoch": 0.23, "learning_rate": 1.757810655367388e-05, "loss": 2.5059, "step": 5070 }, { "epoch": 0.23, "learning_rate": 1.7568766644571108e-05, "loss": 2.5114, "step": 5080 }, { "epoch": 0.23, "learning_rate": 1.7559411252209583e-05, "loss": 2.5041, "step": 5090 }, { "epoch": 0.23, "learning_rate": 1.7550040395727422e-05, "loss": 2.4847, "step": 5100 }, { "epoch": 0.23, "learning_rate": 1.754065409429438e-05, "loss": 2.5067, "step": 5110 }, { "epoch": 0.23, "learning_rate": 1.7531252367111812e-05, "loss": 2.4977, "step": 5120 }, { "epoch": 0.23, "learning_rate": 1.752183523341262e-05, "loss": 2.4925, "step": 5130 }, { "epoch": 0.23, "learning_rate": 1.7512402712461222e-05, "loss": 2.5229, "step": 5140 }, { "epoch": 0.23, "learning_rate": 1.7502954823553522e-05, "loss": 2.4954, "step": 5150 }, { "epoch": 0.23, "learning_rate": 1.749349158601686e-05, "loss": 2.4876, "step": 5160 }, { "epoch": 0.23, "learning_rate": 1.7484013019209968e-05, "loss": 2.4912, "step": 5170 }, { "epoch": 0.23, "learning_rate": 1.7474519142522938e-05, "loss": 2.5208, "step": 5180 }, { "epoch": 0.24, "learning_rate": 1.7465009975377193e-05, "loss": 2.4989, "step": 5190 }, { "epoch": 0.24, "learning_rate": 1.745548553722542e-05, "loss": 2.5165, "step": 5200 }, { "epoch": 0.24, "learning_rate": 1.7445945847551553e-05, "loss": 2.4886, "step": 5210 }, { "epoch": 0.24, "learning_rate": 1.7436390925870722e-05, "loss": 2.4798, "step": 5220 }, { "epoch": 0.24, "learning_rate": 1.7426820791729223e-05, "loss": 2.5086, "step": 5230 }, { "epoch": 0.24, "learning_rate": 1.741723546470447e-05, "loss": 2.4916, "step": 5240 }, { "epoch": 0.24, "learning_rate": 1.740763496440495e-05, "loss": 2.4957, "step": 5250 }, { "epoch": 0.24, "learning_rate": 1.739801931047019e-05, "loss": 2.5054, "step": 5260 }, { "epoch": 0.24, "learning_rate": 1.7388388522570734e-05, "loss": 2.4975, "step": 5270 }, { "epoch": 0.24, "learning_rate": 1.737874262040806e-05, "loss": 2.4723, "step": 5280 }, { "epoch": 0.24, "learning_rate": 1.7369081623714582e-05, "loss": 2.4917, "step": 5290 }, { "epoch": 0.24, "learning_rate": 1.735940555225359e-05, "loss": 2.5188, "step": 5300 }, { "epoch": 0.24, "learning_rate": 1.7349714425819203e-05, "loss": 2.5201, "step": 5310 }, { "epoch": 0.24, "learning_rate": 1.7340008264236355e-05, "loss": 2.4911, "step": 5320 }, { "epoch": 0.24, "learning_rate": 1.7330287087360712e-05, "loss": 2.5128, "step": 5330 }, { "epoch": 0.24, "learning_rate": 1.7320550915078683e-05, "loss": 2.4931, "step": 5340 }, { "epoch": 0.24, "learning_rate": 1.7310799767307337e-05, "loss": 2.4939, "step": 5350 }, { "epoch": 0.24, "learning_rate": 1.730103366399438e-05, "loss": 2.5136, "step": 5360 }, { "epoch": 0.24, "learning_rate": 1.729125262511811e-05, "loss": 2.5051, "step": 5370 }, { "epoch": 0.24, "learning_rate": 1.728145667068739e-05, "loss": 2.5114, "step": 5380 }, { "epoch": 0.24, "learning_rate": 1.7271645820741586e-05, "loss": 2.466, "step": 5390 }, { "epoch": 0.24, "learning_rate": 1.7261820095350535e-05, "loss": 2.5067, "step": 5400 }, { "epoch": 0.25, "learning_rate": 1.7251979514614504e-05, "loss": 2.514, "step": 5410 }, { "epoch": 0.25, "learning_rate": 1.7242124098664157e-05, "loss": 2.4983, "step": 5420 }, { "epoch": 0.25, "learning_rate": 1.72322538676605e-05, "loss": 2.4783, "step": 5430 }, { "epoch": 0.25, "learning_rate": 1.722236884179484e-05, "loss": 2.4695, "step": 5440 }, { "epoch": 0.25, "learning_rate": 1.7212469041288763e-05, "loss": 2.4844, "step": 5450 }, { "epoch": 0.25, "learning_rate": 1.7202554486394068e-05, "loss": 2.475, "step": 5460 }, { "epoch": 0.25, "learning_rate": 1.7192625197392745e-05, "loss": 2.5036, "step": 5470 }, { "epoch": 0.25, "learning_rate": 1.718268119459692e-05, "loss": 2.534, "step": 5480 }, { "epoch": 0.25, "learning_rate": 1.717272249834882e-05, "loss": 2.475, "step": 5490 }, { "epoch": 0.25, "learning_rate": 1.7162749129020728e-05, "loss": 2.4817, "step": 5500 }, { "epoch": 0.25, "learning_rate": 1.7152761107014945e-05, "loss": 2.5154, "step": 5510 }, { "epoch": 0.25, "learning_rate": 1.714275845276375e-05, "loss": 2.4787, "step": 5520 }, { "epoch": 0.25, "learning_rate": 1.7132741186729347e-05, "loss": 2.5196, "step": 5530 }, { "epoch": 0.25, "learning_rate": 1.7122709329403845e-05, "loss": 2.5128, "step": 5540 }, { "epoch": 0.25, "learning_rate": 1.711266290130918e-05, "loss": 2.5025, "step": 5550 }, { "epoch": 0.25, "learning_rate": 1.7102601922997122e-05, "loss": 2.5246, "step": 5560 }, { "epoch": 0.25, "learning_rate": 1.709252641504918e-05, "loss": 2.4984, "step": 5570 }, { "epoch": 0.25, "learning_rate": 1.7082436398076608e-05, "loss": 2.4903, "step": 5580 }, { "epoch": 0.25, "learning_rate": 1.7072331892720323e-05, "loss": 2.4873, "step": 5590 }, { "epoch": 0.25, "learning_rate": 1.7062212919650895e-05, "loss": 2.4894, "step": 5600 }, { "epoch": 0.25, "learning_rate": 1.7052079499568477e-05, "loss": 2.4858, "step": 5610 }, { "epoch": 0.25, "learning_rate": 1.7041931653202788e-05, "loss": 2.5158, "step": 5620 }, { "epoch": 0.26, "learning_rate": 1.703176940131305e-05, "loss": 2.4952, "step": 5630 }, { "epoch": 0.26, "learning_rate": 1.702159276468796e-05, "loss": 2.504, "step": 5640 }, { "epoch": 0.26, "learning_rate": 1.7011401764145637e-05, "loss": 2.4712, "step": 5650 }, { "epoch": 0.26, "learning_rate": 1.7001196420533584e-05, "loss": 2.503, "step": 5660 }, { "epoch": 0.26, "learning_rate": 1.699097675472865e-05, "loss": 2.4978, "step": 5670 }, { "epoch": 0.26, "learning_rate": 1.6980742787636977e-05, "loss": 2.5121, "step": 5680 }, { "epoch": 0.26, "learning_rate": 1.697049454019397e-05, "loss": 2.5233, "step": 5690 }, { "epoch": 0.26, "learning_rate": 1.696023203336424e-05, "loss": 2.5124, "step": 5700 }, { "epoch": 0.26, "learning_rate": 1.6949955288141566e-05, "loss": 2.5144, "step": 5710 }, { "epoch": 0.26, "learning_rate": 1.6939664325548868e-05, "loss": 2.4903, "step": 5720 }, { "epoch": 0.26, "learning_rate": 1.6929359166638132e-05, "loss": 2.4857, "step": 5730 }, { "epoch": 0.26, "learning_rate": 1.69190398324904e-05, "loss": 2.5045, "step": 5740 }, { "epoch": 0.26, "learning_rate": 1.69087063442157e-05, "loss": 2.4727, "step": 5750 }, { "epoch": 0.26, "learning_rate": 1.6898358722953028e-05, "loss": 2.4956, "step": 5760 }, { "epoch": 0.26, "learning_rate": 1.688799698987028e-05, "loss": 2.4701, "step": 5770 }, { "epoch": 0.26, "learning_rate": 1.6877621166164222e-05, "loss": 2.5074, "step": 5780 }, { "epoch": 0.26, "learning_rate": 1.6867231273060452e-05, "loss": 2.4902, "step": 5790 }, { "epoch": 0.26, "learning_rate": 1.6856827331813344e-05, "loss": 2.5153, "step": 5800 }, { "epoch": 0.26, "learning_rate": 1.684640936370601e-05, "loss": 2.5026, "step": 5810 }, { "epoch": 0.26, "learning_rate": 1.6835977390050256e-05, "loss": 2.4931, "step": 5820 }, { "epoch": 0.26, "learning_rate": 1.6825531432186545e-05, "loss": 2.5182, "step": 5830 }, { "epoch": 0.26, "learning_rate": 1.6815071511483933e-05, "loss": 2.4823, "step": 5840 }, { "epoch": 0.27, "learning_rate": 1.680459764934006e-05, "loss": 2.5198, "step": 5850 }, { "epoch": 0.27, "learning_rate": 1.6794109867181066e-05, "loss": 2.4722, "step": 5860 }, { "epoch": 0.27, "learning_rate": 1.6783608186461576e-05, "loss": 2.4904, "step": 5870 }, { "epoch": 0.27, "learning_rate": 1.6773092628664654e-05, "loss": 2.4845, "step": 5880 }, { "epoch": 0.27, "learning_rate": 1.6762563215301735e-05, "loss": 2.5111, "step": 5890 }, { "epoch": 0.27, "learning_rate": 1.6752019967912613e-05, "loss": 2.4876, "step": 5900 }, { "epoch": 0.27, "learning_rate": 1.6741462908065373e-05, "loss": 2.5221, "step": 5910 }, { "epoch": 0.27, "learning_rate": 1.6730892057356363e-05, "loss": 2.518, "step": 5920 }, { "epoch": 0.27, "learning_rate": 1.6720307437410134e-05, "loss": 2.4853, "step": 5930 }, { "epoch": 0.27, "learning_rate": 1.6709709069879413e-05, "loss": 2.5022, "step": 5940 }, { "epoch": 0.27, "learning_rate": 1.6699096976445046e-05, "loss": 2.5092, "step": 5950 }, { "epoch": 0.27, "learning_rate": 1.668847117881596e-05, "loss": 2.4856, "step": 5960 }, { "epoch": 0.27, "learning_rate": 1.6677831698729106e-05, "loss": 2.5005, "step": 5970 }, { "epoch": 0.27, "learning_rate": 1.666717855794944e-05, "loss": 2.5036, "step": 5980 }, { "epoch": 0.27, "learning_rate": 1.6656511778269856e-05, "loss": 2.4962, "step": 5990 }, { "epoch": 0.27, "learning_rate": 1.664583138151115e-05, "loss": 2.5063, "step": 6000 }, { "epoch": 0.27, "eval_accuracy": 0.539072438522612, "eval_loss": 2.253763437271118, "eval_runtime": 9.8813, "eval_samples_per_second": 138.342, "eval_steps_per_second": 1.113, "step": 6000 }, { "epoch": 0.27, "learning_rate": 1.663513738952198e-05, "loss": 2.4694, "step": 6010 }, { "epoch": 0.27, "learning_rate": 1.6624429824178794e-05, "loss": 2.4673, "step": 6020 }, { "epoch": 0.27, "learning_rate": 1.6613708707385832e-05, "loss": 2.4789, "step": 6030 }, { "epoch": 0.27, "learning_rate": 1.660297406107505e-05, "loss": 2.495, "step": 6040 }, { "epoch": 0.27, "learning_rate": 1.6592225907206065e-05, "loss": 2.4933, "step": 6050 }, { "epoch": 0.27, "learning_rate": 1.6581464267766144e-05, "loss": 2.4915, "step": 6060 }, { "epoch": 0.28, "learning_rate": 1.6570689164770138e-05, "loss": 2.5032, "step": 6070 }, { "epoch": 0.28, "learning_rate": 1.6559900620260435e-05, "loss": 2.4912, "step": 6080 }, { "epoch": 0.28, "learning_rate": 1.654909865630692e-05, "loss": 2.5178, "step": 6090 }, { "epoch": 0.28, "learning_rate": 1.6538283295006933e-05, "loss": 2.4834, "step": 6100 }, { "epoch": 0.28, "learning_rate": 1.6527454558485222e-05, "loss": 2.4941, "step": 6110 }, { "epoch": 0.28, "learning_rate": 1.6516612468893892e-05, "loss": 2.5131, "step": 6120 }, { "epoch": 0.28, "learning_rate": 1.650575704841237e-05, "loss": 2.485, "step": 6130 }, { "epoch": 0.28, "learning_rate": 1.6494888319247345e-05, "loss": 2.5086, "step": 6140 }, { "epoch": 0.28, "learning_rate": 1.648400630363274e-05, "loss": 2.4829, "step": 6150 }, { "epoch": 0.28, "learning_rate": 1.6473111023829646e-05, "loss": 2.5099, "step": 6160 }, { "epoch": 0.28, "learning_rate": 1.6462202502126307e-05, "loss": 2.4983, "step": 6170 }, { "epoch": 0.28, "learning_rate": 1.6451280760838036e-05, "loss": 2.4665, "step": 6180 }, { "epoch": 0.28, "learning_rate": 1.6440345822307204e-05, "loss": 2.5018, "step": 6190 }, { "epoch": 0.28, "learning_rate": 1.6429397708903172e-05, "loss": 2.5068, "step": 6200 }, { "epoch": 0.28, "learning_rate": 1.6418436443022254e-05, "loss": 2.5092, "step": 6210 }, { "epoch": 0.28, "learning_rate": 1.6407462047087667e-05, "loss": 2.5059, "step": 6220 }, { "epoch": 0.28, "learning_rate": 1.639647454354949e-05, "loss": 2.4755, "step": 6230 }, { "epoch": 0.28, "learning_rate": 1.638547395488462e-05, "loss": 2.5195, "step": 6240 }, { "epoch": 0.28, "learning_rate": 1.6374460303596718e-05, "loss": 2.5125, "step": 6250 }, { "epoch": 0.28, "learning_rate": 1.6363433612216163e-05, "loss": 2.4828, "step": 6260 }, { "epoch": 0.28, "learning_rate": 1.635239390330002e-05, "loss": 2.4812, "step": 6270 }, { "epoch": 0.28, "learning_rate": 1.6341341199431974e-05, "loss": 2.5017, "step": 6280 }, { "epoch": 0.29, "learning_rate": 1.6330275523222294e-05, "loss": 2.5011, "step": 6290 }, { "epoch": 0.29, "learning_rate": 1.6319196897307795e-05, "loss": 2.4904, "step": 6300 }, { "epoch": 0.29, "learning_rate": 1.6308105344351776e-05, "loss": 2.4735, "step": 6310 }, { "epoch": 0.29, "learning_rate": 1.6297000887043978e-05, "loss": 2.5133, "step": 6320 }, { "epoch": 0.29, "learning_rate": 1.6285883548100552e-05, "loss": 2.4806, "step": 6330 }, { "epoch": 0.29, "learning_rate": 1.6274753350263987e-05, "loss": 2.4784, "step": 6340 }, { "epoch": 0.29, "learning_rate": 1.6263610316303084e-05, "loss": 2.4876, "step": 6350 }, { "epoch": 0.29, "learning_rate": 1.62524544690129e-05, "loss": 2.4851, "step": 6360 }, { "epoch": 0.29, "learning_rate": 1.6241285831214707e-05, "loss": 2.4619, "step": 6370 }, { "epoch": 0.29, "learning_rate": 1.623010442575594e-05, "loss": 2.4809, "step": 6380 }, { "epoch": 0.29, "learning_rate": 1.621891027551015e-05, "loss": 2.5083, "step": 6390 }, { "epoch": 0.29, "learning_rate": 1.6207703403376973e-05, "loss": 2.4874, "step": 6400 }, { "epoch": 0.29, "learning_rate": 1.619648383228205e-05, "loss": 2.489, "step": 6410 }, { "epoch": 0.29, "learning_rate": 1.6185251585177013e-05, "loss": 2.4943, "step": 6420 }, { "epoch": 0.29, "learning_rate": 1.6174006685039425e-05, "loss": 2.505, "step": 6430 }, { "epoch": 0.29, "learning_rate": 1.6162749154872725e-05, "loss": 2.4801, "step": 6440 }, { "epoch": 0.29, "learning_rate": 1.6151479017706192e-05, "loss": 2.5019, "step": 6450 }, { "epoch": 0.29, "learning_rate": 1.6140196296594903e-05, "loss": 2.508, "step": 6460 }, { "epoch": 0.29, "learning_rate": 1.612890101461967e-05, "loss": 2.4834, "step": 6470 }, { "epoch": 0.29, "learning_rate": 1.6117593194886998e-05, "loss": 2.4867, "step": 6480 }, { "epoch": 0.29, "learning_rate": 1.6106272860529047e-05, "loss": 2.5108, "step": 6490 }, { "epoch": 0.29, "learning_rate": 1.6094940034703577e-05, "loss": 2.4923, "step": 6500 }, { "epoch": 0.3, "learning_rate": 1.6083594740593896e-05, "loss": 2.5038, "step": 6510 }, { "epoch": 0.3, "learning_rate": 1.6072237001408822e-05, "loss": 2.5141, "step": 6520 }, { "epoch": 0.3, "learning_rate": 1.6060866840382632e-05, "loss": 2.5012, "step": 6530 }, { "epoch": 0.3, "learning_rate": 1.6049484280775012e-05, "loss": 2.4999, "step": 6540 }, { "epoch": 0.3, "learning_rate": 1.6038089345871015e-05, "loss": 2.4975, "step": 6550 }, { "epoch": 0.3, "learning_rate": 1.6026682058981006e-05, "loss": 2.5073, "step": 6560 }, { "epoch": 0.3, "learning_rate": 1.6015262443440618e-05, "loss": 2.4875, "step": 6570 }, { "epoch": 0.3, "learning_rate": 1.6003830522610712e-05, "loss": 2.5146, "step": 6580 }, { "epoch": 0.3, "learning_rate": 1.599238631987731e-05, "loss": 2.4881, "step": 6590 }, { "epoch": 0.3, "learning_rate": 1.598092985865157e-05, "loss": 2.481, "step": 6600 }, { "epoch": 0.3, "learning_rate": 1.5969461162369718e-05, "loss": 2.4918, "step": 6610 }, { "epoch": 0.3, "learning_rate": 1.595798025449301e-05, "loss": 2.4875, "step": 6620 }, { "epoch": 0.3, "learning_rate": 1.5946487158507693e-05, "loss": 2.4693, "step": 6630 }, { "epoch": 0.3, "learning_rate": 1.5934981897924937e-05, "loss": 2.5047, "step": 6640 }, { "epoch": 0.3, "learning_rate": 1.5923464496280797e-05, "loss": 2.5147, "step": 6650 }, { "epoch": 0.3, "learning_rate": 1.5911934977136167e-05, "loss": 2.4647, "step": 6660 }, { "epoch": 0.3, "learning_rate": 1.5900393364076735e-05, "loss": 2.4744, "step": 6670 }, { "epoch": 0.3, "learning_rate": 1.5888839680712918e-05, "loss": 2.4781, "step": 6680 }, { "epoch": 0.3, "learning_rate": 1.587727395067983e-05, "loss": 2.5232, "step": 6690 }, { "epoch": 0.3, "learning_rate": 1.5865696197637234e-05, "loss": 2.4996, "step": 6700 }, { "epoch": 0.3, "learning_rate": 1.5854106445269483e-05, "loss": 2.4779, "step": 6710 }, { "epoch": 0.3, "learning_rate": 1.584250471728548e-05, "loss": 2.5024, "step": 6720 }, { "epoch": 0.31, "learning_rate": 1.5830891037418615e-05, "loss": 2.4704, "step": 6730 }, { "epoch": 0.31, "learning_rate": 1.5819265429426748e-05, "loss": 2.5011, "step": 6740 }, { "epoch": 0.31, "learning_rate": 1.5807627917092118e-05, "loss": 2.4715, "step": 6750 }, { "epoch": 0.31, "learning_rate": 1.579597852422133e-05, "loss": 2.5069, "step": 6760 }, { "epoch": 0.31, "learning_rate": 1.5784317274645294e-05, "loss": 2.4801, "step": 6770 }, { "epoch": 0.31, "learning_rate": 1.5772644192219164e-05, "loss": 2.4633, "step": 6780 }, { "epoch": 0.31, "learning_rate": 1.5760959300822313e-05, "loss": 2.4838, "step": 6790 }, { "epoch": 0.31, "learning_rate": 1.574926262435826e-05, "loss": 2.4959, "step": 6800 }, { "epoch": 0.31, "learning_rate": 1.573755418675464e-05, "loss": 2.4952, "step": 6810 }, { "epoch": 0.31, "learning_rate": 1.572583401196315e-05, "loss": 2.4767, "step": 6820 }, { "epoch": 0.31, "learning_rate": 1.5714102123959485e-05, "loss": 2.4918, "step": 6830 }, { "epoch": 0.31, "learning_rate": 1.5702358546743314e-05, "loss": 2.5085, "step": 6840 }, { "epoch": 0.31, "learning_rate": 1.569060330433821e-05, "loss": 2.5108, "step": 6850 }, { "epoch": 0.31, "learning_rate": 1.5678836420791618e-05, "loss": 2.5056, "step": 6860 }, { "epoch": 0.31, "learning_rate": 1.5667057920174785e-05, "loss": 2.4979, "step": 6870 }, { "epoch": 0.31, "learning_rate": 1.565526782658273e-05, "loss": 2.5092, "step": 6880 }, { "epoch": 0.31, "learning_rate": 1.5643466164134196e-05, "loss": 2.4869, "step": 6890 }, { "epoch": 0.31, "learning_rate": 1.5631652956971576e-05, "loss": 2.4805, "step": 6900 }, { "epoch": 0.31, "learning_rate": 1.561982822926089e-05, "loss": 2.4754, "step": 6910 }, { "epoch": 0.31, "learning_rate": 1.5607992005191717e-05, "loss": 2.5099, "step": 6920 }, { "epoch": 0.31, "learning_rate": 1.5596144308977167e-05, "loss": 2.476, "step": 6930 }, { "epoch": 0.31, "learning_rate": 1.5584285164853802e-05, "loss": 2.5084, "step": 6940 }, { "epoch": 0.31, "learning_rate": 1.5572414597081617e-05, "loss": 2.4775, "step": 6950 }, { "epoch": 0.32, "learning_rate": 1.5560532629943968e-05, "loss": 2.4663, "step": 6960 }, { "epoch": 0.32, "learning_rate": 1.5548639287747537e-05, "loss": 2.4791, "step": 6970 }, { "epoch": 0.32, "learning_rate": 1.5536734594822266e-05, "loss": 2.4713, "step": 6980 }, { "epoch": 0.32, "learning_rate": 1.552481857552132e-05, "loss": 2.4699, "step": 6990 }, { "epoch": 0.32, "learning_rate": 1.5512891254221046e-05, "loss": 2.4678, "step": 7000 }, { "epoch": 0.32, "learning_rate": 1.5500952655320892e-05, "loss": 2.4733, "step": 7010 }, { "epoch": 0.32, "learning_rate": 1.5489002803243396e-05, "loss": 2.5095, "step": 7020 }, { "epoch": 0.32, "learning_rate": 1.5477041722434096e-05, "loss": 2.4851, "step": 7030 }, { "epoch": 0.32, "learning_rate": 1.5465069437361516e-05, "loss": 2.4988, "step": 7040 }, { "epoch": 0.32, "learning_rate": 1.54530859725171e-05, "loss": 2.4799, "step": 7050 }, { "epoch": 0.32, "learning_rate": 1.5441091352415148e-05, "loss": 2.4921, "step": 7060 }, { "epoch": 0.32, "learning_rate": 1.5429085601592794e-05, "loss": 2.5083, "step": 7070 }, { "epoch": 0.32, "learning_rate": 1.541706874460994e-05, "loss": 2.466, "step": 7080 }, { "epoch": 0.32, "learning_rate": 1.5405040806049202e-05, "loss": 2.4906, "step": 7090 }, { "epoch": 0.32, "learning_rate": 1.5393001810515865e-05, "loss": 2.4883, "step": 7100 }, { "epoch": 0.32, "learning_rate": 1.538095178263784e-05, "loss": 2.4842, "step": 7110 }, { "epoch": 0.32, "learning_rate": 1.5368890747065604e-05, "loss": 2.4967, "step": 7120 }, { "epoch": 0.32, "learning_rate": 1.535681872847215e-05, "loss": 2.4957, "step": 7130 }, { "epoch": 0.32, "learning_rate": 1.5344735751552935e-05, "loss": 2.5003, "step": 7140 }, { "epoch": 0.32, "learning_rate": 1.533264184102585e-05, "loss": 2.4989, "step": 7150 }, { "epoch": 0.32, "learning_rate": 1.5320537021631126e-05, "loss": 2.4865, "step": 7160 }, { "epoch": 0.32, "learning_rate": 1.5308421318131333e-05, "loss": 2.4966, "step": 7170 }, { "epoch": 0.33, "learning_rate": 1.5296294755311295e-05, "loss": 2.4821, "step": 7180 }, { "epoch": 0.33, "learning_rate": 1.528415735797806e-05, "loss": 2.4826, "step": 7190 }, { "epoch": 0.33, "learning_rate": 1.5272009150960828e-05, "loss": 2.4998, "step": 7200 }, { "epoch": 0.33, "learning_rate": 1.5259850159110917e-05, "loss": 2.4705, "step": 7210 }, { "epoch": 0.33, "learning_rate": 1.5247680407301708e-05, "loss": 2.4692, "step": 7220 }, { "epoch": 0.33, "learning_rate": 1.52354999204286e-05, "loss": 2.4782, "step": 7230 }, { "epoch": 0.33, "learning_rate": 1.5223308723408936e-05, "loss": 2.5029, "step": 7240 }, { "epoch": 0.33, "learning_rate": 1.5211106841181988e-05, "loss": 2.5012, "step": 7250 }, { "epoch": 0.33, "learning_rate": 1.519889429870887e-05, "loss": 2.4967, "step": 7260 }, { "epoch": 0.33, "learning_rate": 1.5186671120972514e-05, "loss": 2.469, "step": 7270 }, { "epoch": 0.33, "learning_rate": 1.5174437332977606e-05, "loss": 2.4666, "step": 7280 }, { "epoch": 0.33, "learning_rate": 1.5162192959750532e-05, "loss": 2.5038, "step": 7290 }, { "epoch": 0.33, "learning_rate": 1.5149938026339341e-05, "loss": 2.4995, "step": 7300 }, { "epoch": 0.33, "learning_rate": 1.5137672557813672e-05, "loss": 2.485, "step": 7310 }, { "epoch": 0.33, "learning_rate": 1.5125396579264728e-05, "loss": 2.4878, "step": 7320 }, { "epoch": 0.33, "learning_rate": 1.5113110115805208e-05, "loss": 2.5114, "step": 7330 }, { "epoch": 0.33, "learning_rate": 1.5100813192569254e-05, "loss": 2.4687, "step": 7340 }, { "epoch": 0.33, "learning_rate": 1.5088505834712414e-05, "loss": 2.521, "step": 7350 }, { "epoch": 0.33, "learning_rate": 1.5076188067411582e-05, "loss": 2.4765, "step": 7360 }, { "epoch": 0.33, "learning_rate": 1.5063859915864936e-05, "loss": 2.4709, "step": 7370 }, { "epoch": 0.33, "learning_rate": 1.5051521405291905e-05, "loss": 2.5008, "step": 7380 }, { "epoch": 0.33, "learning_rate": 1.5039172560933109e-05, "loss": 2.4881, "step": 7390 }, { "epoch": 0.34, "learning_rate": 1.5026813408050305e-05, "loss": 2.4938, "step": 7400 }, { "epoch": 0.34, "learning_rate": 1.5014443971926338e-05, "loss": 2.4877, "step": 7410 }, { "epoch": 0.34, "learning_rate": 1.5002064277865094e-05, "loss": 2.4864, "step": 7420 }, { "epoch": 0.34, "learning_rate": 1.4989674351191436e-05, "loss": 2.4753, "step": 7430 }, { "epoch": 0.34, "learning_rate": 1.4977274217251166e-05, "loss": 2.5124, "step": 7440 }, { "epoch": 0.34, "learning_rate": 1.4964863901410962e-05, "loss": 2.493, "step": 7450 }, { "epoch": 0.34, "learning_rate": 1.4952443429058334e-05, "loss": 2.4917, "step": 7460 }, { "epoch": 0.34, "learning_rate": 1.4940012825601569e-05, "loss": 2.4878, "step": 7470 }, { "epoch": 0.34, "learning_rate": 1.4927572116469672e-05, "loss": 2.4643, "step": 7480 }, { "epoch": 0.34, "learning_rate": 1.4915121327112338e-05, "loss": 2.4884, "step": 7490 }, { "epoch": 0.34, "learning_rate": 1.4902660482999865e-05, "loss": 2.4961, "step": 7500 }, { "epoch": 0.34, "learning_rate": 1.4890189609623122e-05, "loss": 2.4694, "step": 7510 }, { "epoch": 0.34, "learning_rate": 1.4877708732493508e-05, "loss": 2.471, "step": 7520 }, { "epoch": 0.34, "learning_rate": 1.4865217877142872e-05, "loss": 2.4625, "step": 7530 }, { "epoch": 0.34, "learning_rate": 1.4852717069123483e-05, "loss": 2.4878, "step": 7540 }, { "epoch": 0.34, "learning_rate": 1.484020633400797e-05, "loss": 2.4999, "step": 7550 }, { "epoch": 0.34, "learning_rate": 1.4827685697389265e-05, "loss": 2.4931, "step": 7560 }, { "epoch": 0.34, "learning_rate": 1.4815155184880557e-05, "loss": 2.4862, "step": 7570 }, { "epoch": 0.34, "learning_rate": 1.4802614822115235e-05, "loss": 2.496, "step": 7580 }, { "epoch": 0.34, "learning_rate": 1.479006463474685e-05, "loss": 2.4618, "step": 7590 }, { "epoch": 0.34, "learning_rate": 1.4777504648449044e-05, "loss": 2.4749, "step": 7600 }, { "epoch": 0.34, "learning_rate": 1.4764934888915494e-05, "loss": 2.4874, "step": 7610 }, { "epoch": 0.35, "learning_rate": 1.4752355381859888e-05, "loss": 2.4869, "step": 7620 }, { "epoch": 0.35, "learning_rate": 1.4739766153015842e-05, "loss": 2.473, "step": 7630 }, { "epoch": 0.35, "learning_rate": 1.4727167228136862e-05, "loss": 2.4901, "step": 7640 }, { "epoch": 0.35, "learning_rate": 1.4714558632996297e-05, "loss": 2.4881, "step": 7650 }, { "epoch": 0.35, "learning_rate": 1.4701940393387264e-05, "loss": 2.4849, "step": 7660 }, { "epoch": 0.35, "learning_rate": 1.468931253512262e-05, "loss": 2.4872, "step": 7670 }, { "epoch": 0.35, "learning_rate": 1.4676675084034897e-05, "loss": 2.4706, "step": 7680 }, { "epoch": 0.35, "learning_rate": 1.4664028065976245e-05, "loss": 2.4929, "step": 7690 }, { "epoch": 0.35, "learning_rate": 1.4651371506818395e-05, "loss": 2.5027, "step": 7700 }, { "epoch": 0.35, "learning_rate": 1.463870543245259e-05, "loss": 2.4725, "step": 7710 }, { "epoch": 0.35, "learning_rate": 1.4626029868789535e-05, "loss": 2.4946, "step": 7720 }, { "epoch": 0.35, "learning_rate": 1.4613344841759354e-05, "loss": 2.465, "step": 7730 }, { "epoch": 0.35, "learning_rate": 1.4600650377311523e-05, "loss": 2.4928, "step": 7740 }, { "epoch": 0.35, "learning_rate": 1.4587946501414832e-05, "loss": 2.4926, "step": 7750 }, { "epoch": 0.35, "learning_rate": 1.4575233240057314e-05, "loss": 2.4775, "step": 7760 }, { "epoch": 0.35, "learning_rate": 1.4562510619246213e-05, "loss": 2.4851, "step": 7770 }, { "epoch": 0.35, "learning_rate": 1.454977866500791e-05, "loss": 2.4875, "step": 7780 }, { "epoch": 0.35, "learning_rate": 1.4537037403387882e-05, "loss": 2.4923, "step": 7790 }, { "epoch": 0.35, "learning_rate": 1.4524286860450646e-05, "loss": 2.5041, "step": 7800 }, { "epoch": 0.35, "learning_rate": 1.451152706227971e-05, "loss": 2.4803, "step": 7810 }, { "epoch": 0.35, "learning_rate": 1.4498758034977507e-05, "loss": 2.4748, "step": 7820 }, { "epoch": 0.35, "learning_rate": 1.4485979804665358e-05, "loss": 2.473, "step": 7830 }, { "epoch": 0.36, "learning_rate": 1.4473192397483403e-05, "loss": 2.4936, "step": 7840 }, { "epoch": 0.36, "learning_rate": 1.4460395839590563e-05, "loss": 2.501, "step": 7850 }, { "epoch": 0.36, "learning_rate": 1.4447590157164466e-05, "loss": 2.5057, "step": 7860 }, { "epoch": 0.36, "learning_rate": 1.4434775376401424e-05, "loss": 2.4603, "step": 7870 }, { "epoch": 0.36, "learning_rate": 1.442195152351634e-05, "loss": 2.4675, "step": 7880 }, { "epoch": 0.36, "learning_rate": 1.4409118624742696e-05, "loss": 2.507, "step": 7890 }, { "epoch": 0.36, "learning_rate": 1.4396276706332461e-05, "loss": 2.4854, "step": 7900 }, { "epoch": 0.36, "learning_rate": 1.4383425794556072e-05, "loss": 2.4905, "step": 7910 }, { "epoch": 0.36, "learning_rate": 1.437056591570235e-05, "loss": 2.4925, "step": 7920 }, { "epoch": 0.36, "learning_rate": 1.435769709607847e-05, "loss": 2.4565, "step": 7930 }, { "epoch": 0.36, "learning_rate": 1.4344819362009885e-05, "loss": 2.4914, "step": 7940 }, { "epoch": 0.36, "learning_rate": 1.4331932739840298e-05, "loss": 2.4601, "step": 7950 }, { "epoch": 0.36, "learning_rate": 1.4319037255931582e-05, "loss": 2.4923, "step": 7960 }, { "epoch": 0.36, "learning_rate": 1.4306132936663749e-05, "loss": 2.4638, "step": 7970 }, { "epoch": 0.36, "learning_rate": 1.4293219808434875e-05, "loss": 2.4854, "step": 7980 }, { "epoch": 0.36, "learning_rate": 1.4280297897661063e-05, "loss": 2.5125, "step": 7990 }, { "epoch": 0.36, "learning_rate": 1.4267367230776383e-05, "loss": 2.4939, "step": 8000 }, { "epoch": 0.36, "eval_accuracy": 0.5401815307188504, "eval_loss": 2.244050979614258, "eval_runtime": 9.925, "eval_samples_per_second": 137.733, "eval_steps_per_second": 1.108, "step": 8000 }, { "epoch": 0.36, "learning_rate": 1.4254427834232811e-05, "loss": 2.4725, "step": 8010 }, { "epoch": 0.36, "learning_rate": 1.4241479734500187e-05, "loss": 2.4881, "step": 8020 }, { "epoch": 0.36, "learning_rate": 1.422852295806615e-05, "loss": 2.4651, "step": 8030 }, { "epoch": 0.36, "learning_rate": 1.4215557531436095e-05, "loss": 2.5104, "step": 8040 }, { "epoch": 0.36, "learning_rate": 1.4202583481133107e-05, "loss": 2.4719, "step": 8050 }, { "epoch": 0.37, "learning_rate": 1.418960083369791e-05, "loss": 2.4504, "step": 8060 }, { "epoch": 0.37, "learning_rate": 1.417660961568883e-05, "loss": 2.4851, "step": 8070 }, { "epoch": 0.37, "learning_rate": 1.4163609853681704e-05, "loss": 2.4842, "step": 8080 }, { "epoch": 0.37, "learning_rate": 1.4150601574269862e-05, "loss": 2.4873, "step": 8090 }, { "epoch": 0.37, "learning_rate": 1.4137584804064055e-05, "loss": 2.4766, "step": 8100 }, { "epoch": 0.37, "learning_rate": 1.41245595696924e-05, "loss": 2.492, "step": 8110 }, { "epoch": 0.37, "learning_rate": 1.4111525897800333e-05, "loss": 2.4844, "step": 8120 }, { "epoch": 0.37, "learning_rate": 1.4098483815050548e-05, "loss": 2.4887, "step": 8130 }, { "epoch": 0.37, "learning_rate": 1.4085433348122946e-05, "loss": 2.4792, "step": 8140 }, { "epoch": 0.37, "learning_rate": 1.4072374523714577e-05, "loss": 2.4944, "step": 8150 }, { "epoch": 0.37, "learning_rate": 1.4059307368539594e-05, "loss": 2.4783, "step": 8160 }, { "epoch": 0.37, "learning_rate": 1.4046231909329184e-05, "loss": 2.4608, "step": 8170 }, { "epoch": 0.37, "learning_rate": 1.4033148172831526e-05, "loss": 2.4837, "step": 8180 }, { "epoch": 0.37, "learning_rate": 1.402005618581173e-05, "loss": 2.477, "step": 8190 }, { "epoch": 0.37, "learning_rate": 1.4006955975051788e-05, "loss": 2.4932, "step": 8200 }, { "epoch": 0.37, "learning_rate": 1.399384756735051e-05, "loss": 2.489, "step": 8210 }, { "epoch": 0.37, "learning_rate": 1.3980730989523473e-05, "loss": 2.4782, "step": 8220 }, { "epoch": 0.37, "learning_rate": 1.3967606268402975e-05, "loss": 2.4819, "step": 8230 }, { "epoch": 0.37, "learning_rate": 1.3954473430837965e-05, "loss": 2.4772, "step": 8240 }, { "epoch": 0.37, "learning_rate": 1.3941332503694001e-05, "loss": 2.4997, "step": 8250 }, { "epoch": 0.37, "learning_rate": 1.3928183513853188e-05, "loss": 2.4828, "step": 8260 }, { "epoch": 0.37, "learning_rate": 1.3915026488214118e-05, "loss": 2.5103, "step": 8270 }, { "epoch": 0.38, "learning_rate": 1.3901861453691834e-05, "loss": 2.503, "step": 8280 }, { "epoch": 0.38, "learning_rate": 1.3888688437217757e-05, "loss": 2.4911, "step": 8290 }, { "epoch": 0.38, "learning_rate": 1.3875507465739633e-05, "loss": 2.4989, "step": 8300 }, { "epoch": 0.38, "learning_rate": 1.3862318566221481e-05, "loss": 2.4716, "step": 8310 }, { "epoch": 0.38, "learning_rate": 1.3849121765643549e-05, "loss": 2.5075, "step": 8320 }, { "epoch": 0.38, "learning_rate": 1.3835917091002236e-05, "loss": 2.5114, "step": 8330 }, { "epoch": 0.38, "learning_rate": 1.3822704569310053e-05, "loss": 2.4933, "step": 8340 }, { "epoch": 0.38, "learning_rate": 1.3809484227595564e-05, "loss": 2.4884, "step": 8350 }, { "epoch": 0.38, "learning_rate": 1.3796256092903333e-05, "loss": 2.4812, "step": 8360 }, { "epoch": 0.38, "learning_rate": 1.3783020192293858e-05, "loss": 2.4746, "step": 8370 }, { "epoch": 0.38, "learning_rate": 1.3769776552843532e-05, "loss": 2.4646, "step": 8380 }, { "epoch": 0.38, "learning_rate": 1.3756525201644577e-05, "loss": 2.5032, "step": 8390 }, { "epoch": 0.38, "learning_rate": 1.3743266165804983e-05, "loss": 2.4692, "step": 8400 }, { "epoch": 0.38, "learning_rate": 1.3729999472448474e-05, "loss": 2.4748, "step": 8410 }, { "epoch": 0.38, "learning_rate": 1.3716725148714429e-05, "loss": 2.465, "step": 8420 }, { "epoch": 0.38, "learning_rate": 1.3703443221757838e-05, "loss": 2.4649, "step": 8430 }, { "epoch": 0.38, "learning_rate": 1.3690153718749248e-05, "loss": 2.4688, "step": 8440 }, { "epoch": 0.38, "learning_rate": 1.36768566668747e-05, "loss": 2.4741, "step": 8450 }, { "epoch": 0.38, "learning_rate": 1.3663552093335676e-05, "loss": 2.4869, "step": 8460 }, { "epoch": 0.38, "learning_rate": 1.3650240025349055e-05, "loss": 2.4578, "step": 8470 }, { "epoch": 0.38, "learning_rate": 1.3636920490147034e-05, "loss": 2.4768, "step": 8480 }, { "epoch": 0.38, "learning_rate": 1.3623593514977098e-05, "loss": 2.5064, "step": 8490 }, { "epoch": 0.39, "learning_rate": 1.361025912710194e-05, "loss": 2.4767, "step": 8500 }, { "epoch": 0.39, "learning_rate": 1.3596917353799426e-05, "loss": 2.475, "step": 8510 }, { "epoch": 0.39, "learning_rate": 1.3583568222362526e-05, "loss": 2.4926, "step": 8520 }, { "epoch": 0.39, "learning_rate": 1.3570211760099265e-05, "loss": 2.4748, "step": 8530 }, { "epoch": 0.39, "learning_rate": 1.3556847994332662e-05, "loss": 2.507, "step": 8540 }, { "epoch": 0.39, "learning_rate": 1.3543476952400676e-05, "loss": 2.491, "step": 8550 }, { "epoch": 0.39, "learning_rate": 1.3530098661656155e-05, "loss": 2.468, "step": 8560 }, { "epoch": 0.39, "learning_rate": 1.351671314946677e-05, "loss": 2.5024, "step": 8570 }, { "epoch": 0.39, "learning_rate": 1.3503320443214968e-05, "loss": 2.4747, "step": 8580 }, { "epoch": 0.39, "learning_rate": 1.3489920570297916e-05, "loss": 2.4857, "step": 8590 }, { "epoch": 0.39, "learning_rate": 1.3476513558127433e-05, "loss": 2.4804, "step": 8600 }, { "epoch": 0.39, "learning_rate": 1.346309943412995e-05, "loss": 2.4737, "step": 8610 }, { "epoch": 0.39, "learning_rate": 1.3449678225746445e-05, "loss": 2.4841, "step": 8620 }, { "epoch": 0.39, "learning_rate": 1.3436249960432386e-05, "loss": 2.4936, "step": 8630 }, { "epoch": 0.39, "learning_rate": 1.3422814665657677e-05, "loss": 2.5065, "step": 8640 }, { "epoch": 0.39, "learning_rate": 1.340937236890661e-05, "loss": 2.4979, "step": 8650 }, { "epoch": 0.39, "learning_rate": 1.339592309767779e-05, "loss": 2.4755, "step": 8660 }, { "epoch": 0.39, "learning_rate": 1.3382466879484092e-05, "loss": 2.4979, "step": 8670 }, { "epoch": 0.39, "learning_rate": 1.336900374185261e-05, "loss": 2.4854, "step": 8680 }, { "epoch": 0.39, "learning_rate": 1.3355533712324586e-05, "loss": 2.4722, "step": 8690 }, { "epoch": 0.39, "learning_rate": 1.3342056818455359e-05, "loss": 2.4854, "step": 8700 }, { "epoch": 0.39, "learning_rate": 1.3328573087814314e-05, "loss": 2.4955, "step": 8710 }, { "epoch": 0.4, "learning_rate": 1.3315082547984819e-05, "loss": 2.4944, "step": 8720 }, { "epoch": 0.4, "learning_rate": 1.330158522656418e-05, "loss": 2.5024, "step": 8730 }, { "epoch": 0.4, "learning_rate": 1.328808115116356e-05, "loss": 2.4808, "step": 8740 }, { "epoch": 0.4, "learning_rate": 1.3274570349407956e-05, "loss": 2.4916, "step": 8750 }, { "epoch": 0.4, "learning_rate": 1.3261052848936116e-05, "loss": 2.4632, "step": 8760 }, { "epoch": 0.4, "learning_rate": 1.3247528677400486e-05, "loss": 2.4657, "step": 8770 }, { "epoch": 0.4, "learning_rate": 1.3233997862467169e-05, "loss": 2.4718, "step": 8780 }, { "epoch": 0.4, "learning_rate": 1.3220460431815859e-05, "loss": 2.478, "step": 8790 }, { "epoch": 0.4, "learning_rate": 1.320691641313977e-05, "loss": 2.4887, "step": 8800 }, { "epoch": 0.4, "learning_rate": 1.3193365834145606e-05, "loss": 2.4611, "step": 8810 }, { "epoch": 0.4, "learning_rate": 1.3179808722553486e-05, "loss": 2.469, "step": 8820 }, { "epoch": 0.4, "learning_rate": 1.31662451060969e-05, "loss": 2.4403, "step": 8830 }, { "epoch": 0.4, "learning_rate": 1.3152675012522629e-05, "loss": 2.4769, "step": 8840 }, { "epoch": 0.4, "learning_rate": 1.3139098469590716e-05, "loss": 2.4568, "step": 8850 }, { "epoch": 0.4, "learning_rate": 1.31255155050744e-05, "loss": 2.4885, "step": 8860 }, { "epoch": 0.4, "learning_rate": 1.3111926146760047e-05, "loss": 2.4722, "step": 8870 }, { "epoch": 0.4, "learning_rate": 1.309833042244711e-05, "loss": 2.4638, "step": 8880 }, { "epoch": 0.4, "learning_rate": 1.308472835994806e-05, "loss": 2.4892, "step": 8890 }, { "epoch": 0.4, "learning_rate": 1.3071119987088339e-05, "loss": 2.4763, "step": 8900 }, { "epoch": 0.4, "learning_rate": 1.3057505331706288e-05, "loss": 2.494, "step": 8910 }, { "epoch": 0.4, "learning_rate": 1.3043884421653115e-05, "loss": 2.4811, "step": 8920 }, { "epoch": 0.4, "learning_rate": 1.303025728479281e-05, "loss": 2.4878, "step": 8930 }, { "epoch": 0.41, "learning_rate": 1.3016623949002104e-05, "loss": 2.4671, "step": 8940 }, { "epoch": 0.41, "learning_rate": 1.3002984442170416e-05, "loss": 2.4678, "step": 8950 }, { "epoch": 0.41, "learning_rate": 1.2989338792199782e-05, "loss": 2.4962, "step": 8960 }, { "epoch": 0.41, "learning_rate": 1.2975687027004804e-05, "loss": 2.4836, "step": 8970 }, { "epoch": 0.41, "learning_rate": 1.29620291745126e-05, "loss": 2.5108, "step": 8980 }, { "epoch": 0.41, "learning_rate": 1.2948365262662734e-05, "loss": 2.4853, "step": 8990 }, { "epoch": 0.41, "learning_rate": 1.2934695319407176e-05, "loss": 2.463, "step": 9000 }, { "epoch": 0.41, "learning_rate": 1.2921019372710217e-05, "loss": 2.4847, "step": 9010 }, { "epoch": 0.41, "learning_rate": 1.2907337450548446e-05, "loss": 2.4742, "step": 9020 }, { "epoch": 0.41, "learning_rate": 1.289364958091067e-05, "loss": 2.4816, "step": 9030 }, { "epoch": 0.41, "learning_rate": 1.2879955791797853e-05, "loss": 2.4556, "step": 9040 }, { "epoch": 0.41, "learning_rate": 1.2866256111223085e-05, "loss": 2.4868, "step": 9050 }, { "epoch": 0.41, "learning_rate": 1.2852550567211498e-05, "loss": 2.4913, "step": 9060 }, { "epoch": 0.41, "learning_rate": 1.2838839187800218e-05, "loss": 2.4818, "step": 9070 }, { "epoch": 0.41, "learning_rate": 1.282512200103831e-05, "loss": 2.5155, "step": 9080 }, { "epoch": 0.41, "learning_rate": 1.2811399034986726e-05, "loss": 2.4636, "step": 9090 }, { "epoch": 0.41, "learning_rate": 1.2797670317718234e-05, "loss": 2.47, "step": 9100 }, { "epoch": 0.41, "learning_rate": 1.2783935877317362e-05, "loss": 2.4984, "step": 9110 }, { "epoch": 0.41, "learning_rate": 1.2770195741880356e-05, "loss": 2.4898, "step": 9120 }, { "epoch": 0.41, "learning_rate": 1.2756449939515107e-05, "loss": 2.4708, "step": 9130 }, { "epoch": 0.41, "learning_rate": 1.2742698498341098e-05, "loss": 2.4779, "step": 9140 }, { "epoch": 0.41, "learning_rate": 1.2728941446489347e-05, "loss": 2.47, "step": 9150 }, { "epoch": 0.42, "learning_rate": 1.2715178812102353e-05, "loss": 2.4753, "step": 9160 }, { "epoch": 0.42, "learning_rate": 1.2701410623334037e-05, "loss": 2.5039, "step": 9170 }, { "epoch": 0.42, "learning_rate": 1.2687636908349671e-05, "loss": 2.4783, "step": 9180 }, { "epoch": 0.42, "learning_rate": 1.2673857695325846e-05, "loss": 2.4995, "step": 9190 }, { "epoch": 0.42, "learning_rate": 1.2660073012450389e-05, "loss": 2.4753, "step": 9200 }, { "epoch": 0.42, "learning_rate": 1.2646282887922325e-05, "loss": 2.4616, "step": 9210 }, { "epoch": 0.42, "learning_rate": 1.2632487349951805e-05, "loss": 2.4776, "step": 9220 }, { "epoch": 0.42, "learning_rate": 1.2618686426760058e-05, "loss": 2.4758, "step": 9230 }, { "epoch": 0.42, "learning_rate": 1.2604880146579326e-05, "loss": 2.4691, "step": 9240 }, { "epoch": 0.42, "learning_rate": 1.2591068537652813e-05, "loss": 2.4921, "step": 9250 }, { "epoch": 0.42, "learning_rate": 1.257725162823462e-05, "loss": 2.4803, "step": 9260 }, { "epoch": 0.42, "learning_rate": 1.2563429446589695e-05, "loss": 2.4763, "step": 9270 }, { "epoch": 0.42, "learning_rate": 1.2549602020993769e-05, "loss": 2.4785, "step": 9280 }, { "epoch": 0.42, "learning_rate": 1.2535769379733305e-05, "loss": 2.5066, "step": 9290 }, { "epoch": 0.42, "learning_rate": 1.2521931551105427e-05, "loss": 2.4683, "step": 9300 }, { "epoch": 0.42, "learning_rate": 1.2508088563417875e-05, "loss": 2.4796, "step": 9310 }, { "epoch": 0.42, "learning_rate": 1.2494240444988943e-05, "loss": 2.4638, "step": 9320 }, { "epoch": 0.42, "learning_rate": 1.2480387224147426e-05, "loss": 2.4631, "step": 9330 }, { "epoch": 0.42, "learning_rate": 1.2466528929232548e-05, "loss": 2.4953, "step": 9340 }, { "epoch": 0.42, "learning_rate": 1.245266558859392e-05, "loss": 2.4954, "step": 9350 }, { "epoch": 0.42, "learning_rate": 1.2438797230591468e-05, "loss": 2.481, "step": 9360 }, { "epoch": 0.42, "learning_rate": 1.2424923883595389e-05, "loss": 2.4852, "step": 9370 }, { "epoch": 0.43, "learning_rate": 1.2411045575986079e-05, "loss": 2.4744, "step": 9380 }, { "epoch": 0.43, "learning_rate": 1.2397162336154085e-05, "loss": 2.4688, "step": 9390 }, { "epoch": 0.43, "learning_rate": 1.2383274192500048e-05, "loss": 2.4656, "step": 9400 }, { "epoch": 0.43, "learning_rate": 1.2369381173434636e-05, "loss": 2.4552, "step": 9410 }, { "epoch": 0.43, "learning_rate": 1.235548330737849e-05, "loss": 2.4615, "step": 9420 }, { "epoch": 0.43, "learning_rate": 1.2341580622762164e-05, "loss": 2.4731, "step": 9430 }, { "epoch": 0.43, "learning_rate": 1.2327673148026079e-05, "loss": 2.4722, "step": 9440 }, { "epoch": 0.43, "learning_rate": 1.2313760911620447e-05, "loss": 2.4791, "step": 9450 }, { "epoch": 0.43, "learning_rate": 1.229984394200522e-05, "loss": 2.4814, "step": 9460 }, { "epoch": 0.43, "learning_rate": 1.2285922267650038e-05, "loss": 2.4528, "step": 9470 }, { "epoch": 0.43, "learning_rate": 1.2271995917034166e-05, "loss": 2.4879, "step": 9480 }, { "epoch": 0.43, "learning_rate": 1.2258064918646424e-05, "loss": 2.4689, "step": 9490 }, { "epoch": 0.43, "learning_rate": 1.2244129300985153e-05, "loss": 2.4963, "step": 9500 }, { "epoch": 0.43, "learning_rate": 1.2230189092558138e-05, "loss": 2.4701, "step": 9510 }, { "epoch": 0.43, "learning_rate": 1.2216244321882553e-05, "loss": 2.4841, "step": 9520 }, { "epoch": 0.43, "learning_rate": 1.2202295017484911e-05, "loss": 2.4951, "step": 9530 }, { "epoch": 0.43, "learning_rate": 1.2188341207900991e-05, "loss": 2.4291, "step": 9540 }, { "epoch": 0.43, "learning_rate": 1.2174382921675799e-05, "loss": 2.4742, "step": 9550 }, { "epoch": 0.43, "learning_rate": 1.2160420187363484e-05, "loss": 2.4867, "step": 9560 }, { "epoch": 0.43, "learning_rate": 1.2146453033527315e-05, "loss": 2.4603, "step": 9570 }, { "epoch": 0.43, "learning_rate": 1.213248148873958e-05, "loss": 2.4703, "step": 9580 }, { "epoch": 0.43, "learning_rate": 1.2118505581581568e-05, "loss": 2.4585, "step": 9590 }, { "epoch": 0.44, "learning_rate": 1.2104525340643479e-05, "loss": 2.4888, "step": 9600 }, { "epoch": 0.44, "learning_rate": 1.2090540794524382e-05, "loss": 2.4635, "step": 9610 }, { "epoch": 0.44, "learning_rate": 1.2076551971832154e-05, "loss": 2.4772, "step": 9620 }, { "epoch": 0.44, "learning_rate": 1.2062558901183422e-05, "loss": 2.4872, "step": 9630 }, { "epoch": 0.44, "learning_rate": 1.2048561611203501e-05, "loss": 2.4838, "step": 9640 }, { "epoch": 0.44, "learning_rate": 1.2034560130526341e-05, "loss": 2.4642, "step": 9650 }, { "epoch": 0.44, "learning_rate": 1.2020554487794455e-05, "loss": 2.5052, "step": 9660 }, { "epoch": 0.44, "learning_rate": 1.2006544711658884e-05, "loss": 2.4885, "step": 9670 }, { "epoch": 0.44, "learning_rate": 1.1992530830779116e-05, "loss": 2.4674, "step": 9680 }, { "epoch": 0.44, "learning_rate": 1.1978512873823036e-05, "loss": 2.4788, "step": 9690 }, { "epoch": 0.44, "learning_rate": 1.1964490869466869e-05, "loss": 2.4905, "step": 9700 }, { "epoch": 0.44, "learning_rate": 1.1950464846395123e-05, "loss": 2.5041, "step": 9710 }, { "epoch": 0.44, "learning_rate": 1.1936434833300517e-05, "loss": 2.4659, "step": 9720 }, { "epoch": 0.44, "learning_rate": 1.1922400858883942e-05, "loss": 2.4741, "step": 9730 }, { "epoch": 0.44, "learning_rate": 1.1908362951854393e-05, "loss": 2.4725, "step": 9740 }, { "epoch": 0.44, "learning_rate": 1.1894321140928901e-05, "loss": 2.478, "step": 9750 }, { "epoch": 0.44, "learning_rate": 1.1880275454832493e-05, "loss": 2.47, "step": 9760 }, { "epoch": 0.44, "learning_rate": 1.1866225922298116e-05, "loss": 2.474, "step": 9770 }, { "epoch": 0.44, "learning_rate": 1.185217257206659e-05, "loss": 2.4519, "step": 9780 }, { "epoch": 0.44, "learning_rate": 1.183811543288654e-05, "loss": 2.4818, "step": 9790 }, { "epoch": 0.44, "learning_rate": 1.1824054533514349e-05, "loss": 2.4663, "step": 9800 }, { "epoch": 0.44, "learning_rate": 1.1809989902714085e-05, "loss": 2.4877, "step": 9810 }, { "epoch": 0.45, "learning_rate": 1.1795921569257456e-05, "loss": 2.4713, "step": 9820 }, { "epoch": 0.45, "learning_rate": 1.178184956192374e-05, "loss": 2.4738, "step": 9830 }, { "epoch": 0.45, "learning_rate": 1.1767773909499729e-05, "loss": 2.457, "step": 9840 }, { "epoch": 0.45, "learning_rate": 1.1753694640779677e-05, "loss": 2.4695, "step": 9850 }, { "epoch": 0.45, "learning_rate": 1.1739611784565232e-05, "loss": 2.4408, "step": 9860 }, { "epoch": 0.45, "learning_rate": 1.1725525369665383e-05, "loss": 2.4893, "step": 9870 }, { "epoch": 0.45, "learning_rate": 1.1711435424896395e-05, "loss": 2.4603, "step": 9880 }, { "epoch": 0.45, "learning_rate": 1.169734197908176e-05, "loss": 2.4992, "step": 9890 }, { "epoch": 0.45, "learning_rate": 1.1683245061052126e-05, "loss": 2.4855, "step": 9900 }, { "epoch": 0.45, "learning_rate": 1.1669144699645249e-05, "loss": 2.4751, "step": 9910 }, { "epoch": 0.45, "learning_rate": 1.1655040923705926e-05, "loss": 2.4548, "step": 9920 }, { "epoch": 0.45, "learning_rate": 1.1640933762085943e-05, "loss": 2.4694, "step": 9930 }, { "epoch": 0.45, "learning_rate": 1.1626823243644006e-05, "loss": 2.469, "step": 9940 }, { "epoch": 0.45, "learning_rate": 1.161270939724569e-05, "loss": 2.5118, "step": 9950 }, { "epoch": 0.45, "learning_rate": 1.1598592251763378e-05, "loss": 2.4749, "step": 9960 }, { "epoch": 0.45, "learning_rate": 1.1584471836076208e-05, "loss": 2.4817, "step": 9970 }, { "epoch": 0.45, "learning_rate": 1.1570348179069998e-05, "loss": 2.4811, "step": 9980 }, { "epoch": 0.45, "learning_rate": 1.1556221309637204e-05, "loss": 2.4543, "step": 9990 }, { "epoch": 0.45, "learning_rate": 1.1542091256676846e-05, "loss": 2.463, "step": 10000 }, { "epoch": 0.45, "eval_accuracy": 0.5409716963389947, "eval_loss": 2.238548755645752, "eval_runtime": 9.8782, "eval_samples_per_second": 138.385, "eval_steps_per_second": 1.114, "step": 10000 }, { "epoch": 0.45, "learning_rate": 1.1527958049094466e-05, "loss": 2.5025, "step": 10010 }, { "epoch": 0.45, "learning_rate": 1.1513821715802052e-05, "loss": 2.4632, "step": 10020 }, { "epoch": 0.45, "learning_rate": 1.149968228571799e-05, "loss": 2.4893, "step": 10030 }, { "epoch": 0.46, "learning_rate": 1.1485539787767e-05, "loss": 2.4654, "step": 10040 }, { "epoch": 0.46, "learning_rate": 1.1471394250880077e-05, "loss": 2.4884, "step": 10050 }, { "epoch": 0.46, "learning_rate": 1.1457245703994433e-05, "loss": 2.4646, "step": 10060 }, { "epoch": 0.46, "learning_rate": 1.1443094176053439e-05, "loss": 2.5017, "step": 10070 }, { "epoch": 0.46, "learning_rate": 1.1428939696006564e-05, "loss": 2.4983, "step": 10080 }, { "epoch": 0.46, "learning_rate": 1.1414782292809314e-05, "loss": 2.4971, "step": 10090 }, { "epoch": 0.46, "learning_rate": 1.1400621995423175e-05, "loss": 2.4909, "step": 10100 }, { "epoch": 0.46, "learning_rate": 1.1386458832815554e-05, "loss": 2.467, "step": 10110 }, { "epoch": 0.46, "learning_rate": 1.1372292833959723e-05, "loss": 2.4878, "step": 10120 }, { "epoch": 0.46, "learning_rate": 1.135812402783475e-05, "loss": 2.5128, "step": 10130 }, { "epoch": 0.46, "learning_rate": 1.1343952443425452e-05, "loss": 2.4584, "step": 10140 }, { "epoch": 0.46, "learning_rate": 1.1329778109722325e-05, "loss": 2.4873, "step": 10150 }, { "epoch": 0.46, "learning_rate": 1.1315601055721488e-05, "loss": 2.4796, "step": 10160 }, { "epoch": 0.46, "learning_rate": 1.130142131042463e-05, "loss": 2.4332, "step": 10170 }, { "epoch": 0.46, "learning_rate": 1.1287238902838942e-05, "loss": 2.4768, "step": 10180 }, { "epoch": 0.46, "learning_rate": 1.1273053861977062e-05, "loss": 2.4962, "step": 10190 }, { "epoch": 0.46, "learning_rate": 1.1258866216857015e-05, "loss": 2.4744, "step": 10200 }, { "epoch": 0.46, "learning_rate": 1.124467599650215e-05, "loss": 2.4636, "step": 10210 }, { "epoch": 0.46, "learning_rate": 1.1230483229941092e-05, "loss": 2.4826, "step": 10220 }, { "epoch": 0.46, "learning_rate": 1.1216287946207667e-05, "loss": 2.4625, "step": 10230 }, { "epoch": 0.46, "learning_rate": 1.1202090174340855e-05, "loss": 2.4428, "step": 10240 }, { "epoch": 0.46, "learning_rate": 1.1187889943384722e-05, "loss": 2.4797, "step": 10250 }, { "epoch": 0.46, "learning_rate": 1.117368728238837e-05, "loss": 2.4933, "step": 10260 }, { "epoch": 0.47, "learning_rate": 1.115948222040587e-05, "loss": 2.4521, "step": 10270 }, { "epoch": 0.47, "learning_rate": 1.11452747864962e-05, "loss": 2.4748, "step": 10280 }, { "epoch": 0.47, "learning_rate": 1.1131065009723195e-05, "loss": 2.4887, "step": 10290 }, { "epoch": 0.47, "learning_rate": 1.111685291915548e-05, "loss": 2.4836, "step": 10300 }, { "epoch": 0.47, "learning_rate": 1.1102638543866418e-05, "loss": 2.5095, "step": 10310 }, { "epoch": 0.47, "learning_rate": 1.108842191293404e-05, "loss": 2.4714, "step": 10320 }, { "epoch": 0.47, "learning_rate": 1.1074203055440995e-05, "loss": 2.4761, "step": 10330 }, { "epoch": 0.47, "learning_rate": 1.1059982000474486e-05, "loss": 2.4847, "step": 10340 }, { "epoch": 0.47, "learning_rate": 1.104575877712621e-05, "loss": 2.4751, "step": 10350 }, { "epoch": 0.47, "learning_rate": 1.1031533414492301e-05, "loss": 2.4657, "step": 10360 }, { "epoch": 0.47, "learning_rate": 1.1017305941673269e-05, "loss": 2.4617, "step": 10370 }, { "epoch": 0.47, "learning_rate": 1.100307638777394e-05, "loss": 2.5021, "step": 10380 }, { "epoch": 0.47, "learning_rate": 1.0988844781903402e-05, "loss": 2.4764, "step": 10390 }, { "epoch": 0.47, "learning_rate": 1.0974611153174934e-05, "loss": 2.5, "step": 10400 }, { "epoch": 0.47, "learning_rate": 1.0960375530705958e-05, "loss": 2.4802, "step": 10410 }, { "epoch": 0.47, "learning_rate": 1.0946137943617967e-05, "loss": 2.4398, "step": 10420 }, { "epoch": 0.47, "learning_rate": 1.0931898421036484e-05, "loss": 2.4764, "step": 10430 }, { "epoch": 0.47, "learning_rate": 1.0917656992090984e-05, "loss": 2.4588, "step": 10440 }, { "epoch": 0.47, "learning_rate": 1.0903413685914843e-05, "loss": 2.4786, "step": 10450 }, { "epoch": 0.47, "learning_rate": 1.0889168531645277e-05, "loss": 2.4669, "step": 10460 }, { "epoch": 0.47, "learning_rate": 1.0874921558423288e-05, "loss": 2.4847, "step": 10470 }, { "epoch": 0.47, "learning_rate": 1.0860672795393588e-05, "loss": 2.468, "step": 10480 }, { "epoch": 0.48, "learning_rate": 1.0846422271704561e-05, "loss": 2.4586, "step": 10490 }, { "epoch": 0.48, "learning_rate": 1.083217001650819e-05, "loss": 2.4695, "step": 10500 }, { "epoch": 0.48, "learning_rate": 1.0817916058959994e-05, "loss": 2.4669, "step": 10510 }, { "epoch": 0.48, "learning_rate": 1.0803660428218982e-05, "loss": 2.4742, "step": 10520 }, { "epoch": 0.48, "learning_rate": 1.0789403153447584e-05, "loss": 2.4924, "step": 10530 }, { "epoch": 0.48, "learning_rate": 1.077514426381159e-05, "loss": 2.5009, "step": 10540 }, { "epoch": 0.48, "learning_rate": 1.0760883788480095e-05, "loss": 2.4916, "step": 10550 }, { "epoch": 0.48, "learning_rate": 1.0746621756625442e-05, "loss": 2.4864, "step": 10560 }, { "epoch": 0.48, "learning_rate": 1.0732358197423149e-05, "loss": 2.4607, "step": 10570 }, { "epoch": 0.48, "learning_rate": 1.071809314005187e-05, "loss": 2.4872, "step": 10580 }, { "epoch": 0.48, "learning_rate": 1.070382661369331e-05, "loss": 2.4654, "step": 10590 }, { "epoch": 0.48, "learning_rate": 1.0689558647532197e-05, "loss": 2.4703, "step": 10600 }, { "epoch": 0.48, "learning_rate": 1.0675289270756186e-05, "loss": 2.4827, "step": 10610 }, { "epoch": 0.48, "learning_rate": 1.066101851255583e-05, "loss": 2.473, "step": 10620 }, { "epoch": 0.48, "learning_rate": 1.0646746402124504e-05, "loss": 2.4486, "step": 10630 }, { "epoch": 0.48, "learning_rate": 1.0632472968658348e-05, "loss": 2.4877, "step": 10640 }, { "epoch": 0.48, "learning_rate": 1.0618198241356206e-05, "loss": 2.4764, "step": 10650 }, { "epoch": 0.48, "learning_rate": 1.0603922249419579e-05, "loss": 2.4597, "step": 10660 }, { "epoch": 0.48, "learning_rate": 1.0589645022052544e-05, "loss": 2.4598, "step": 10670 }, { "epoch": 0.48, "learning_rate": 1.057536658846171e-05, "loss": 2.4909, "step": 10680 }, { "epoch": 0.48, "learning_rate": 1.0561086977856157e-05, "loss": 2.4626, "step": 10690 }, { "epoch": 0.48, "learning_rate": 1.0546806219447363e-05, "loss": 2.4911, "step": 10700 }, { "epoch": 0.49, "learning_rate": 1.0532524342449165e-05, "loss": 2.4544, "step": 10710 }, { "epoch": 0.49, "learning_rate": 1.0518241376077678e-05, "loss": 2.4779, "step": 10720 }, { "epoch": 0.49, "learning_rate": 1.050395734955126e-05, "loss": 2.4778, "step": 10730 }, { "epoch": 0.49, "learning_rate": 1.0489672292090419e-05, "loss": 2.4548, "step": 10740 }, { "epoch": 0.49, "learning_rate": 1.0475386232917789e-05, "loss": 2.4474, "step": 10750 }, { "epoch": 0.49, "learning_rate": 1.046109920125804e-05, "loss": 2.4791, "step": 10760 }, { "epoch": 0.49, "learning_rate": 1.0446811226337839e-05, "loss": 2.4879, "step": 10770 }, { "epoch": 0.49, "learning_rate": 1.0432522337385782e-05, "loss": 2.4726, "step": 10780 }, { "epoch": 0.49, "learning_rate": 1.041823256363233e-05, "loss": 2.4786, "step": 10790 }, { "epoch": 0.49, "learning_rate": 1.0403941934309763e-05, "loss": 2.465, "step": 10800 }, { "epoch": 0.49, "learning_rate": 1.0389650478652103e-05, "loss": 2.4702, "step": 10810 }, { "epoch": 0.49, "learning_rate": 1.037535822589506e-05, "loss": 2.4907, "step": 10820 }, { "epoch": 0.49, "learning_rate": 1.036106520527599e-05, "loss": 2.4662, "step": 10830 }, { "epoch": 0.49, "learning_rate": 1.03467714460338e-05, "loss": 2.4916, "step": 10840 }, { "epoch": 0.49, "learning_rate": 1.0332476977408927e-05, "loss": 2.468, "step": 10850 }, { "epoch": 0.49, "learning_rate": 1.0318181828643244e-05, "loss": 2.4903, "step": 10860 }, { "epoch": 0.49, "learning_rate": 1.0303886028980019e-05, "loss": 2.4821, "step": 10870 }, { "epoch": 0.49, "learning_rate": 1.028958960766386e-05, "loss": 2.4746, "step": 10880 }, { "epoch": 0.49, "learning_rate": 1.0275292593940634e-05, "loss": 2.4571, "step": 10890 }, { "epoch": 0.49, "learning_rate": 1.026099501705743e-05, "loss": 2.4814, "step": 10900 }, { "epoch": 0.49, "learning_rate": 1.0246696906262484e-05, "loss": 2.4966, "step": 10910 }, { "epoch": 0.49, "learning_rate": 1.0232398290805123e-05, "loss": 2.5007, "step": 10920 }, { "epoch": 0.5, "learning_rate": 1.0218099199935715e-05, "loss": 2.4785, "step": 10930 }, { "epoch": 0.5, "learning_rate": 1.0203799662905592e-05, "loss": 2.4657, "step": 10940 }, { "epoch": 0.5, "learning_rate": 1.0189499708966997e-05, "loss": 2.4478, "step": 10950 }, { "epoch": 0.5, "learning_rate": 1.0175199367373032e-05, "loss": 2.456, "step": 10960 }, { "epoch": 0.5, "learning_rate": 1.0160898667377594e-05, "loss": 2.4703, "step": 10970 }, { "epoch": 0.5, "learning_rate": 1.0146597638235303e-05, "loss": 2.4726, "step": 10980 }, { "epoch": 0.5, "learning_rate": 1.0132296309201463e-05, "loss": 2.4659, "step": 10990 }, { "epoch": 0.5, "learning_rate": 1.0117994709531987e-05, "loss": 2.528, "step": 11000 }, { "epoch": 0.5, "learning_rate": 1.0103692868483338e-05, "loss": 2.5046, "step": 11010 }, { "epoch": 0.5, "learning_rate": 1.0089390815312476e-05, "loss": 2.4595, "step": 11020 }, { "epoch": 0.5, "learning_rate": 1.00750885792768e-05, "loss": 2.4854, "step": 11030 }, { "epoch": 0.5, "learning_rate": 1.0060786189634075e-05, "loss": 2.4941, "step": 11040 }, { "epoch": 0.5, "learning_rate": 1.0046483675642386e-05, "loss": 2.4553, "step": 11050 }, { "epoch": 0.5, "learning_rate": 1.0032181066560066e-05, "loss": 2.4882, "step": 11060 }, { "epoch": 0.5, "learning_rate": 1.001787839164565e-05, "loss": 2.4697, "step": 11070 }, { "epoch": 0.5, "learning_rate": 1.0003575680157802e-05, "loss": 2.4672, "step": 11080 }, { "epoch": 0.5, "learning_rate": 9.989272961355266e-06, "loss": 2.4758, "step": 11090 }, { "epoch": 0.5, "learning_rate": 9.974970264496798e-06, "loss": 2.4676, "step": 11100 }, { "epoch": 0.5, "learning_rate": 9.960667618841101e-06, "loss": 2.4688, "step": 11110 }, { "epoch": 0.5, "learning_rate": 9.946365053646796e-06, "loss": 2.4346, "step": 11120 }, { "epoch": 0.5, "learning_rate": 9.932062598172308e-06, "loss": 2.5007, "step": 11130 }, { "epoch": 0.5, "learning_rate": 9.917760281675867e-06, "loss": 2.4631, "step": 11140 }, { "epoch": 0.51, "learning_rate": 9.903458133415398e-06, "loss": 2.4774, "step": 11150 }, { "epoch": 0.51, "learning_rate": 9.889156182648498e-06, "loss": 2.4702, "step": 11160 }, { "epoch": 0.51, "learning_rate": 9.87485445863234e-06, "loss": 2.493, "step": 11170 }, { "epoch": 0.51, "learning_rate": 9.860552990623657e-06, "loss": 2.4554, "step": 11180 }, { "epoch": 0.51, "learning_rate": 9.846251807878638e-06, "loss": 2.4582, "step": 11190 }, { "epoch": 0.51, "learning_rate": 9.831950939652903e-06, "loss": 2.4747, "step": 11200 }, { "epoch": 0.51, "learning_rate": 9.817650415201415e-06, "loss": 2.4689, "step": 11210 }, { "epoch": 0.51, "learning_rate": 9.80335026377845e-06, "loss": 2.4748, "step": 11220 }, { "epoch": 0.51, "learning_rate": 9.789050514637507e-06, "loss": 2.4528, "step": 11230 }, { "epoch": 0.51, "learning_rate": 9.774751197031264e-06, "loss": 2.464, "step": 11240 }, { "epoch": 0.51, "learning_rate": 9.760452340211525e-06, "loss": 2.4807, "step": 11250 }, { "epoch": 0.51, "learning_rate": 9.746153973429138e-06, "loss": 2.4558, "step": 11260 }, { "epoch": 0.51, "learning_rate": 9.731856125933962e-06, "loss": 2.455, "step": 11270 }, { "epoch": 0.51, "learning_rate": 9.717558826974789e-06, "loss": 2.4982, "step": 11280 }, { "epoch": 0.51, "learning_rate": 9.703262105799286e-06, "loss": 2.4671, "step": 11290 }, { "epoch": 0.51, "learning_rate": 9.688965991653934e-06, "loss": 2.466, "step": 11300 }, { "epoch": 0.51, "learning_rate": 9.674670513783989e-06, "loss": 2.4909, "step": 11310 }, { "epoch": 0.51, "learning_rate": 9.660375701433384e-06, "loss": 2.455, "step": 11320 }, { "epoch": 0.51, "learning_rate": 9.64608158384471e-06, "loss": 2.4938, "step": 11330 }, { "epoch": 0.51, "learning_rate": 9.631788190259118e-06, "loss": 2.485, "step": 11340 }, { "epoch": 0.51, "learning_rate": 9.617495549916297e-06, "loss": 2.4972, "step": 11350 }, { "epoch": 0.51, "learning_rate": 9.603203692054376e-06, "loss": 2.4789, "step": 11360 }, { "epoch": 0.52, "learning_rate": 9.588912645909905e-06, "loss": 2.4842, "step": 11370 }, { "epoch": 0.52, "learning_rate": 9.574622440717748e-06, "loss": 2.4695, "step": 11380 }, { "epoch": 0.52, "learning_rate": 9.560333105711073e-06, "loss": 2.4725, "step": 11390 }, { "epoch": 0.52, "learning_rate": 9.54604467012125e-06, "loss": 2.4497, "step": 11400 }, { "epoch": 0.52, "learning_rate": 9.531757163177819e-06, "loss": 2.4851, "step": 11410 }, { "epoch": 0.52, "learning_rate": 9.517470614108422e-06, "loss": 2.4984, "step": 11420 }, { "epoch": 0.52, "learning_rate": 9.503185052138725e-06, "loss": 2.4936, "step": 11430 }, { "epoch": 0.52, "learning_rate": 9.4889005064924e-06, "loss": 2.473, "step": 11440 }, { "epoch": 0.52, "learning_rate": 9.474617006391015e-06, "loss": 2.4945, "step": 11450 }, { "epoch": 0.52, "learning_rate": 9.460334581054019e-06, "loss": 2.4932, "step": 11460 }, { "epoch": 0.52, "learning_rate": 9.446053259698649e-06, "loss": 2.4301, "step": 11470 }, { "epoch": 0.52, "learning_rate": 9.431773071539894e-06, "loss": 2.4555, "step": 11480 }, { "epoch": 0.52, "learning_rate": 9.41749404579041e-06, "loss": 2.4897, "step": 11490 }, { "epoch": 0.52, "learning_rate": 9.403216211660498e-06, "loss": 2.4859, "step": 11500 }, { "epoch": 0.52, "learning_rate": 9.388939598357995e-06, "loss": 2.4884, "step": 11510 }, { "epoch": 0.52, "learning_rate": 9.374664235088262e-06, "loss": 2.4721, "step": 11520 }, { "epoch": 0.52, "learning_rate": 9.36039015105409e-06, "loss": 2.4489, "step": 11530 }, { "epoch": 0.52, "learning_rate": 9.346117375455658e-06, "loss": 2.4761, "step": 11540 }, { "epoch": 0.52, "learning_rate": 9.33184593749047e-06, "loss": 2.4561, "step": 11550 }, { "epoch": 0.52, "learning_rate": 9.317575866353293e-06, "loss": 2.4629, "step": 11560 }, { "epoch": 0.52, "learning_rate": 9.303307191236087e-06, "loss": 2.4816, "step": 11570 }, { "epoch": 0.52, "learning_rate": 9.289039941327979e-06, "loss": 2.5062, "step": 11580 }, { "epoch": 0.53, "learning_rate": 9.274774145815155e-06, "loss": 2.4586, "step": 11590 }, { "epoch": 0.53, "learning_rate": 9.260509833880848e-06, "loss": 2.4809, "step": 11600 }, { "epoch": 0.53, "learning_rate": 9.246247034705235e-06, "loss": 2.4802, "step": 11610 }, { "epoch": 0.53, "learning_rate": 9.23198577746542e-06, "loss": 2.4742, "step": 11620 }, { "epoch": 0.53, "learning_rate": 9.217726091335333e-06, "loss": 2.4741, "step": 11630 }, { "epoch": 0.53, "learning_rate": 9.203468005485702e-06, "loss": 2.4874, "step": 11640 }, { "epoch": 0.53, "learning_rate": 9.189211549083985e-06, "loss": 2.4788, "step": 11650 }, { "epoch": 0.53, "learning_rate": 9.174956751294288e-06, "loss": 2.4903, "step": 11660 }, { "epoch": 0.53, "learning_rate": 9.160703641277347e-06, "loss": 2.455, "step": 11670 }, { "epoch": 0.53, "learning_rate": 9.146452248190425e-06, "loss": 2.4857, "step": 11680 }, { "epoch": 0.53, "learning_rate": 9.132202601187292e-06, "loss": 2.4862, "step": 11690 }, { "epoch": 0.53, "learning_rate": 9.117954729418125e-06, "loss": 2.4894, "step": 11700 }, { "epoch": 0.53, "learning_rate": 9.103708662029492e-06, "loss": 2.457, "step": 11710 }, { "epoch": 0.53, "learning_rate": 9.089464428164247e-06, "loss": 2.4887, "step": 11720 }, { "epoch": 0.53, "learning_rate": 9.075222056961515e-06, "loss": 2.486, "step": 11730 }, { "epoch": 0.53, "learning_rate": 9.06098157755659e-06, "loss": 2.4873, "step": 11740 }, { "epoch": 0.53, "learning_rate": 9.046743019080915e-06, "loss": 2.4694, "step": 11750 }, { "epoch": 0.53, "learning_rate": 9.032506410661991e-06, "loss": 2.4844, "step": 11760 }, { "epoch": 0.53, "learning_rate": 9.018271781423334e-06, "loss": 2.4693, "step": 11770 }, { "epoch": 0.53, "learning_rate": 9.004039160484409e-06, "loss": 2.4867, "step": 11780 }, { "epoch": 0.53, "learning_rate": 8.98980857696058e-06, "loss": 2.464, "step": 11790 }, { "epoch": 0.53, "learning_rate": 8.975580059963027e-06, "loss": 2.4595, "step": 11800 }, { "epoch": 0.54, "learning_rate": 8.961353638598724e-06, "loss": 2.4671, "step": 11810 }, { "epoch": 0.54, "learning_rate": 8.947129341970342e-06, "loss": 2.4666, "step": 11820 }, { "epoch": 0.54, "learning_rate": 8.932907199176206e-06, "loss": 2.4859, "step": 11830 }, { "epoch": 0.54, "learning_rate": 8.91868723931025e-06, "loss": 2.4529, "step": 11840 }, { "epoch": 0.54, "learning_rate": 8.904469491461917e-06, "loss": 2.462, "step": 11850 }, { "epoch": 0.54, "learning_rate": 8.890253984716152e-06, "loss": 2.4492, "step": 11860 }, { "epoch": 0.54, "learning_rate": 8.876040748153301e-06, "loss": 2.4945, "step": 11870 }, { "epoch": 0.54, "learning_rate": 8.861829810849067e-06, "loss": 2.4905, "step": 11880 }, { "epoch": 0.54, "learning_rate": 8.847621201874447e-06, "loss": 2.49, "step": 11890 }, { "epoch": 0.54, "learning_rate": 8.833414950295685e-06, "loss": 2.4897, "step": 11900 }, { "epoch": 0.54, "learning_rate": 8.81921108517419e-06, "loss": 2.4593, "step": 11910 }, { "epoch": 0.54, "learning_rate": 8.805009635566502e-06, "loss": 2.4447, "step": 11920 }, { "epoch": 0.54, "learning_rate": 8.790810630524205e-06, "loss": 2.4568, "step": 11930 }, { "epoch": 0.54, "learning_rate": 8.776614099093897e-06, "loss": 2.483, "step": 11940 }, { "epoch": 0.54, "learning_rate": 8.762420070317102e-06, "loss": 2.4614, "step": 11950 }, { "epoch": 0.54, "learning_rate": 8.748228573230237e-06, "loss": 2.4272, "step": 11960 }, { "epoch": 0.54, "learning_rate": 8.73403963686453e-06, "loss": 2.4839, "step": 11970 }, { "epoch": 0.54, "learning_rate": 8.719853290245977e-06, "loss": 2.4769, "step": 11980 }, { "epoch": 0.54, "learning_rate": 8.705669562395276e-06, "loss": 2.4887, "step": 11990 }, { "epoch": 0.54, "learning_rate": 8.691488482327762e-06, "loss": 2.4559, "step": 12000 }, { "epoch": 0.54, "eval_accuracy": 0.5414407901370168, "eval_loss": 2.2359259128570557, "eval_runtime": 9.8425, "eval_samples_per_second": 138.887, "eval_steps_per_second": 1.118, "step": 12000 }, { "epoch": 0.54, "learning_rate": 8.67731007905336e-06, "loss": 2.4765, "step": 12010 }, { "epoch": 0.54, "learning_rate": 8.66313438157651e-06, "loss": 2.4874, "step": 12020 }, { "epoch": 0.55, "learning_rate": 8.648961418896134e-06, "loss": 2.5224, "step": 12030 }, { "epoch": 0.55, "learning_rate": 8.634791220005539e-06, "loss": 2.4972, "step": 12040 }, { "epoch": 0.55, "learning_rate": 8.620623813892397e-06, "loss": 2.4834, "step": 12050 }, { "epoch": 0.55, "learning_rate": 8.606459229538645e-06, "loss": 2.4802, "step": 12060 }, { "epoch": 0.55, "learning_rate": 8.592297495920475e-06, "loss": 2.4945, "step": 12070 }, { "epoch": 0.55, "learning_rate": 8.57813864200822e-06, "loss": 2.4804, "step": 12080 }, { "epoch": 0.55, "learning_rate": 8.563982696766343e-06, "loss": 2.4684, "step": 12090 }, { "epoch": 0.55, "learning_rate": 8.549829689153346e-06, "loss": 2.4806, "step": 12100 }, { "epoch": 0.55, "learning_rate": 8.535679648121725e-06, "loss": 2.4539, "step": 12110 }, { "epoch": 0.55, "learning_rate": 8.521532602617901e-06, "loss": 2.4654, "step": 12120 }, { "epoch": 0.55, "learning_rate": 8.507388581582183e-06, "loss": 2.4793, "step": 12130 }, { "epoch": 0.55, "learning_rate": 8.493247613948673e-06, "loss": 2.4696, "step": 12140 }, { "epoch": 0.55, "learning_rate": 8.47910972864524e-06, "loss": 2.4889, "step": 12150 }, { "epoch": 0.55, "learning_rate": 8.464974954593443e-06, "loss": 2.4896, "step": 12160 }, { "epoch": 0.55, "learning_rate": 8.45084332070848e-06, "loss": 2.478, "step": 12170 }, { "epoch": 0.55, "learning_rate": 8.436714855899114e-06, "loss": 2.454, "step": 12180 }, { "epoch": 0.55, "learning_rate": 8.422589589067645e-06, "loss": 2.4744, "step": 12190 }, { "epoch": 0.55, "learning_rate": 8.40846754910981e-06, "loss": 2.4624, "step": 12200 }, { "epoch": 0.55, "learning_rate": 8.394348764914758e-06, "loss": 2.4692, "step": 12210 }, { "epoch": 0.55, "learning_rate": 8.380233265364973e-06, "loss": 2.4601, "step": 12220 }, { "epoch": 0.55, "learning_rate": 8.36612107933622e-06, "loss": 2.476, "step": 12230 }, { "epoch": 0.55, "learning_rate": 8.352012235697492e-06, "loss": 2.4928, "step": 12240 }, { "epoch": 0.56, "learning_rate": 8.33790676331093e-06, "loss": 2.5132, "step": 12250 }, { "epoch": 0.56, "learning_rate": 8.323804691031797e-06, "loss": 2.4658, "step": 12260 }, { "epoch": 0.56, "learning_rate": 8.309706047708378e-06, "loss": 2.469, "step": 12270 }, { "epoch": 0.56, "learning_rate": 8.29561086218197e-06, "loss": 2.4982, "step": 12280 }, { "epoch": 0.56, "learning_rate": 8.281519163286772e-06, "loss": 2.4755, "step": 12290 }, { "epoch": 0.56, "learning_rate": 8.26743097984987e-06, "loss": 2.4668, "step": 12300 }, { "epoch": 0.56, "learning_rate": 8.25334634069114e-06, "loss": 2.4874, "step": 12310 }, { "epoch": 0.56, "learning_rate": 8.239265274623227e-06, "loss": 2.4933, "step": 12320 }, { "epoch": 0.56, "learning_rate": 8.22518781045145e-06, "loss": 2.4909, "step": 12330 }, { "epoch": 0.56, "learning_rate": 8.211113976973768e-06, "loss": 2.4741, "step": 12340 }, { "epoch": 0.56, "learning_rate": 8.197043802980712e-06, "loss": 2.4819, "step": 12350 }, { "epoch": 0.56, "learning_rate": 8.18297731725533e-06, "loss": 2.4838, "step": 12360 }, { "epoch": 0.56, "learning_rate": 8.168914548573113e-06, "loss": 2.4663, "step": 12370 }, { "epoch": 0.56, "learning_rate": 8.154855525701965e-06, "loss": 2.4944, "step": 12380 }, { "epoch": 0.56, "learning_rate": 8.140800277402116e-06, "loss": 2.4747, "step": 12390 }, { "epoch": 0.56, "learning_rate": 8.12674883242607e-06, "loss": 2.4589, "step": 12400 }, { "epoch": 0.56, "learning_rate": 8.112701219518567e-06, "loss": 2.4612, "step": 12410 }, { "epoch": 0.56, "learning_rate": 8.09865746741649e-06, "loss": 2.4959, "step": 12420 }, { "epoch": 0.56, "learning_rate": 8.084617604848838e-06, "loss": 2.4851, "step": 12430 }, { "epoch": 0.56, "learning_rate": 8.07058166053665e-06, "loss": 2.4846, "step": 12440 }, { "epoch": 0.56, "learning_rate": 8.056549663192942e-06, "loss": 2.4935, "step": 12450 }, { "epoch": 0.56, "learning_rate": 8.04252164152266e-06, "loss": 2.4802, "step": 12460 }, { "epoch": 0.57, "learning_rate": 8.028497624222626e-06, "loss": 2.4761, "step": 12470 }, { "epoch": 0.57, "learning_rate": 8.014477639981455e-06, "loss": 2.4767, "step": 12480 }, { "epoch": 0.57, "learning_rate": 8.000461717479525e-06, "loss": 2.4622, "step": 12490 }, { "epoch": 0.57, "learning_rate": 7.986449885388889e-06, "loss": 2.4456, "step": 12500 }, { "epoch": 0.57, "learning_rate": 7.972442172373256e-06, "loss": 2.4738, "step": 12510 }, { "epoch": 0.57, "learning_rate": 7.958438607087884e-06, "loss": 2.484, "step": 12520 }, { "epoch": 0.57, "learning_rate": 7.944439218179565e-06, "loss": 2.4505, "step": 12530 }, { "epoch": 0.57, "learning_rate": 7.930444034286532e-06, "loss": 2.4804, "step": 12540 }, { "epoch": 0.57, "learning_rate": 7.916453084038432e-06, "loss": 2.4537, "step": 12550 }, { "epoch": 0.57, "learning_rate": 7.902466396056234e-06, "loss": 2.4594, "step": 12560 }, { "epoch": 0.57, "learning_rate": 7.888483998952204e-06, "loss": 2.4833, "step": 12570 }, { "epoch": 0.57, "learning_rate": 7.874505921329823e-06, "loss": 2.45, "step": 12580 }, { "epoch": 0.57, "learning_rate": 7.860532191783729e-06, "loss": 2.4693, "step": 12590 }, { "epoch": 0.57, "learning_rate": 7.846562838899679e-06, "loss": 2.4696, "step": 12600 }, { "epoch": 0.57, "learning_rate": 7.832597891254463e-06, "loss": 2.4672, "step": 12610 }, { "epoch": 0.57, "learning_rate": 7.818637377415872e-06, "loss": 2.4988, "step": 12620 }, { "epoch": 0.57, "learning_rate": 7.804681325942616e-06, "loss": 2.488, "step": 12630 }, { "epoch": 0.57, "learning_rate": 7.790729765384286e-06, "loss": 2.4743, "step": 12640 }, { "epoch": 0.57, "learning_rate": 7.776782724281275e-06, "loss": 2.4833, "step": 12650 }, { "epoch": 0.57, "learning_rate": 7.762840231164745e-06, "loss": 2.4573, "step": 12660 }, { "epoch": 0.57, "learning_rate": 7.748902314556539e-06, "loss": 2.468, "step": 12670 }, { "epoch": 0.57, "learning_rate": 7.73496900296915e-06, "loss": 2.4519, "step": 12680 }, { "epoch": 0.58, "learning_rate": 7.721040324905642e-06, "loss": 2.4748, "step": 12690 }, { "epoch": 0.58, "learning_rate": 7.707116308859609e-06, "loss": 2.4708, "step": 12700 }, { "epoch": 0.58, "learning_rate": 7.693196983315094e-06, "loss": 2.4529, "step": 12710 }, { "epoch": 0.58, "learning_rate": 7.679282376746567e-06, "loss": 2.4409, "step": 12720 }, { "epoch": 0.58, "learning_rate": 7.665372517618818e-06, "loss": 2.4931, "step": 12730 }, { "epoch": 0.58, "learning_rate": 7.651467434386951e-06, "loss": 2.4605, "step": 12740 }, { "epoch": 0.58, "learning_rate": 7.637567155496277e-06, "loss": 2.4621, "step": 12750 }, { "epoch": 0.58, "learning_rate": 7.623671709382297e-06, "loss": 2.481, "step": 12760 }, { "epoch": 0.58, "learning_rate": 7.609781124470613e-06, "loss": 2.4536, "step": 12770 }, { "epoch": 0.58, "learning_rate": 7.595895429176891e-06, "loss": 2.473, "step": 12780 }, { "epoch": 0.58, "learning_rate": 7.582014651906788e-06, "loss": 2.4774, "step": 12790 }, { "epoch": 0.58, "learning_rate": 7.5681388210559056e-06, "loss": 2.4565, "step": 12800 }, { "epoch": 0.58, "learning_rate": 7.5542679650097286e-06, "loss": 2.4743, "step": 12810 }, { "epoch": 0.58, "learning_rate": 7.540402112143551e-06, "loss": 2.4498, "step": 12820 }, { "epoch": 0.58, "learning_rate": 7.526541290822453e-06, "loss": 2.4811, "step": 12830 }, { "epoch": 0.58, "learning_rate": 7.5126855294012e-06, "loss": 2.4997, "step": 12840 }, { "epoch": 0.58, "learning_rate": 7.498834856224226e-06, "loss": 2.4737, "step": 12850 }, { "epoch": 0.58, "learning_rate": 7.48498929962554e-06, "loss": 2.4443, "step": 12860 }, { "epoch": 0.58, "learning_rate": 7.471148887928698e-06, "loss": 2.4566, "step": 12870 }, { "epoch": 0.58, "learning_rate": 7.457313649446719e-06, "loss": 2.4765, "step": 12880 }, { "epoch": 0.58, "learning_rate": 7.44348361248205e-06, "loss": 2.4601, "step": 12890 }, { "epoch": 0.58, "learning_rate": 7.429658805326485e-06, "loss": 2.469, "step": 12900 }, { "epoch": 0.59, "learning_rate": 7.415839256261133e-06, "loss": 2.4644, "step": 12910 }, { "epoch": 0.59, "learning_rate": 7.402024993556338e-06, "loss": 2.4645, "step": 12920 }, { "epoch": 0.59, "learning_rate": 7.3882160454716325e-06, "loss": 2.4595, "step": 12930 }, { "epoch": 0.59, "learning_rate": 7.374412440255673e-06, "loss": 2.4701, "step": 12940 }, { "epoch": 0.59, "learning_rate": 7.360614206146198e-06, "loss": 2.4651, "step": 12950 }, { "epoch": 0.59, "learning_rate": 7.346821371369945e-06, "loss": 2.4749, "step": 12960 }, { "epoch": 0.59, "learning_rate": 7.333033964142609e-06, "loss": 2.5187, "step": 12970 }, { "epoch": 0.59, "learning_rate": 7.31925201266879e-06, "loss": 2.471, "step": 12980 }, { "epoch": 0.59, "learning_rate": 7.305475545141916e-06, "loss": 2.4607, "step": 12990 }, { "epoch": 0.59, "learning_rate": 7.291704589744209e-06, "loss": 2.4629, "step": 13000 }, { "epoch": 0.59, "learning_rate": 7.2779391746466e-06, "loss": 2.4691, "step": 13010 }, { "epoch": 0.59, "learning_rate": 7.264179328008702e-06, "loss": 2.4992, "step": 13020 }, { "epoch": 0.59, "learning_rate": 7.250425077978726e-06, "loss": 2.4771, "step": 13030 }, { "epoch": 0.59, "learning_rate": 7.236676452693438e-06, "loss": 2.4698, "step": 13040 }, { "epoch": 0.59, "learning_rate": 7.2229334802780956e-06, "loss": 2.4695, "step": 13050 }, { "epoch": 0.59, "learning_rate": 7.209196188846399e-06, "loss": 2.4729, "step": 13060 }, { "epoch": 0.59, "learning_rate": 7.195464606500415e-06, "loss": 2.4422, "step": 13070 }, { "epoch": 0.59, "learning_rate": 7.181738761330546e-06, "loss": 2.459, "step": 13080 }, { "epoch": 0.59, "learning_rate": 7.168018681415441e-06, "loss": 2.4536, "step": 13090 }, { "epoch": 0.59, "learning_rate": 7.154304394821973e-06, "loss": 2.4786, "step": 13100 }, { "epoch": 0.59, "learning_rate": 7.14059592960515e-06, "loss": 2.4563, "step": 13110 }, { "epoch": 0.59, "learning_rate": 7.12689331380808e-06, "loss": 2.4681, "step": 13120 }, { "epoch": 0.6, "learning_rate": 7.113196575461896e-06, "loss": 2.4908, "step": 13130 }, { "epoch": 0.6, "learning_rate": 7.099505742585719e-06, "loss": 2.4829, "step": 13140 }, { "epoch": 0.6, "learning_rate": 7.085820843186584e-06, "loss": 2.4811, "step": 13150 }, { "epoch": 0.6, "learning_rate": 7.07214190525938e-06, "loss": 2.4819, "step": 13160 }, { "epoch": 0.6, "learning_rate": 7.0584689567868156e-06, "loss": 2.4804, "step": 13170 }, { "epoch": 0.6, "learning_rate": 7.0448020257393335e-06, "loss": 2.4538, "step": 13180 }, { "epoch": 0.6, "learning_rate": 7.031141140075081e-06, "loss": 2.4562, "step": 13190 }, { "epoch": 0.6, "learning_rate": 7.01748632773982e-06, "loss": 2.4731, "step": 13200 }, { "epoch": 0.6, "learning_rate": 7.003837616666906e-06, "loss": 2.4594, "step": 13210 }, { "epoch": 0.6, "learning_rate": 6.990195034777202e-06, "loss": 2.4747, "step": 13220 }, { "epoch": 0.6, "learning_rate": 6.976558609979041e-06, "loss": 2.4613, "step": 13230 }, { "epoch": 0.6, "learning_rate": 6.962928370168148e-06, "loss": 2.4849, "step": 13240 }, { "epoch": 0.6, "learning_rate": 6.949304343227611e-06, "loss": 2.4788, "step": 13250 }, { "epoch": 0.6, "learning_rate": 6.9356865570277985e-06, "loss": 2.4661, "step": 13260 }, { "epoch": 0.6, "learning_rate": 6.922075039426315e-06, "loss": 2.4745, "step": 13270 }, { "epoch": 0.6, "learning_rate": 6.908469818267938e-06, "loss": 2.4808, "step": 13280 }, { "epoch": 0.6, "learning_rate": 6.8948709213845755e-06, "loss": 2.4845, "step": 13290 }, { "epoch": 0.6, "learning_rate": 6.881278376595182e-06, "loss": 2.5033, "step": 13300 }, { "epoch": 0.6, "learning_rate": 6.867692211705735e-06, "loss": 2.4862, "step": 13310 }, { "epoch": 0.6, "learning_rate": 6.854112454509143e-06, "loss": 2.4932, "step": 13320 }, { "epoch": 0.6, "learning_rate": 6.840539132785224e-06, "loss": 2.4466, "step": 13330 }, { "epoch": 0.6, "learning_rate": 6.826972274300617e-06, "loss": 2.4699, "step": 13340 }, { "epoch": 0.61, "learning_rate": 6.8134119068087425e-06, "loss": 2.484, "step": 13350 }, { "epoch": 0.61, "learning_rate": 6.79985805804975e-06, "loss": 2.4772, "step": 13360 }, { "epoch": 0.61, "learning_rate": 6.786310755750446e-06, "loss": 2.4542, "step": 13370 }, { "epoch": 0.61, "learning_rate": 6.772770027624249e-06, "loss": 2.4706, "step": 13380 }, { "epoch": 0.61, "learning_rate": 6.759235901371124e-06, "loss": 2.4257, "step": 13390 }, { "epoch": 0.61, "learning_rate": 6.745708404677541e-06, "loss": 2.4684, "step": 13400 }, { "epoch": 0.61, "learning_rate": 6.7321875652163926e-06, "loss": 2.4852, "step": 13410 }, { "epoch": 0.61, "learning_rate": 6.718673410646971e-06, "loss": 2.466, "step": 13420 }, { "epoch": 0.61, "learning_rate": 6.705165968614873e-06, "loss": 2.4785, "step": 13430 }, { "epoch": 0.61, "learning_rate": 6.6916652667519855e-06, "loss": 2.4711, "step": 13440 }, { "epoch": 0.61, "learning_rate": 6.678171332676387e-06, "loss": 2.4601, "step": 13450 }, { "epoch": 0.61, "learning_rate": 6.664684193992326e-06, "loss": 2.5161, "step": 13460 }, { "epoch": 0.61, "learning_rate": 6.651203878290139e-06, "loss": 2.4911, "step": 13470 }, { "epoch": 0.61, "learning_rate": 6.6377304131462175e-06, "loss": 2.4763, "step": 13480 }, { "epoch": 0.61, "learning_rate": 6.624263826122923e-06, "loss": 2.4514, "step": 13490 }, { "epoch": 0.61, "learning_rate": 6.610804144768562e-06, "loss": 2.47, "step": 13500 }, { "epoch": 0.61, "learning_rate": 6.597351396617304e-06, "loss": 2.4567, "step": 13510 }, { "epoch": 0.61, "learning_rate": 6.583905609189141e-06, "loss": 2.4374, "step": 13520 }, { "epoch": 0.61, "learning_rate": 6.570466809989824e-06, "loss": 2.4939, "step": 13530 }, { "epoch": 0.61, "learning_rate": 6.5570350265108005e-06, "loss": 2.464, "step": 13540 }, { "epoch": 0.61, "learning_rate": 6.543610286229183e-06, "loss": 2.4703, "step": 13550 }, { "epoch": 0.61, "learning_rate": 6.530192616607658e-06, "loss": 2.4654, "step": 13560 }, { "epoch": 0.62, "learning_rate": 6.516782045094463e-06, "loss": 2.456, "step": 13570 }, { "epoch": 0.62, "learning_rate": 6.5033785991233e-06, "loss": 2.4836, "step": 13580 }, { "epoch": 0.62, "learning_rate": 6.489982306113309e-06, "loss": 2.4499, "step": 13590 }, { "epoch": 0.62, "learning_rate": 6.47659319346899e-06, "loss": 2.4631, "step": 13600 }, { "epoch": 0.62, "learning_rate": 6.463211288580152e-06, "loss": 2.4528, "step": 13610 }, { "epoch": 0.62, "learning_rate": 6.4498366188218635e-06, "loss": 2.472, "step": 13620 }, { "epoch": 0.62, "learning_rate": 6.4364692115543944e-06, "loss": 2.465, "step": 13630 }, { "epoch": 0.62, "learning_rate": 6.423109094123148e-06, "loss": 2.4635, "step": 13640 }, { "epoch": 0.62, "learning_rate": 6.409756293858628e-06, "loss": 2.4981, "step": 13650 }, { "epoch": 0.62, "learning_rate": 6.396410838076357e-06, "loss": 2.4745, "step": 13660 }, { "epoch": 0.62, "learning_rate": 6.3830727540768445e-06, "loss": 2.4769, "step": 13670 }, { "epoch": 0.62, "learning_rate": 6.3697420691455095e-06, "loss": 2.4806, "step": 13680 }, { "epoch": 0.62, "learning_rate": 6.356418810552645e-06, "loss": 2.4537, "step": 13690 }, { "epoch": 0.62, "learning_rate": 6.3431030055533405e-06, "loss": 2.4497, "step": 13700 }, { "epoch": 0.62, "learning_rate": 6.329794681387451e-06, "loss": 2.4794, "step": 13710 }, { "epoch": 0.62, "learning_rate": 6.316493865279516e-06, "loss": 2.483, "step": 13720 }, { "epoch": 0.62, "learning_rate": 6.303200584438722e-06, "loss": 2.4723, "step": 13730 }, { "epoch": 0.62, "learning_rate": 6.289914866058846e-06, "loss": 2.4575, "step": 13740 }, { "epoch": 0.62, "learning_rate": 6.276636737318178e-06, "loss": 2.4786, "step": 13750 }, { "epoch": 0.62, "learning_rate": 6.263366225379504e-06, "loss": 2.4861, "step": 13760 }, { "epoch": 0.62, "learning_rate": 6.250103357390007e-06, "loss": 2.4715, "step": 13770 }, { "epoch": 0.62, "learning_rate": 6.2368481604812515e-06, "loss": 2.4597, "step": 13780 }, { "epoch": 0.62, "learning_rate": 6.223600661769094e-06, "loss": 2.4618, "step": 13790 }, { "epoch": 0.63, "learning_rate": 6.210360888353657e-06, "loss": 2.4722, "step": 13800 }, { "epoch": 0.63, "learning_rate": 6.197128867319244e-06, "loss": 2.4829, "step": 13810 }, { "epoch": 0.63, "learning_rate": 6.183904625734319e-06, "loss": 2.4703, "step": 13820 }, { "epoch": 0.63, "learning_rate": 6.17068819065141e-06, "loss": 2.4897, "step": 13830 }, { "epoch": 0.63, "learning_rate": 6.157479589107092e-06, "loss": 2.4974, "step": 13840 }, { "epoch": 0.63, "learning_rate": 6.144278848121912e-06, "loss": 2.4877, "step": 13850 }, { "epoch": 0.63, "learning_rate": 6.131085994700328e-06, "loss": 2.4798, "step": 13860 }, { "epoch": 0.63, "learning_rate": 6.11790105583067e-06, "loss": 2.4666, "step": 13870 }, { "epoch": 0.63, "learning_rate": 6.104724058485086e-06, "loss": 2.4675, "step": 13880 }, { "epoch": 0.63, "learning_rate": 6.091555029619453e-06, "loss": 2.4579, "step": 13890 }, { "epoch": 0.63, "learning_rate": 6.078393996173375e-06, "loss": 2.4621, "step": 13900 }, { "epoch": 0.63, "learning_rate": 6.0652409850700775e-06, "loss": 2.4689, "step": 13910 }, { "epoch": 0.63, "learning_rate": 6.052096023216395e-06, "loss": 2.4858, "step": 13920 }, { "epoch": 0.63, "learning_rate": 6.038959137502678e-06, "loss": 2.4593, "step": 13930 }, { "epoch": 0.63, "learning_rate": 6.025830354802767e-06, "loss": 2.4603, "step": 13940 }, { "epoch": 0.63, "learning_rate": 6.01270970197392e-06, "loss": 2.4746, "step": 13950 }, { "epoch": 0.63, "learning_rate": 5.999597205856766e-06, "loss": 2.459, "step": 13960 }, { "epoch": 0.63, "learning_rate": 5.9864928932752575e-06, "loss": 2.4795, "step": 13970 }, { "epoch": 0.63, "learning_rate": 5.973396791036585e-06, "loss": 2.4831, "step": 13980 }, { "epoch": 0.63, "learning_rate": 5.960308925931167e-06, "loss": 2.4637, "step": 13990 }, { "epoch": 0.63, "learning_rate": 5.9472293247325485e-06, "loss": 2.5186, "step": 14000 }, { "epoch": 0.63, "eval_accuracy": 0.5415494826024122, "eval_loss": 2.234656810760498, "eval_runtime": 9.8993, "eval_samples_per_second": 138.09, "eval_steps_per_second": 1.111, "step": 14000 }, { "epoch": 0.63, "learning_rate": 5.934158014197391e-06, "loss": 2.4902, "step": 14010 }, { "epoch": 0.64, "learning_rate": 5.9210950210653795e-06, "loss": 2.4741, "step": 14020 }, { "epoch": 0.64, "learning_rate": 5.908040372059195e-06, "loss": 2.4564, "step": 14030 }, { "epoch": 0.64, "learning_rate": 5.894994093884439e-06, "loss": 2.4434, "step": 14040 }, { "epoch": 0.64, "learning_rate": 5.8819562132296e-06, "loss": 2.4497, "step": 14050 }, { "epoch": 0.64, "learning_rate": 5.868926756765979e-06, "loss": 2.5057, "step": 14060 }, { "epoch": 0.64, "learning_rate": 5.85590575114765e-06, "loss": 2.4718, "step": 14070 }, { "epoch": 0.64, "learning_rate": 5.842893223011394e-06, "loss": 2.4651, "step": 14080 }, { "epoch": 0.64, "learning_rate": 5.829889198976655e-06, "loss": 2.4535, "step": 14090 }, { "epoch": 0.64, "learning_rate": 5.816893705645475e-06, "loss": 2.4601, "step": 14100 }, { "epoch": 0.64, "learning_rate": 5.803906769602451e-06, "loss": 2.4967, "step": 14110 }, { "epoch": 0.64, "learning_rate": 5.7909284174146675e-06, "loss": 2.4893, "step": 14120 }, { "epoch": 0.64, "learning_rate": 5.777958675631657e-06, "loss": 2.4555, "step": 14130 }, { "epoch": 0.64, "learning_rate": 5.764997570785329e-06, "loss": 2.47, "step": 14140 }, { "epoch": 0.64, "learning_rate": 5.752045129389932e-06, "loss": 2.4638, "step": 14150 }, { "epoch": 0.64, "learning_rate": 5.739101377941991e-06, "loss": 2.4654, "step": 14160 }, { "epoch": 0.64, "learning_rate": 5.72616634292025e-06, "loss": 2.4784, "step": 14170 }, { "epoch": 0.64, "learning_rate": 5.713240050785625e-06, "loss": 2.4875, "step": 14180 }, { "epoch": 0.64, "learning_rate": 5.700322527981146e-06, "loss": 2.4501, "step": 14190 }, { "epoch": 0.64, "learning_rate": 5.687413800931905e-06, "loss": 2.4733, "step": 14200 }, { "epoch": 0.64, "learning_rate": 5.674513896044998e-06, "loss": 2.4574, "step": 14210 }, { "epoch": 0.64, "learning_rate": 5.661622839709478e-06, "loss": 2.4738, "step": 14220 }, { "epoch": 0.64, "learning_rate": 5.648740658296293e-06, "loss": 2.4581, "step": 14230 }, { "epoch": 0.65, "learning_rate": 5.635867378158237e-06, "loss": 2.4966, "step": 14240 }, { "epoch": 0.65, "learning_rate": 5.623003025629894e-06, "loss": 2.4455, "step": 14250 }, { "epoch": 0.65, "learning_rate": 5.610147627027589e-06, "loss": 2.4658, "step": 14260 }, { "epoch": 0.65, "learning_rate": 5.597301208649318e-06, "loss": 2.4797, "step": 14270 }, { "epoch": 0.65, "learning_rate": 5.584463796774732e-06, "loss": 2.4531, "step": 14280 }, { "epoch": 0.65, "learning_rate": 5.571635417665022e-06, "loss": 2.4901, "step": 14290 }, { "epoch": 0.65, "learning_rate": 5.558816097562937e-06, "loss": 2.4704, "step": 14300 }, { "epoch": 0.65, "learning_rate": 5.546005862692665e-06, "loss": 2.4684, "step": 14310 }, { "epoch": 0.65, "learning_rate": 5.533204739259823e-06, "loss": 2.4764, "step": 14320 }, { "epoch": 0.65, "learning_rate": 5.5204127534513895e-06, "loss": 2.4556, "step": 14330 }, { "epoch": 0.65, "learning_rate": 5.507629931435645e-06, "loss": 2.4633, "step": 14340 }, { "epoch": 0.65, "learning_rate": 5.4948562993621266e-06, "loss": 2.4738, "step": 14350 }, { "epoch": 0.65, "learning_rate": 5.482091883361571e-06, "loss": 2.4838, "step": 14360 }, { "epoch": 0.65, "learning_rate": 5.469336709545864e-06, "loss": 2.4699, "step": 14370 }, { "epoch": 0.65, "learning_rate": 5.456590804007976e-06, "loss": 2.4543, "step": 14380 }, { "epoch": 0.65, "learning_rate": 5.443854192821938e-06, "loss": 2.434, "step": 14390 }, { "epoch": 0.65, "learning_rate": 5.4311269020427395e-06, "loss": 2.4642, "step": 14400 }, { "epoch": 0.65, "learning_rate": 5.418408957706331e-06, "loss": 2.4769, "step": 14410 }, { "epoch": 0.65, "learning_rate": 5.405700385829518e-06, "loss": 2.4864, "step": 14420 }, { "epoch": 0.65, "learning_rate": 5.3930012124099585e-06, "loss": 2.4804, "step": 14430 }, { "epoch": 0.65, "learning_rate": 5.380311463426056e-06, "loss": 2.4896, "step": 14440 }, { "epoch": 0.65, "learning_rate": 5.367631164836965e-06, "loss": 2.4535, "step": 14450 }, { "epoch": 0.66, "learning_rate": 5.354960342582478e-06, "loss": 2.4588, "step": 14460 }, { "epoch": 0.66, "learning_rate": 5.34229902258303e-06, "loss": 2.475, "step": 14470 }, { "epoch": 0.66, "learning_rate": 5.3296472307395896e-06, "loss": 2.4587, "step": 14480 }, { "epoch": 0.66, "learning_rate": 5.3170049929336586e-06, "loss": 2.4349, "step": 14490 }, { "epoch": 0.66, "learning_rate": 5.304372335027184e-06, "loss": 2.4759, "step": 14500 }, { "epoch": 0.66, "learning_rate": 5.291749282862505e-06, "loss": 2.4796, "step": 14510 }, { "epoch": 0.66, "learning_rate": 5.2791358622623336e-06, "loss": 2.4795, "step": 14520 }, { "epoch": 0.66, "learning_rate": 5.266532099029651e-06, "loss": 2.4616, "step": 14530 }, { "epoch": 0.66, "learning_rate": 5.253938018947713e-06, "loss": 2.4727, "step": 14540 }, { "epoch": 0.66, "learning_rate": 5.2413536477799365e-06, "loss": 2.483, "step": 14550 }, { "epoch": 0.66, "learning_rate": 5.228779011269906e-06, "loss": 2.4836, "step": 14560 }, { "epoch": 0.66, "learning_rate": 5.216214135141262e-06, "loss": 2.4657, "step": 14570 }, { "epoch": 0.66, "learning_rate": 5.203659045097706e-06, "loss": 2.4742, "step": 14580 }, { "epoch": 0.66, "learning_rate": 5.191113766822905e-06, "loss": 2.4922, "step": 14590 }, { "epoch": 0.66, "learning_rate": 5.178578325980457e-06, "loss": 2.4533, "step": 14600 }, { "epoch": 0.66, "learning_rate": 5.1660527482138365e-06, "loss": 2.4693, "step": 14610 }, { "epoch": 0.66, "learning_rate": 5.153537059146343e-06, "loss": 2.492, "step": 14620 }, { "epoch": 0.66, "learning_rate": 5.141031284381042e-06, "loss": 2.4795, "step": 14630 }, { "epoch": 0.66, "learning_rate": 5.128535449500725e-06, "loss": 2.4912, "step": 14640 }, { "epoch": 0.66, "learning_rate": 5.11604958006784e-06, "loss": 2.4671, "step": 14650 }, { "epoch": 0.66, "learning_rate": 5.103573701624458e-06, "loss": 2.4684, "step": 14660 }, { "epoch": 0.66, "learning_rate": 5.091107839692208e-06, "loss": 2.4717, "step": 14670 }, { "epoch": 0.67, "learning_rate": 5.078652019772226e-06, "loss": 2.4664, "step": 14680 }, { "epoch": 0.67, "learning_rate": 5.066206267345111e-06, "loss": 2.4638, "step": 14690 }, { "epoch": 0.67, "learning_rate": 5.053770607870863e-06, "loss": 2.4742, "step": 14700 }, { "epoch": 0.67, "learning_rate": 5.041345066788832e-06, "loss": 2.4715, "step": 14710 }, { "epoch": 0.67, "learning_rate": 5.028929669517678e-06, "loss": 2.4495, "step": 14720 }, { "epoch": 0.67, "learning_rate": 5.016524441455304e-06, "loss": 2.4717, "step": 14730 }, { "epoch": 0.67, "learning_rate": 5.004129407978808e-06, "loss": 2.4551, "step": 14740 }, { "epoch": 0.67, "learning_rate": 4.991744594444439e-06, "loss": 2.4574, "step": 14750 }, { "epoch": 0.67, "learning_rate": 4.979370026187535e-06, "loss": 2.4722, "step": 14760 }, { "epoch": 0.67, "learning_rate": 4.967005728522479e-06, "loss": 2.4583, "step": 14770 }, { "epoch": 0.67, "learning_rate": 4.95465172674264e-06, "loss": 2.4863, "step": 14780 }, { "epoch": 0.67, "learning_rate": 4.942308046120328e-06, "loss": 2.497, "step": 14790 }, { "epoch": 0.67, "learning_rate": 4.929974711906736e-06, "loss": 2.4474, "step": 14800 }, { "epoch": 0.67, "learning_rate": 4.9176517493318935e-06, "loss": 2.4806, "step": 14810 }, { "epoch": 0.67, "learning_rate": 4.905339183604614e-06, "loss": 2.4521, "step": 14820 }, { "epoch": 0.67, "learning_rate": 4.893037039912441e-06, "loss": 2.4732, "step": 14830 }, { "epoch": 0.67, "learning_rate": 4.880745343421597e-06, "loss": 2.4738, "step": 14840 }, { "epoch": 0.67, "learning_rate": 4.868464119276936e-06, "loss": 2.4462, "step": 14850 }, { "epoch": 0.67, "learning_rate": 4.856193392601881e-06, "loss": 2.481, "step": 14860 }, { "epoch": 0.67, "learning_rate": 4.8439331884984e-06, "loss": 2.4777, "step": 14870 }, { "epoch": 0.67, "learning_rate": 4.831683532046909e-06, "loss": 2.4696, "step": 14880 }, { "epoch": 0.67, "learning_rate": 4.8194444483062645e-06, "loss": 2.4729, "step": 14890 }, { "epoch": 0.68, "learning_rate": 4.807215962313688e-06, "loss": 2.4823, "step": 14900 }, { "epoch": 0.68, "learning_rate": 4.794998099084726e-06, "loss": 2.4952, "step": 14910 }, { "epoch": 0.68, "learning_rate": 4.782790883613189e-06, "loss": 2.453, "step": 14920 }, { "epoch": 0.68, "learning_rate": 4.770594340871107e-06, "loss": 2.4983, "step": 14930 }, { "epoch": 0.68, "learning_rate": 4.758408495808683e-06, "loss": 2.4949, "step": 14940 }, { "epoch": 0.68, "learning_rate": 4.7462333733542235e-06, "loss": 2.4553, "step": 14950 }, { "epoch": 0.68, "learning_rate": 4.734068998414118e-06, "loss": 2.4503, "step": 14960 }, { "epoch": 0.68, "learning_rate": 4.7219153958727485e-06, "loss": 2.4675, "step": 14970 }, { "epoch": 0.68, "learning_rate": 4.709772590592483e-06, "loss": 2.4864, "step": 14980 }, { "epoch": 0.68, "learning_rate": 4.697640607413576e-06, "loss": 2.4899, "step": 14990 }, { "epoch": 0.68, "learning_rate": 4.685519471154173e-06, "loss": 2.4719, "step": 15000 }, { "epoch": 0.68, "learning_rate": 4.673409206610199e-06, "loss": 2.4789, "step": 15010 }, { "epoch": 0.68, "learning_rate": 4.66130983855537e-06, "loss": 2.492, "step": 15020 }, { "epoch": 0.68, "learning_rate": 4.6492213917410815e-06, "loss": 2.4628, "step": 15030 }, { "epoch": 0.68, "learning_rate": 4.6371438908964165e-06, "loss": 2.4774, "step": 15040 }, { "epoch": 0.68, "learning_rate": 4.6250773607280375e-06, "loss": 2.4933, "step": 15050 }, { "epoch": 0.68, "learning_rate": 4.613021825920189e-06, "loss": 2.4983, "step": 15060 }, { "epoch": 0.68, "learning_rate": 4.600977311134612e-06, "loss": 2.4851, "step": 15070 }, { "epoch": 0.68, "learning_rate": 4.588943841010496e-06, "loss": 2.4699, "step": 15080 }, { "epoch": 0.68, "learning_rate": 4.576921440164456e-06, "loss": 2.4793, "step": 15090 }, { "epoch": 0.68, "learning_rate": 4.56491013319044e-06, "loss": 2.4855, "step": 15100 }, { "epoch": 0.68, "learning_rate": 4.552909944659726e-06, "loss": 2.4873, "step": 15110 }, { "epoch": 0.69, "learning_rate": 4.540920899120821e-06, "loss": 2.4707, "step": 15120 }, { "epoch": 0.69, "learning_rate": 4.528943021099466e-06, "loss": 2.4558, "step": 15130 }, { "epoch": 0.69, "learning_rate": 4.516976335098526e-06, "loss": 2.4868, "step": 15140 }, { "epoch": 0.69, "learning_rate": 4.505020865598001e-06, "loss": 2.4713, "step": 15150 }, { "epoch": 0.69, "learning_rate": 4.493076637054916e-06, "loss": 2.4521, "step": 15160 }, { "epoch": 0.69, "learning_rate": 4.481143673903327e-06, "loss": 2.4822, "step": 15170 }, { "epoch": 0.69, "learning_rate": 4.4692220005542294e-06, "loss": 2.4968, "step": 15180 }, { "epoch": 0.69, "learning_rate": 4.457311641395526e-06, "loss": 2.4652, "step": 15190 }, { "epoch": 0.69, "learning_rate": 4.445412620791978e-06, "loss": 2.45, "step": 15200 }, { "epoch": 0.69, "learning_rate": 4.433524963085147e-06, "loss": 2.4471, "step": 15210 }, { "epoch": 0.69, "learning_rate": 4.421648692593352e-06, "loss": 2.4728, "step": 15220 }, { "epoch": 0.69, "learning_rate": 4.409783833611617e-06, "loss": 2.4617, "step": 15230 }, { "epoch": 0.69, "learning_rate": 4.397930410411623e-06, "loss": 2.4828, "step": 15240 }, { "epoch": 0.69, "learning_rate": 4.3860884472416555e-06, "loss": 2.4692, "step": 15250 }, { "epoch": 0.69, "learning_rate": 4.374257968326561e-06, "loss": 2.4842, "step": 15260 }, { "epoch": 0.69, "learning_rate": 4.362438997867676e-06, "loss": 2.4953, "step": 15270 }, { "epoch": 0.69, "learning_rate": 4.350631560042821e-06, "loss": 2.4676, "step": 15280 }, { "epoch": 0.69, "learning_rate": 4.338835679006206e-06, "loss": 2.4856, "step": 15290 }, { "epoch": 0.69, "learning_rate": 4.327051378888405e-06, "loss": 2.4651, "step": 15300 }, { "epoch": 0.69, "learning_rate": 4.315278683796299e-06, "loss": 2.4919, "step": 15310 }, { "epoch": 0.69, "learning_rate": 4.303517617813031e-06, "loss": 2.4883, "step": 15320 }, { "epoch": 0.69, "learning_rate": 4.2917682049979535e-06, "loss": 2.4892, "step": 15330 }, { "epoch": 0.7, "learning_rate": 4.2800304693865824e-06, "loss": 2.4552, "step": 15340 }, { "epoch": 0.7, "learning_rate": 4.268304434990542e-06, "loss": 2.4524, "step": 15350 }, { "epoch": 0.7, "learning_rate": 4.2565901257975236e-06, "loss": 2.468, "step": 15360 }, { "epoch": 0.7, "learning_rate": 4.24488756577123e-06, "loss": 2.4851, "step": 15370 }, { "epoch": 0.7, "learning_rate": 4.2331967788513295e-06, "loss": 2.477, "step": 15380 }, { "epoch": 0.7, "learning_rate": 4.2215177889534075e-06, "loss": 2.4709, "step": 15390 }, { "epoch": 0.7, "learning_rate": 4.209850619968916e-06, "loss": 2.4456, "step": 15400 }, { "epoch": 0.7, "learning_rate": 4.198195295765123e-06, "loss": 2.4752, "step": 15410 }, { "epoch": 0.7, "learning_rate": 4.186551840185069e-06, "loss": 2.4709, "step": 15420 }, { "epoch": 0.7, "learning_rate": 4.174920277047512e-06, "loss": 2.4657, "step": 15430 }, { "epoch": 0.7, "learning_rate": 4.16330063014689e-06, "loss": 2.473, "step": 15440 }, { "epoch": 0.7, "learning_rate": 4.151692923253252e-06, "loss": 2.4582, "step": 15450 }, { "epoch": 0.7, "learning_rate": 4.140097180112229e-06, "loss": 2.4701, "step": 15460 }, { "epoch": 0.7, "learning_rate": 4.128513424444978e-06, "loss": 2.461, "step": 15470 }, { "epoch": 0.7, "learning_rate": 4.1169416799481285e-06, "loss": 2.4839, "step": 15480 }, { "epoch": 0.7, "learning_rate": 4.105381970293747e-06, "loss": 2.4982, "step": 15490 }, { "epoch": 0.7, "learning_rate": 4.093834319129274e-06, "loss": 2.4623, "step": 15500 }, { "epoch": 0.7, "learning_rate": 4.082298750077485e-06, "loss": 2.4681, "step": 15510 }, { "epoch": 0.7, "learning_rate": 4.070775286736438e-06, "loss": 2.4705, "step": 15520 }, { "epoch": 0.7, "learning_rate": 4.05926395267943e-06, "loss": 2.475, "step": 15530 }, { "epoch": 0.7, "learning_rate": 4.047764771454935e-06, "loss": 2.4782, "step": 15540 }, { "epoch": 0.7, "learning_rate": 4.036277766586588e-06, "loss": 2.4762, "step": 15550 }, { "epoch": 0.71, "learning_rate": 4.0248029615730864e-06, "loss": 2.4666, "step": 15560 }, { "epoch": 0.71, "learning_rate": 4.013340379888199e-06, "loss": 2.4874, "step": 15570 }, { "epoch": 0.71, "learning_rate": 4.001890044980662e-06, "loss": 2.4638, "step": 15580 }, { "epoch": 0.71, "learning_rate": 3.990451980274187e-06, "loss": 2.486, "step": 15590 }, { "epoch": 0.71, "learning_rate": 3.979026209167355e-06, "loss": 2.4787, "step": 15600 }, { "epoch": 0.71, "learning_rate": 3.9676127550336295e-06, "loss": 2.4796, "step": 15610 }, { "epoch": 0.71, "learning_rate": 3.9562116412212455e-06, "loss": 2.4748, "step": 15620 }, { "epoch": 0.71, "learning_rate": 3.944822891053227e-06, "loss": 2.49, "step": 15630 }, { "epoch": 0.71, "learning_rate": 3.933446527827275e-06, "loss": 2.471, "step": 15640 }, { "epoch": 0.71, "learning_rate": 3.922082574815765e-06, "loss": 2.4741, "step": 15650 }, { "epoch": 0.71, "learning_rate": 3.910731055265697e-06, "loss": 2.4513, "step": 15660 }, { "epoch": 0.71, "learning_rate": 3.899391992398607e-06, "loss": 2.4789, "step": 15670 }, { "epoch": 0.71, "learning_rate": 3.888065409410581e-06, "loss": 2.4613, "step": 15680 }, { "epoch": 0.71, "learning_rate": 3.876751329472146e-06, "loss": 2.478, "step": 15690 }, { "epoch": 0.71, "learning_rate": 3.865449775728279e-06, "loss": 2.4754, "step": 15700 }, { "epoch": 0.71, "learning_rate": 3.854160771298307e-06, "loss": 2.4561, "step": 15710 }, { "epoch": 0.71, "learning_rate": 3.842884339275907e-06, "loss": 2.4551, "step": 15720 }, { "epoch": 0.71, "learning_rate": 3.831620502729014e-06, "loss": 2.48, "step": 15730 }, { "epoch": 0.71, "learning_rate": 3.820369284699823e-06, "loss": 2.487, "step": 15740 }, { "epoch": 0.71, "learning_rate": 3.8091307082046958e-06, "loss": 2.4888, "step": 15750 }, { "epoch": 0.71, "learning_rate": 3.7979047962341408e-06, "loss": 2.4476, "step": 15760 }, { "epoch": 0.71, "learning_rate": 3.7866915717527587e-06, "loss": 2.4976, "step": 15770 }, { "epoch": 0.72, "learning_rate": 3.7754910576991954e-06, "loss": 2.4457, "step": 15780 }, { "epoch": 0.72, "learning_rate": 3.7643032769860943e-06, "loss": 2.4718, "step": 15790 }, { "epoch": 0.72, "learning_rate": 3.753128252500051e-06, "loss": 2.4773, "step": 15800 }, { "epoch": 0.72, "learning_rate": 3.7419660071015673e-06, "loss": 2.4611, "step": 15810 }, { "epoch": 0.72, "learning_rate": 3.7308165636250014e-06, "loss": 2.4912, "step": 15820 }, { "epoch": 0.72, "learning_rate": 3.7196799448785246e-06, "loss": 2.4799, "step": 15830 }, { "epoch": 0.72, "learning_rate": 3.70855617364407e-06, "loss": 2.4868, "step": 15840 }, { "epoch": 0.72, "learning_rate": 3.6974452726772925e-06, "loss": 2.4607, "step": 15850 }, { "epoch": 0.72, "learning_rate": 3.686347264707516e-06, "loss": 2.4613, "step": 15860 }, { "epoch": 0.72, "learning_rate": 3.67526217243769e-06, "loss": 2.461, "step": 15870 }, { "epoch": 0.72, "learning_rate": 3.6641900185443445e-06, "loss": 2.486, "step": 15880 }, { "epoch": 0.72, "learning_rate": 3.6531308256775375e-06, "loss": 2.4613, "step": 15890 }, { "epoch": 0.72, "learning_rate": 3.642084616460818e-06, "loss": 2.4438, "step": 15900 }, { "epoch": 0.72, "learning_rate": 3.631051413491171e-06, "loss": 2.4679, "step": 15910 }, { "epoch": 0.72, "learning_rate": 3.620031239338977e-06, "loss": 2.4533, "step": 15920 }, { "epoch": 0.72, "learning_rate": 3.6090241165479625e-06, "loss": 2.4828, "step": 15930 }, { "epoch": 0.72, "learning_rate": 3.5980300676351544e-06, "loss": 2.4579, "step": 15940 }, { "epoch": 0.72, "learning_rate": 3.587049115090836e-06, "loss": 2.4736, "step": 15950 }, { "epoch": 0.72, "learning_rate": 3.5760812813785006e-06, "loss": 2.4806, "step": 15960 }, { "epoch": 0.72, "learning_rate": 3.565126588934803e-06, "loss": 2.4707, "step": 15970 }, { "epoch": 0.72, "learning_rate": 3.554185060169516e-06, "loss": 2.4677, "step": 15980 }, { "epoch": 0.72, "learning_rate": 3.5432567174654806e-06, "loss": 2.4921, "step": 15990 }, { "epoch": 0.73, "learning_rate": 3.53234158317857e-06, "loss": 2.4755, "step": 16000 }, { "epoch": 0.73, "eval_accuracy": 0.5415931026049723, "eval_loss": 2.23403000831604, "eval_runtime": 9.8805, "eval_samples_per_second": 138.354, "eval_steps_per_second": 1.113, "step": 16000 }, { "epoch": 0.73, "learning_rate": 3.5214396796376315e-06, "loss": 2.4601, "step": 16010 }, { "epoch": 0.73, "learning_rate": 3.5105510291444446e-06, "loss": 2.4698, "step": 16020 }, { "epoch": 0.73, "learning_rate": 3.499675653973691e-06, "loss": 2.474, "step": 16030 }, { "epoch": 0.73, "learning_rate": 3.488813576372876e-06, "loss": 2.4743, "step": 16040 }, { "epoch": 0.73, "learning_rate": 3.477964818562316e-06, "loss": 2.4599, "step": 16050 }, { "epoch": 0.73, "learning_rate": 3.467129402735074e-06, "loss": 2.472, "step": 16060 }, { "epoch": 0.73, "learning_rate": 3.456307351056921e-06, "loss": 2.4705, "step": 16070 }, { "epoch": 0.73, "learning_rate": 3.4454986856662898e-06, "loss": 2.4786, "step": 16080 }, { "epoch": 0.73, "learning_rate": 3.4347034286742277e-06, "loss": 2.4673, "step": 16090 }, { "epoch": 0.73, "learning_rate": 3.4239216021643538e-06, "loss": 2.4549, "step": 16100 }, { "epoch": 0.73, "learning_rate": 3.4131532281928105e-06, "loss": 2.4825, "step": 16110 }, { "epoch": 0.73, "learning_rate": 3.40239832878823e-06, "loss": 2.4727, "step": 16120 }, { "epoch": 0.73, "learning_rate": 3.3916569259516597e-06, "loss": 2.4705, "step": 16130 }, { "epoch": 0.73, "learning_rate": 3.3809290416565656e-06, "loss": 2.4846, "step": 16140 }, { "epoch": 0.73, "learning_rate": 3.370214697848728e-06, "loss": 2.4725, "step": 16150 }, { "epoch": 0.73, "learning_rate": 3.3595139164462575e-06, "loss": 2.4895, "step": 16160 }, { "epoch": 0.73, "learning_rate": 3.348826719339493e-06, "loss": 2.4706, "step": 16170 }, { "epoch": 0.73, "learning_rate": 3.338153128391012e-06, "loss": 2.485, "step": 16180 }, { "epoch": 0.73, "learning_rate": 3.327493165435528e-06, "loss": 2.4644, "step": 16190 }, { "epoch": 0.73, "learning_rate": 3.316846852279907e-06, "loss": 2.4653, "step": 16200 }, { "epoch": 0.73, "learning_rate": 3.3062142107030626e-06, "loss": 2.4959, "step": 16210 }, { "epoch": 0.74, "learning_rate": 3.2955952624559653e-06, "loss": 2.4861, "step": 16220 }, { "epoch": 0.74, "learning_rate": 3.284990029261562e-06, "loss": 2.4701, "step": 16230 }, { "epoch": 0.74, "learning_rate": 3.2743985328147353e-06, "loss": 2.4711, "step": 16240 }, { "epoch": 0.74, "learning_rate": 3.2638207947822885e-06, "loss": 2.4783, "step": 16250 }, { "epoch": 0.74, "learning_rate": 3.253256836802853e-06, "loss": 2.4762, "step": 16260 }, { "epoch": 0.74, "learning_rate": 3.242706680486898e-06, "loss": 2.4604, "step": 16270 }, { "epoch": 0.74, "learning_rate": 3.2321703474166312e-06, "loss": 2.4659, "step": 16280 }, { "epoch": 0.74, "learning_rate": 3.2216478591460097e-06, "loss": 2.4723, "step": 16290 }, { "epoch": 0.74, "learning_rate": 3.2111392372006434e-06, "loss": 2.4733, "step": 16300 }, { "epoch": 0.74, "learning_rate": 3.2006445030778e-06, "loss": 2.4744, "step": 16310 }, { "epoch": 0.74, "learning_rate": 3.190163678246314e-06, "loss": 2.4804, "step": 16320 }, { "epoch": 0.74, "learning_rate": 3.179696784146584e-06, "loss": 2.4829, "step": 16330 }, { "epoch": 0.74, "learning_rate": 3.1692438421905037e-06, "loss": 2.4611, "step": 16340 }, { "epoch": 0.74, "learning_rate": 3.158804873761424e-06, "loss": 2.4778, "step": 16350 }, { "epoch": 0.74, "learning_rate": 3.1483799002141136e-06, "loss": 2.4949, "step": 16360 }, { "epoch": 0.74, "learning_rate": 3.1379689428747085e-06, "loss": 2.4535, "step": 16370 }, { "epoch": 0.74, "learning_rate": 3.127572023040676e-06, "loss": 2.4756, "step": 16380 }, { "epoch": 0.74, "learning_rate": 3.1171891619807637e-06, "loss": 2.4812, "step": 16390 }, { "epoch": 0.74, "learning_rate": 3.106820380934963e-06, "loss": 2.4504, "step": 16400 }, { "epoch": 0.74, "learning_rate": 3.096465701114458e-06, "loss": 2.4971, "step": 16410 }, { "epoch": 0.74, "learning_rate": 3.086125143701594e-06, "loss": 2.4903, "step": 16420 }, { "epoch": 0.74, "learning_rate": 3.0757987298498106e-06, "loss": 2.4753, "step": 16430 }, { "epoch": 0.75, "learning_rate": 3.0654864806836325e-06, "loss": 2.4555, "step": 16440 }, { "epoch": 0.75, "learning_rate": 3.0551884172985992e-06, "loss": 2.4669, "step": 16450 }, { "epoch": 0.75, "learning_rate": 3.0449045607612305e-06, "loss": 2.486, "step": 16460 }, { "epoch": 0.75, "learning_rate": 3.034634932108985e-06, "loss": 2.4763, "step": 16470 }, { "epoch": 0.75, "learning_rate": 3.024379552350214e-06, "loss": 2.4611, "step": 16480 }, { "epoch": 0.75, "learning_rate": 3.0141384424641228e-06, "loss": 2.4502, "step": 16490 }, { "epoch": 0.75, "learning_rate": 3.0039116234007225e-06, "loss": 2.4697, "step": 16500 }, { "epoch": 0.75, "learning_rate": 2.9936991160807925e-06, "loss": 2.4622, "step": 16510 }, { "epoch": 0.75, "learning_rate": 2.9835009413958314e-06, "loss": 2.472, "step": 16520 }, { "epoch": 0.75, "learning_rate": 2.9733171202080226e-06, "loss": 2.4652, "step": 16530 }, { "epoch": 0.75, "learning_rate": 2.963147673350181e-06, "loss": 2.4879, "step": 16540 }, { "epoch": 0.75, "learning_rate": 2.952992621625722e-06, "loss": 2.4528, "step": 16550 }, { "epoch": 0.75, "learning_rate": 2.942851985808609e-06, "loss": 2.4515, "step": 16560 }, { "epoch": 0.75, "learning_rate": 2.9327257866433178e-06, "loss": 2.4785, "step": 16570 }, { "epoch": 0.75, "learning_rate": 2.9226140448447895e-06, "loss": 2.4765, "step": 16580 }, { "epoch": 0.75, "learning_rate": 2.912516781098389e-06, "loss": 2.4486, "step": 16590 }, { "epoch": 0.75, "learning_rate": 2.902434016059874e-06, "loss": 2.4713, "step": 16600 }, { "epoch": 0.75, "learning_rate": 2.8923657703553254e-06, "loss": 2.4536, "step": 16610 }, { "epoch": 0.75, "learning_rate": 2.8823120645811333e-06, "loss": 2.4894, "step": 16620 }, { "epoch": 0.75, "learning_rate": 2.8722729193039435e-06, "loss": 2.4423, "step": 16630 }, { "epoch": 0.75, "learning_rate": 2.8622483550606116e-06, "loss": 2.4793, "step": 16640 }, { "epoch": 0.75, "learning_rate": 2.8522383923581674e-06, "loss": 2.4523, "step": 16650 }, { "epoch": 0.76, "learning_rate": 2.8422430516737733e-06, "loss": 2.47, "step": 16660 }, { "epoch": 0.76, "learning_rate": 2.8322623534546734e-06, "loss": 2.4773, "step": 16670 }, { "epoch": 0.76, "learning_rate": 2.822296318118164e-06, "loss": 2.4868, "step": 16680 }, { "epoch": 0.76, "learning_rate": 2.8123449660515424e-06, "loss": 2.4751, "step": 16690 }, { "epoch": 0.76, "learning_rate": 2.8024083176120665e-06, "loss": 2.4977, "step": 16700 }, { "epoch": 0.76, "learning_rate": 2.7924863931269288e-06, "loss": 2.45, "step": 16710 }, { "epoch": 0.76, "learning_rate": 2.7825792128931783e-06, "loss": 2.4585, "step": 16720 }, { "epoch": 0.76, "learning_rate": 2.772686797177726e-06, "loss": 2.4805, "step": 16730 }, { "epoch": 0.76, "learning_rate": 2.7628091662172572e-06, "loss": 2.4609, "step": 16740 }, { "epoch": 0.76, "learning_rate": 2.7529463402182354e-06, "loss": 2.4742, "step": 16750 }, { "epoch": 0.76, "learning_rate": 2.7430983393568144e-06, "loss": 2.4473, "step": 16760 }, { "epoch": 0.76, "learning_rate": 2.7332651837788427e-06, "loss": 2.4716, "step": 16770 }, { "epoch": 0.76, "learning_rate": 2.7234468935997783e-06, "loss": 2.4421, "step": 16780 }, { "epoch": 0.76, "learning_rate": 2.7136434889046913e-06, "loss": 2.4503, "step": 16790 }, { "epoch": 0.76, "learning_rate": 2.7038549897481835e-06, "loss": 2.4651, "step": 16800 }, { "epoch": 0.76, "learning_rate": 2.6940814161543706e-06, "loss": 2.457, "step": 16810 }, { "epoch": 0.76, "learning_rate": 2.684322788116843e-06, "loss": 2.4748, "step": 16820 }, { "epoch": 0.76, "learning_rate": 2.674579125598602e-06, "loss": 2.4787, "step": 16830 }, { "epoch": 0.76, "learning_rate": 2.6648504485320527e-06, "loss": 2.4896, "step": 16840 }, { "epoch": 0.76, "learning_rate": 2.655136776818923e-06, "loss": 2.4548, "step": 16850 }, { "epoch": 0.76, "learning_rate": 2.645438130330269e-06, "loss": 2.4798, "step": 16860 }, { "epoch": 0.76, "learning_rate": 2.635754528906386e-06, "loss": 2.4825, "step": 16870 }, { "epoch": 0.77, "learning_rate": 2.626085992356815e-06, "loss": 2.4685, "step": 16880 }, { "epoch": 0.77, "learning_rate": 2.616432540460255e-06, "loss": 2.4365, "step": 16890 }, { "epoch": 0.77, "learning_rate": 2.6067941929645692e-06, "loss": 2.4293, "step": 16900 }, { "epoch": 0.77, "learning_rate": 2.5971709695867096e-06, "loss": 2.4729, "step": 16910 }, { "epoch": 0.77, "learning_rate": 2.5875628900126914e-06, "loss": 2.47, "step": 16920 }, { "epoch": 0.77, "learning_rate": 2.5779699738975495e-06, "loss": 2.4668, "step": 16930 }, { "epoch": 0.77, "learning_rate": 2.5683922408653027e-06, "loss": 2.4559, "step": 16940 }, { "epoch": 0.77, "learning_rate": 2.5588297105089067e-06, "loss": 2.4613, "step": 16950 }, { "epoch": 0.77, "learning_rate": 2.5492824023902185e-06, "loss": 2.498, "step": 16960 }, { "epoch": 0.77, "learning_rate": 2.5397503360399555e-06, "loss": 2.4832, "step": 16970 }, { "epoch": 0.77, "learning_rate": 2.5302335309576553e-06, "loss": 2.4554, "step": 16980 }, { "epoch": 0.77, "learning_rate": 2.5207320066116392e-06, "loss": 2.4544, "step": 16990 }, { "epoch": 0.77, "learning_rate": 2.511245782438957e-06, "loss": 2.4382, "step": 17000 }, { "epoch": 0.77, "learning_rate": 2.5017748778453755e-06, "loss": 2.4771, "step": 17010 }, { "epoch": 0.77, "learning_rate": 2.4923193122053136e-06, "loss": 2.473, "step": 17020 }, { "epoch": 0.77, "learning_rate": 2.482879104861813e-06, "loss": 2.4811, "step": 17030 }, { "epoch": 0.77, "learning_rate": 2.4734542751264967e-06, "loss": 2.4655, "step": 17040 }, { "epoch": 0.77, "learning_rate": 2.4640448422795305e-06, "loss": 2.4916, "step": 17050 }, { "epoch": 0.77, "learning_rate": 2.4546508255695846e-06, "loss": 2.477, "step": 17060 }, { "epoch": 0.77, "learning_rate": 2.44527224421379e-06, "loss": 2.4873, "step": 17070 }, { "epoch": 0.77, "learning_rate": 2.4359091173977033e-06, "loss": 2.4731, "step": 17080 }, { "epoch": 0.77, "learning_rate": 2.426561464275269e-06, "loss": 2.491, "step": 17090 }, { "epoch": 0.77, "learning_rate": 2.417229303968771e-06, "loss": 2.4704, "step": 17100 }, { "epoch": 0.78, "learning_rate": 2.407912655568806e-06, "loss": 2.4689, "step": 17110 }, { "epoch": 0.78, "learning_rate": 2.3986115381342347e-06, "loss": 2.4711, "step": 17120 }, { "epoch": 0.78, "learning_rate": 2.389325970692148e-06, "loss": 2.4638, "step": 17130 }, { "epoch": 0.78, "learning_rate": 2.380055972237826e-06, "loss": 2.438, "step": 17140 }, { "epoch": 0.78, "learning_rate": 2.3708015617347e-06, "loss": 2.475, "step": 17150 }, { "epoch": 0.78, "learning_rate": 2.3615627581143132e-06, "loss": 2.4604, "step": 17160 }, { "epoch": 0.78, "learning_rate": 2.3523395802762817e-06, "loss": 2.4708, "step": 17170 }, { "epoch": 0.78, "learning_rate": 2.3431320470882557e-06, "loss": 2.4791, "step": 17180 }, { "epoch": 0.78, "learning_rate": 2.333940177385885e-06, "loss": 2.4956, "step": 17190 }, { "epoch": 0.78, "learning_rate": 2.3247639899727736e-06, "loss": 2.4711, "step": 17200 }, { "epoch": 0.78, "learning_rate": 2.315603503620444e-06, "loss": 2.4586, "step": 17210 }, { "epoch": 0.78, "learning_rate": 2.306458737068302e-06, "loss": 2.4329, "step": 17220 }, { "epoch": 0.78, "learning_rate": 2.2973297090235946e-06, "loss": 2.457, "step": 17230 }, { "epoch": 0.78, "learning_rate": 2.2882164381613713e-06, "loss": 2.462, "step": 17240 }, { "epoch": 0.78, "learning_rate": 2.279118943124452e-06, "loss": 2.4808, "step": 17250 }, { "epoch": 0.78, "learning_rate": 2.2700372425233783e-06, "loss": 2.4858, "step": 17260 }, { "epoch": 0.78, "learning_rate": 2.260971354936383e-06, "loss": 2.4725, "step": 17270 }, { "epoch": 0.78, "learning_rate": 2.251921298909362e-06, "loss": 2.4557, "step": 17280 }, { "epoch": 0.78, "learning_rate": 2.2428870929558012e-06, "loss": 2.4702, "step": 17290 }, { "epoch": 0.78, "learning_rate": 2.2338687555567884e-06, "loss": 2.4876, "step": 17300 }, { "epoch": 0.78, "learning_rate": 2.2248663051609267e-06, "loss": 2.4866, "step": 17310 }, { "epoch": 0.78, "learning_rate": 2.2158797601843407e-06, "loss": 2.4528, "step": 17320 }, { "epoch": 0.79, "learning_rate": 2.2069091390105958e-06, "loss": 2.4904, "step": 17330 }, { "epoch": 0.79, "learning_rate": 2.197954459990704e-06, "loss": 2.4492, "step": 17340 }, { "epoch": 0.79, "learning_rate": 2.1890157414430448e-06, "loss": 2.4789, "step": 17350 }, { "epoch": 0.79, "learning_rate": 2.1800930016533673e-06, "loss": 2.4603, "step": 17360 }, { "epoch": 0.79, "learning_rate": 2.1711862588747157e-06, "loss": 2.4672, "step": 17370 }, { "epoch": 0.79, "learning_rate": 2.162295531327415e-06, "loss": 2.4695, "step": 17380 }, { "epoch": 0.79, "learning_rate": 2.153420837199042e-06, "loss": 2.4806, "step": 17390 }, { "epoch": 0.79, "learning_rate": 2.1445621946443486e-06, "loss": 2.4707, "step": 17400 }, { "epoch": 0.79, "learning_rate": 2.135719621785275e-06, "loss": 2.455, "step": 17410 }, { "epoch": 0.79, "learning_rate": 2.126893136710868e-06, "loss": 2.4655, "step": 17420 }, { "epoch": 0.79, "learning_rate": 2.1180827574772846e-06, "loss": 2.5176, "step": 17430 }, { "epoch": 0.79, "learning_rate": 2.109288502107709e-06, "loss": 2.4658, "step": 17440 }, { "epoch": 0.79, "learning_rate": 2.1005103885923683e-06, "loss": 2.4609, "step": 17450 }, { "epoch": 0.79, "learning_rate": 2.0917484348884443e-06, "loss": 2.4808, "step": 17460 }, { "epoch": 0.79, "learning_rate": 2.0830026589200835e-06, "loss": 2.49, "step": 17470 }, { "epoch": 0.79, "learning_rate": 2.074273078578315e-06, "loss": 2.442, "step": 17480 }, { "epoch": 0.79, "learning_rate": 2.0655597117210557e-06, "loss": 2.4991, "step": 17490 }, { "epoch": 0.79, "learning_rate": 2.0568625761730475e-06, "loss": 2.4852, "step": 17500 }, { "epoch": 0.79, "learning_rate": 2.0481816897258256e-06, "loss": 2.4643, "step": 17510 }, { "epoch": 0.79, "learning_rate": 2.039517070137691e-06, "loss": 2.4727, "step": 17520 }, { "epoch": 0.79, "learning_rate": 2.030868735133661e-06, "loss": 2.4714, "step": 17530 }, { "epoch": 0.79, "learning_rate": 2.022236702405447e-06, "loss": 2.4597, "step": 17540 }, { "epoch": 0.8, "learning_rate": 2.0136209896114044e-06, "loss": 2.4604, "step": 17550 }, { "epoch": 0.8, "learning_rate": 2.005021614376511e-06, "loss": 2.4699, "step": 17560 }, { "epoch": 0.8, "learning_rate": 1.9964385942923116e-06, "loss": 2.4238, "step": 17570 }, { "epoch": 0.8, "learning_rate": 1.9878719469169104e-06, "loss": 2.4799, "step": 17580 }, { "epoch": 0.8, "learning_rate": 1.9793216897748967e-06, "loss": 2.4426, "step": 17590 }, { "epoch": 0.8, "learning_rate": 1.9707878403573532e-06, "loss": 2.4655, "step": 17600 }, { "epoch": 0.8, "learning_rate": 1.9622704161217844e-06, "loss": 2.476, "step": 17610 }, { "epoch": 0.8, "learning_rate": 1.9537694344920945e-06, "loss": 2.4877, "step": 17620 }, { "epoch": 0.8, "learning_rate": 1.945284912858556e-06, "loss": 2.441, "step": 17630 }, { "epoch": 0.8, "learning_rate": 1.9368168685777667e-06, "loss": 2.5043, "step": 17640 }, { "epoch": 0.8, "learning_rate": 1.9283653189726172e-06, "loss": 2.4792, "step": 17650 }, { "epoch": 0.8, "learning_rate": 1.9199302813322564e-06, "loss": 2.454, "step": 17660 }, { "epoch": 0.8, "learning_rate": 1.9115117729120548e-06, "loss": 2.4713, "step": 17670 }, { "epoch": 0.8, "learning_rate": 1.90310981093357e-06, "loss": 2.4868, "step": 17680 }, { "epoch": 0.8, "learning_rate": 1.8947244125845077e-06, "loss": 2.4803, "step": 17690 }, { "epoch": 0.8, "learning_rate": 1.886355595018693e-06, "loss": 2.4637, "step": 17700 }, { "epoch": 0.8, "learning_rate": 1.8780033753560322e-06, "loss": 2.4517, "step": 17710 }, { "epoch": 0.8, "learning_rate": 1.869667770682475e-06, "loss": 2.4766, "step": 17720 }, { "epoch": 0.8, "learning_rate": 1.8613487980499856e-06, "loss": 2.4674, "step": 17730 }, { "epoch": 0.8, "learning_rate": 1.8530464744765008e-06, "loss": 2.4586, "step": 17740 }, { "epoch": 0.8, "learning_rate": 1.8447608169459019e-06, "loss": 2.4749, "step": 17750 }, { "epoch": 0.8, "learning_rate": 1.836491842407976e-06, "loss": 2.4648, "step": 17760 }, { "epoch": 0.81, "learning_rate": 1.8282395677783792e-06, "loss": 2.4878, "step": 17770 }, { "epoch": 0.81, "learning_rate": 1.8200040099386084e-06, "loss": 2.4559, "step": 17780 }, { "epoch": 0.81, "learning_rate": 1.8117851857359635e-06, "loss": 2.4712, "step": 17790 }, { "epoch": 0.81, "learning_rate": 1.8035831119835111e-06, "loss": 2.4596, "step": 17800 }, { "epoch": 0.81, "learning_rate": 1.795397805460053e-06, "loss": 2.492, "step": 17810 }, { "epoch": 0.81, "learning_rate": 1.787229282910088e-06, "loss": 2.4697, "step": 17820 }, { "epoch": 0.81, "learning_rate": 1.7790775610437838e-06, "loss": 2.4374, "step": 17830 }, { "epoch": 0.81, "learning_rate": 1.770942656536937e-06, "loss": 2.4926, "step": 17840 }, { "epoch": 0.81, "learning_rate": 1.7628245860309435e-06, "loss": 2.489, "step": 17850 }, { "epoch": 0.81, "learning_rate": 1.7547233661327567e-06, "loss": 2.4658, "step": 17860 }, { "epoch": 0.81, "learning_rate": 1.7466390134148713e-06, "loss": 2.4838, "step": 17870 }, { "epoch": 0.81, "learning_rate": 1.7385715444152595e-06, "loss": 2.4489, "step": 17880 }, { "epoch": 0.81, "learning_rate": 1.7305209756373732e-06, "loss": 2.4781, "step": 17890 }, { "epoch": 0.81, "learning_rate": 1.722487323550075e-06, "loss": 2.4737, "step": 17900 }, { "epoch": 0.81, "learning_rate": 1.714470604587638e-06, "loss": 2.4854, "step": 17910 }, { "epoch": 0.81, "learning_rate": 1.7064708351496774e-06, "loss": 2.4807, "step": 17920 }, { "epoch": 0.81, "learning_rate": 1.6984880316011554e-06, "loss": 2.4651, "step": 17930 }, { "epoch": 0.81, "learning_rate": 1.6905222102723074e-06, "loss": 2.4797, "step": 17940 }, { "epoch": 0.81, "learning_rate": 1.6825733874586458e-06, "loss": 2.4639, "step": 17950 }, { "epoch": 0.81, "learning_rate": 1.674641579420897e-06, "loss": 2.5059, "step": 17960 }, { "epoch": 0.81, "learning_rate": 1.6667268023849848e-06, "loss": 2.4473, "step": 17970 }, { "epoch": 0.81, "learning_rate": 1.6588290725419998e-06, "loss": 2.4591, "step": 17980 }, { "epoch": 0.82, "learning_rate": 1.6509484060481451e-06, "loss": 2.4699, "step": 17990 }, { "epoch": 0.82, "learning_rate": 1.6430848190247362e-06, "loss": 2.4813, "step": 18000 }, { "epoch": 0.82, "eval_accuracy": 0.5416867783481749, "eval_loss": 2.233830690383911, "eval_runtime": 9.9255, "eval_samples_per_second": 137.726, "eval_steps_per_second": 1.108, "step": 18000 }, { "epoch": 0.82, "learning_rate": 1.6352383275581274e-06, "loss": 2.4915, "step": 18010 }, { "epoch": 0.82, "learning_rate": 1.6274089476997257e-06, "loss": 2.4732, "step": 18020 }, { "epoch": 0.82, "learning_rate": 1.6195966954659093e-06, "loss": 2.467, "step": 18030 }, { "epoch": 0.82, "learning_rate": 1.6118015868380387e-06, "loss": 2.4746, "step": 18040 }, { "epoch": 0.82, "learning_rate": 1.6040236377623874e-06, "loss": 2.4808, "step": 18050 }, { "epoch": 0.82, "learning_rate": 1.596262864150141e-06, "loss": 2.4675, "step": 18060 }, { "epoch": 0.82, "learning_rate": 1.5885192818773399e-06, "loss": 2.4639, "step": 18070 }, { "epoch": 0.82, "learning_rate": 1.5807929067848593e-06, "loss": 2.4783, "step": 18080 }, { "epoch": 0.82, "learning_rate": 1.573083754678374e-06, "loss": 2.4318, "step": 18090 }, { "epoch": 0.82, "learning_rate": 1.5653918413283276e-06, "loss": 2.4574, "step": 18100 }, { "epoch": 0.82, "learning_rate": 1.5577171824698945e-06, "loss": 2.464, "step": 18110 }, { "epoch": 0.82, "learning_rate": 1.550059793802957e-06, "loss": 2.4461, "step": 18120 }, { "epoch": 0.82, "learning_rate": 1.5424196909920663e-06, "loss": 2.5064, "step": 18130 }, { "epoch": 0.82, "learning_rate": 1.53479688966641e-06, "loss": 2.5071, "step": 18140 }, { "epoch": 0.82, "learning_rate": 1.527191405419789e-06, "loss": 2.4901, "step": 18150 }, { "epoch": 0.82, "learning_rate": 1.5196032538105653e-06, "loss": 2.4794, "step": 18160 }, { "epoch": 0.82, "learning_rate": 1.5120324503616613e-06, "loss": 2.4578, "step": 18170 }, { "epoch": 0.82, "learning_rate": 1.5044790105605e-06, "loss": 2.4956, "step": 18180 }, { "epoch": 0.82, "learning_rate": 1.4969429498589861e-06, "loss": 2.4597, "step": 18190 }, { "epoch": 0.82, "learning_rate": 1.489424283673473e-06, "loss": 2.4543, "step": 18200 }, { "epoch": 0.83, "learning_rate": 1.4819230273847295e-06, "loss": 2.4628, "step": 18210 }, { "epoch": 0.83, "learning_rate": 1.474439196337909e-06, "loss": 2.4726, "step": 18220 }, { "epoch": 0.83, "learning_rate": 1.4669728058425215e-06, "loss": 2.4595, "step": 18230 }, { "epoch": 0.83, "learning_rate": 1.4595238711723958e-06, "loss": 2.4566, "step": 18240 }, { "epoch": 0.83, "learning_rate": 1.452092407565654e-06, "loss": 2.4618, "step": 18250 }, { "epoch": 0.83, "learning_rate": 1.4446784302246753e-06, "loss": 2.4595, "step": 18260 }, { "epoch": 0.83, "learning_rate": 1.437281954316071e-06, "loss": 2.4806, "step": 18270 }, { "epoch": 0.83, "learning_rate": 1.4299029949706478e-06, "loss": 2.4581, "step": 18280 }, { "epoch": 0.83, "learning_rate": 1.4225415672833798e-06, "loss": 2.4576, "step": 18290 }, { "epoch": 0.83, "learning_rate": 1.4151976863133775e-06, "loss": 2.4914, "step": 18300 }, { "epoch": 0.83, "learning_rate": 1.407871367083855e-06, "loss": 2.4491, "step": 18310 }, { "epoch": 0.83, "learning_rate": 1.4005626245821036e-06, "loss": 2.486, "step": 18320 }, { "epoch": 0.83, "learning_rate": 1.3932714737594544e-06, "loss": 2.4876, "step": 18330 }, { "epoch": 0.83, "learning_rate": 1.3859979295312554e-06, "loss": 2.4538, "step": 18340 }, { "epoch": 0.83, "learning_rate": 1.3787420067768342e-06, "loss": 2.4468, "step": 18350 }, { "epoch": 0.83, "learning_rate": 1.371503720339471e-06, "loss": 2.4683, "step": 18360 }, { "epoch": 0.83, "learning_rate": 1.36428308502637e-06, "loss": 2.4853, "step": 18370 }, { "epoch": 0.83, "learning_rate": 1.3570801156086254e-06, "loss": 2.481, "step": 18380 }, { "epoch": 0.83, "learning_rate": 1.349894826821192e-06, "loss": 2.4568, "step": 18390 }, { "epoch": 0.83, "learning_rate": 1.342727233362856e-06, "loss": 2.4875, "step": 18400 }, { "epoch": 0.83, "learning_rate": 1.3355773498962054e-06, "loss": 2.4554, "step": 18410 }, { "epoch": 0.83, "learning_rate": 1.3284451910475992e-06, "loss": 2.4949, "step": 18420 }, { "epoch": 0.84, "learning_rate": 1.3213307714071343e-06, "loss": 2.4547, "step": 18430 }, { "epoch": 0.84, "learning_rate": 1.314234105528629e-06, "loss": 2.4623, "step": 18440 }, { "epoch": 0.84, "learning_rate": 1.307155207929568e-06, "loss": 2.4404, "step": 18450 }, { "epoch": 0.84, "learning_rate": 1.3000940930911032e-06, "loss": 2.4754, "step": 18460 }, { "epoch": 0.84, "learning_rate": 1.2930507754579924e-06, "loss": 2.4613, "step": 18470 }, { "epoch": 0.84, "learning_rate": 1.2860252694386044e-06, "loss": 2.4751, "step": 18480 }, { "epoch": 0.84, "learning_rate": 1.2790175894048529e-06, "loss": 2.4724, "step": 18490 }, { "epoch": 0.84, "learning_rate": 1.272027749692203e-06, "loss": 2.4683, "step": 18500 }, { "epoch": 0.84, "learning_rate": 1.2650557645996053e-06, "loss": 2.4751, "step": 18510 }, { "epoch": 0.84, "learning_rate": 1.258101648389507e-06, "loss": 2.4795, "step": 18520 }, { "epoch": 0.84, "learning_rate": 1.2511654152877805e-06, "loss": 2.4547, "step": 18530 }, { "epoch": 0.84, "learning_rate": 1.2442470794837281e-06, "loss": 2.4755, "step": 18540 }, { "epoch": 0.84, "learning_rate": 1.2373466551300394e-06, "loss": 2.4778, "step": 18550 }, { "epoch": 0.84, "learning_rate": 1.2304641563427543e-06, "loss": 2.4878, "step": 18560 }, { "epoch": 0.84, "learning_rate": 1.223599597201256e-06, "loss": 2.4586, "step": 18570 }, { "epoch": 0.84, "learning_rate": 1.2167529917482123e-06, "loss": 2.471, "step": 18580 }, { "epoch": 0.84, "learning_rate": 1.2099243539895822e-06, "loss": 2.4661, "step": 18590 }, { "epoch": 0.84, "learning_rate": 1.2031136978945513e-06, "loss": 2.4942, "step": 18600 }, { "epoch": 0.84, "learning_rate": 1.1963210373955348e-06, "loss": 2.4489, "step": 18610 }, { "epoch": 0.84, "learning_rate": 1.1895463863881219e-06, "loss": 2.4902, "step": 18620 }, { "epoch": 0.84, "learning_rate": 1.1827897587310744e-06, "loss": 2.4806, "step": 18630 }, { "epoch": 0.84, "learning_rate": 1.176051168246266e-06, "loss": 2.4844, "step": 18640 }, { "epoch": 0.85, "learning_rate": 1.1693306287186913e-06, "loss": 2.464, "step": 18650 }, { "epoch": 0.85, "learning_rate": 1.1626281538964057e-06, "loss": 2.4944, "step": 18660 }, { "epoch": 0.85, "learning_rate": 1.1559437574905153e-06, "loss": 2.46, "step": 18670 }, { "epoch": 0.85, "learning_rate": 1.1492774531751416e-06, "loss": 2.4967, "step": 18680 }, { "epoch": 0.85, "learning_rate": 1.1426292545873952e-06, "loss": 2.479, "step": 18690 }, { "epoch": 0.85, "learning_rate": 1.1359991753273504e-06, "loss": 2.5162, "step": 18700 }, { "epoch": 0.85, "learning_rate": 1.1293872289580133e-06, "loss": 2.4849, "step": 18710 }, { "epoch": 0.85, "learning_rate": 1.1227934290052989e-06, "loss": 2.4682, "step": 18720 }, { "epoch": 0.85, "learning_rate": 1.1162177889579906e-06, "loss": 2.4756, "step": 18730 }, { "epoch": 0.85, "learning_rate": 1.1096603222677406e-06, "loss": 2.493, "step": 18740 }, { "epoch": 0.85, "learning_rate": 1.1031210423490036e-06, "loss": 2.4884, "step": 18750 }, { "epoch": 0.85, "learning_rate": 1.096599962579048e-06, "loss": 2.4543, "step": 18760 }, { "epoch": 0.85, "learning_rate": 1.0900970962978985e-06, "loss": 2.4616, "step": 18770 }, { "epoch": 0.85, "learning_rate": 1.0836124568083263e-06, "loss": 2.4939, "step": 18780 }, { "epoch": 0.85, "learning_rate": 1.077146057375814e-06, "loss": 2.4483, "step": 18790 }, { "epoch": 0.85, "learning_rate": 1.0706979112285343e-06, "loss": 2.4408, "step": 18800 }, { "epoch": 0.85, "learning_rate": 1.0642680315573162e-06, "loss": 2.4837, "step": 18810 }, { "epoch": 0.85, "learning_rate": 1.0578564315156215e-06, "loss": 2.4425, "step": 18820 }, { "epoch": 0.85, "learning_rate": 1.0514631242195195e-06, "loss": 2.4882, "step": 18830 }, { "epoch": 0.85, "learning_rate": 1.0450881227476583e-06, "loss": 2.5087, "step": 18840 }, { "epoch": 0.85, "learning_rate": 1.0387314401412375e-06, "loss": 2.4761, "step": 18850 }, { "epoch": 0.85, "learning_rate": 1.0323930894039825e-06, "loss": 2.4945, "step": 18860 }, { "epoch": 0.86, "learning_rate": 1.026073083502116e-06, "loss": 2.4795, "step": 18870 }, { "epoch": 0.86, "learning_rate": 1.0197714353643351e-06, "loss": 2.4768, "step": 18880 }, { "epoch": 0.86, "learning_rate": 1.0134881578817834e-06, "loss": 2.4765, "step": 18890 }, { "epoch": 0.86, "learning_rate": 1.0072232639080215e-06, "loss": 2.4482, "step": 18900 }, { "epoch": 0.86, "learning_rate": 1.0009767662590053e-06, "loss": 2.4471, "step": 18910 }, { "epoch": 0.86, "learning_rate": 9.947486777130565e-07, "loss": 2.4803, "step": 18920 }, { "epoch": 0.86, "learning_rate": 9.885390110108396e-07, "loss": 2.482, "step": 18930 }, { "epoch": 0.86, "learning_rate": 9.823477788553305e-07, "loss": 2.5003, "step": 18940 }, { "epoch": 0.86, "learning_rate": 9.761749939117982e-07, "loss": 2.4727, "step": 18950 }, { "epoch": 0.86, "learning_rate": 9.700206688077707e-07, "loss": 2.4685, "step": 18960 }, { "epoch": 0.86, "learning_rate": 9.638848161330161e-07, "loss": 2.4901, "step": 18970 }, { "epoch": 0.86, "learning_rate": 9.577674484395127e-07, "loss": 2.4798, "step": 18980 }, { "epoch": 0.86, "learning_rate": 9.516685782414248e-07, "loss": 2.4721, "step": 18990 }, { "epoch": 0.86, "learning_rate": 9.455882180150744e-07, "loss": 2.5014, "step": 19000 }, { "epoch": 0.86, "learning_rate": 9.39526380198923e-07, "loss": 2.471, "step": 19010 }, { "epoch": 0.86, "learning_rate": 9.334830771935344e-07, "loss": 2.4707, "step": 19020 }, { "epoch": 0.86, "learning_rate": 9.274583213615662e-07, "loss": 2.4536, "step": 19030 }, { "epoch": 0.86, "learning_rate": 9.214521250277208e-07, "loss": 2.4662, "step": 19040 }, { "epoch": 0.86, "learning_rate": 9.154645004787488e-07, "loss": 2.4724, "step": 19050 }, { "epoch": 0.86, "learning_rate": 9.094954599633953e-07, "loss": 2.4818, "step": 19060 }, { "epoch": 0.86, "learning_rate": 9.035450156924009e-07, "loss": 2.4598, "step": 19070 }, { "epoch": 0.86, "learning_rate": 8.976131798384502e-07, "loss": 2.474, "step": 19080 }, { "epoch": 0.87, "learning_rate": 8.916999645361768e-07, "loss": 2.4635, "step": 19090 }, { "epoch": 0.87, "learning_rate": 8.858053818821089e-07, "loss": 2.4736, "step": 19100 }, { "epoch": 0.87, "learning_rate": 8.799294439346661e-07, "loss": 2.4871, "step": 19110 }, { "epoch": 0.87, "learning_rate": 8.740721627141246e-07, "loss": 2.4839, "step": 19120 }, { "epoch": 0.87, "learning_rate": 8.68233550202594e-07, "loss": 2.4924, "step": 19130 }, { "epoch": 0.87, "learning_rate": 8.624136183440001e-07, "loss": 2.4486, "step": 19140 }, { "epoch": 0.87, "learning_rate": 8.566123790440406e-07, "loss": 2.4702, "step": 19150 }, { "epoch": 0.87, "learning_rate": 8.508298441701923e-07, "loss": 2.4545, "step": 19160 }, { "epoch": 0.87, "learning_rate": 8.450660255516497e-07, "loss": 2.4544, "step": 19170 }, { "epoch": 0.87, "learning_rate": 8.393209349793363e-07, "loss": 2.4588, "step": 19180 }, { "epoch": 0.87, "learning_rate": 8.335945842058524e-07, "loss": 2.5121, "step": 19190 }, { "epoch": 0.87, "learning_rate": 8.278869849454718e-07, "loss": 2.4971, "step": 19200 }, { "epoch": 0.87, "learning_rate": 8.221981488740971e-07, "loss": 2.4697, "step": 19210 }, { "epoch": 0.87, "learning_rate": 8.165280876292625e-07, "loss": 2.4947, "step": 19220 }, { "epoch": 0.87, "learning_rate": 8.10876812810083e-07, "loss": 2.4556, "step": 19230 }, { "epoch": 0.87, "learning_rate": 8.052443359772466e-07, "loss": 2.4741, "step": 19240 }, { "epoch": 0.87, "learning_rate": 7.996306686529886e-07, "loss": 2.4592, "step": 19250 }, { "epoch": 0.87, "learning_rate": 7.940358223210631e-07, "loss": 2.4832, "step": 19260 }, { "epoch": 0.87, "learning_rate": 7.884598084267225e-07, "loss": 2.4762, "step": 19270 }, { "epoch": 0.87, "learning_rate": 7.82902638376698e-07, "loss": 2.4659, "step": 19280 }, { "epoch": 0.87, "learning_rate": 7.773643235391682e-07, "loss": 2.4634, "step": 19290 }, { "epoch": 0.87, "learning_rate": 7.71844875243739e-07, "loss": 2.4579, "step": 19300 }, { "epoch": 0.88, "learning_rate": 7.663443047814289e-07, "loss": 2.4547, "step": 19310 }, { "epoch": 0.88, "learning_rate": 7.608626234046279e-07, "loss": 2.4457, "step": 19320 }, { "epoch": 0.88, "learning_rate": 7.553998423270948e-07, "loss": 2.4728, "step": 19330 }, { "epoch": 0.88, "learning_rate": 7.499559727239192e-07, "loss": 2.4711, "step": 19340 }, { "epoch": 0.88, "learning_rate": 7.445310257315041e-07, "loss": 2.461, "step": 19350 }, { "epoch": 0.88, "learning_rate": 7.391250124475436e-07, "loss": 2.4891, "step": 19360 }, { "epoch": 0.88, "learning_rate": 7.337379439310011e-07, "loss": 2.4915, "step": 19370 }, { "epoch": 0.88, "learning_rate": 7.283698312020814e-07, "loss": 2.4784, "step": 19380 }, { "epoch": 0.88, "learning_rate": 7.230206852422161e-07, "loss": 2.4649, "step": 19390 }, { "epoch": 0.88, "learning_rate": 7.176905169940351e-07, "loss": 2.4806, "step": 19400 }, { "epoch": 0.88, "learning_rate": 7.123793373613441e-07, "loss": 2.4849, "step": 19410 }, { "epoch": 0.88, "learning_rate": 7.070871572091076e-07, "loss": 2.4658, "step": 19420 }, { "epoch": 0.88, "learning_rate": 7.0181398736342e-07, "loss": 2.4855, "step": 19430 }, { "epoch": 0.88, "learning_rate": 6.965598386114903e-07, "loss": 2.5001, "step": 19440 }, { "epoch": 0.88, "learning_rate": 6.913247217016128e-07, "loss": 2.4582, "step": 19450 }, { "epoch": 0.88, "learning_rate": 6.861086473431522e-07, "loss": 2.4426, "step": 19460 }, { "epoch": 0.88, "learning_rate": 6.809116262065152e-07, "loss": 2.4672, "step": 19470 }, { "epoch": 0.88, "learning_rate": 6.757336689231342e-07, "loss": 2.4626, "step": 19480 }, { "epoch": 0.88, "learning_rate": 6.705747860854405e-07, "loss": 2.4726, "step": 19490 }, { "epoch": 0.88, "learning_rate": 6.654349882468491e-07, "loss": 2.4695, "step": 19500 }, { "epoch": 0.88, "learning_rate": 6.603142859217293e-07, "loss": 2.4723, "step": 19510 }, { "epoch": 0.88, "learning_rate": 6.552126895853883e-07, "loss": 2.4674, "step": 19520 }, { "epoch": 0.89, "learning_rate": 6.501302096740503e-07, "loss": 2.4626, "step": 19530 }, { "epoch": 0.89, "learning_rate": 6.450668565848317e-07, "loss": 2.4773, "step": 19540 }, { "epoch": 0.89, "learning_rate": 6.400226406757226e-07, "loss": 2.4693, "step": 19550 }, { "epoch": 0.89, "learning_rate": 6.349975722655633e-07, "loss": 2.4922, "step": 19560 }, { "epoch": 0.89, "learning_rate": 6.299916616340262e-07, "loss": 2.4772, "step": 19570 }, { "epoch": 0.89, "learning_rate": 6.250049190215901e-07, "loss": 2.4792, "step": 19580 }, { "epoch": 0.89, "learning_rate": 6.200373546295246e-07, "loss": 2.4621, "step": 19590 }, { "epoch": 0.89, "learning_rate": 6.150889786198711e-07, "loss": 2.46, "step": 19600 }, { "epoch": 0.89, "learning_rate": 6.101598011154064e-07, "loss": 2.4741, "step": 19610 }, { "epoch": 0.89, "learning_rate": 6.052498321996458e-07, "loss": 2.459, "step": 19620 }, { "epoch": 0.89, "learning_rate": 6.003590819167982e-07, "loss": 2.4543, "step": 19630 }, { "epoch": 0.89, "learning_rate": 5.954875602717702e-07, "loss": 2.4536, "step": 19640 }, { "epoch": 0.89, "learning_rate": 5.906352772301193e-07, "loss": 2.4733, "step": 19650 }, { "epoch": 0.89, "learning_rate": 5.858022427180598e-07, "loss": 2.5068, "step": 19660 }, { "epoch": 0.89, "learning_rate": 5.809884666224208e-07, "loss": 2.4655, "step": 19670 }, { "epoch": 0.89, "learning_rate": 5.761939587906362e-07, "loss": 2.4941, "step": 19680 }, { "epoch": 0.89, "learning_rate": 5.714187290307272e-07, "loss": 2.4348, "step": 19690 }, { "epoch": 0.89, "learning_rate": 5.66662787111274e-07, "loss": 2.472, "step": 19700 }, { "epoch": 0.89, "learning_rate": 5.619261427614054e-07, "loss": 2.4812, "step": 19710 }, { "epoch": 0.89, "learning_rate": 5.572088056707648e-07, "loss": 2.4567, "step": 19720 }, { "epoch": 0.89, "learning_rate": 5.525107854895107e-07, "loss": 2.4716, "step": 19730 }, { "epoch": 0.89, "learning_rate": 5.478320918282731e-07, "loss": 2.484, "step": 19740 }, { "epoch": 0.9, "learning_rate": 5.431727342581583e-07, "loss": 2.4682, "step": 19750 }, { "epoch": 0.9, "learning_rate": 5.38532722310705e-07, "loss": 2.4756, "step": 19760 }, { "epoch": 0.9, "learning_rate": 5.339120654778884e-07, "loss": 2.473, "step": 19770 }, { "epoch": 0.9, "learning_rate": 5.293107732120794e-07, "loss": 2.4958, "step": 19780 }, { "epoch": 0.9, "learning_rate": 5.247288549260454e-07, "loss": 2.4707, "step": 19790 }, { "epoch": 0.9, "learning_rate": 5.201663199929097e-07, "loss": 2.4914, "step": 19800 }, { "epoch": 0.9, "learning_rate": 5.15623177746154e-07, "loss": 2.4894, "step": 19810 }, { "epoch": 0.9, "learning_rate": 5.110994374795819e-07, "loss": 2.4628, "step": 19820 }, { "epoch": 0.9, "learning_rate": 5.065951084473087e-07, "loss": 2.4489, "step": 19830 }, { "epoch": 0.9, "learning_rate": 5.021101998637412e-07, "loss": 2.4862, "step": 19840 }, { "epoch": 0.9, "learning_rate": 4.976447209035584e-07, "loss": 2.4711, "step": 19850 }, { "epoch": 0.9, "learning_rate": 4.93198680701692e-07, "loss": 2.4756, "step": 19860 }, { "epoch": 0.9, "learning_rate": 4.887720883533053e-07, "loss": 2.4947, "step": 19870 }, { "epoch": 0.9, "learning_rate": 4.843649529137861e-07, "loss": 2.4581, "step": 19880 }, { "epoch": 0.9, "learning_rate": 4.799772833987082e-07, "loss": 2.5007, "step": 19890 }, { "epoch": 0.9, "learning_rate": 4.7560908878383495e-07, "loss": 2.5006, "step": 19900 }, { "epoch": 0.9, "learning_rate": 4.712603780050806e-07, "loss": 2.4506, "step": 19910 }, { "epoch": 0.9, "learning_rate": 4.6693115995851136e-07, "loss": 2.4519, "step": 19920 }, { "epoch": 0.9, "learning_rate": 4.626214435003118e-07, "loss": 2.4831, "step": 19930 }, { "epoch": 0.9, "learning_rate": 4.5833123744677297e-07, "loss": 2.4764, "step": 19940 }, { "epoch": 0.9, "learning_rate": 4.5406055057427453e-07, "loss": 2.4708, "step": 19950 }, { "epoch": 0.9, "learning_rate": 4.4980939161926583e-07, "loss": 2.4668, "step": 19960 }, { "epoch": 0.91, "learning_rate": 4.455777692782504e-07, "loss": 2.4599, "step": 19970 }, { "epoch": 0.91, "learning_rate": 4.413656922077636e-07, "loss": 2.4717, "step": 19980 }, { "epoch": 0.91, "learning_rate": 4.371731690243586e-07, "loss": 2.4609, "step": 19990 }, { "epoch": 0.91, "learning_rate": 4.330002083045881e-07, "loss": 2.4797, "step": 20000 }, { "epoch": 0.91, "eval_accuracy": 0.5417811691733867, "eval_loss": 2.2338173389434814, "eval_runtime": 9.8192, "eval_samples_per_second": 139.217, "eval_steps_per_second": 1.12, "step": 20000 }, { "epoch": 0.91, "learning_rate": 4.288468185849848e-07, "loss": 2.4755, "step": 20010 }, { "epoch": 0.91, "learning_rate": 4.2471300836204655e-07, "loss": 2.4592, "step": 20020 }, { "epoch": 0.91, "learning_rate": 4.205987860922167e-07, "loss": 2.4843, "step": 20030 }, { "epoch": 0.91, "learning_rate": 4.1650416019187065e-07, "loss": 2.4376, "step": 20040 }, { "epoch": 0.91, "learning_rate": 4.1242913903729364e-07, "loss": 2.4909, "step": 20050 }, { "epoch": 0.91, "learning_rate": 4.0837373096466626e-07, "loss": 2.4521, "step": 20060 }, { "epoch": 0.91, "learning_rate": 4.043379442700468e-07, "loss": 2.489, "step": 20070 }, { "epoch": 0.91, "learning_rate": 4.003217872093557e-07, "loss": 2.4717, "step": 20080 }, { "epoch": 0.91, "learning_rate": 3.9632526799835537e-07, "loss": 2.4628, "step": 20090 }, { "epoch": 0.91, "learning_rate": 3.923483948126383e-07, "loss": 2.4842, "step": 20100 }, { "epoch": 0.91, "learning_rate": 3.883911757876058e-07, "loss": 2.5103, "step": 20110 }, { "epoch": 0.91, "learning_rate": 3.8445361901845333e-07, "loss": 2.4585, "step": 20120 }, { "epoch": 0.91, "learning_rate": 3.805357325601544e-07, "loss": 2.4485, "step": 20130 }, { "epoch": 0.91, "learning_rate": 3.766375244274412e-07, "loss": 2.4609, "step": 20140 }, { "epoch": 0.91, "learning_rate": 3.7275900259479357e-07, "loss": 2.4653, "step": 20150 }, { "epoch": 0.91, "learning_rate": 3.689001749964183e-07, "loss": 2.4603, "step": 20160 }, { "epoch": 0.91, "learning_rate": 3.650610495262319e-07, "loss": 2.4683, "step": 20170 }, { "epoch": 0.91, "learning_rate": 3.6124163403784773e-07, "loss": 2.4605, "step": 20180 }, { "epoch": 0.92, "learning_rate": 3.574419363445647e-07, "loss": 2.4574, "step": 20190 }, { "epoch": 0.92, "learning_rate": 3.536619642193351e-07, "loss": 2.4544, "step": 20200 }, { "epoch": 0.92, "learning_rate": 3.4990172539476784e-07, "loss": 2.472, "step": 20210 }, { "epoch": 0.92, "learning_rate": 3.461612275630977e-07, "loss": 2.4651, "step": 20220 }, { "epoch": 0.92, "learning_rate": 3.4244047837618255e-07, "loss": 2.4505, "step": 20230 }, { "epoch": 0.92, "learning_rate": 3.3873948544547174e-07, "loss": 2.5009, "step": 20240 }, { "epoch": 0.92, "learning_rate": 3.3505825634200904e-07, "loss": 2.4679, "step": 20250 }, { "epoch": 0.92, "learning_rate": 3.313967985964006e-07, "loss": 2.4656, "step": 20260 }, { "epoch": 0.92, "learning_rate": 3.277551196988105e-07, "loss": 2.468, "step": 20270 }, { "epoch": 0.92, "learning_rate": 3.241332270989417e-07, "loss": 2.4618, "step": 20280 }, { "epoch": 0.92, "learning_rate": 3.205311282060186e-07, "loss": 2.4926, "step": 20290 }, { "epoch": 0.92, "learning_rate": 3.169488303887769e-07, "loss": 2.4725, "step": 20300 }, { "epoch": 0.92, "learning_rate": 3.133863409754423e-07, "loss": 2.4697, "step": 20310 }, { "epoch": 0.92, "learning_rate": 3.0984366725372304e-07, "loss": 2.4938, "step": 20320 }, { "epoch": 0.92, "learning_rate": 3.0632081647078536e-07, "loss": 2.4824, "step": 20330 }, { "epoch": 0.92, "learning_rate": 3.028177958332512e-07, "loss": 2.4705, "step": 20340 }, { "epoch": 0.92, "learning_rate": 2.9933461250716836e-07, "loss": 2.4558, "step": 20350 }, { "epoch": 0.92, "learning_rate": 2.9587127361801247e-07, "loss": 2.4638, "step": 20360 }, { "epoch": 0.92, "learning_rate": 2.9242778625065414e-07, "loss": 2.4622, "step": 20370 }, { "epoch": 0.92, "learning_rate": 2.8900415744936405e-07, "loss": 2.4284, "step": 20380 }, { "epoch": 0.92, "learning_rate": 2.856003942177821e-07, "loss": 2.4643, "step": 20390 }, { "epoch": 0.92, "learning_rate": 2.822165035189117e-07, "loss": 2.4735, "step": 20400 }, { "epoch": 0.92, "learning_rate": 2.7885249227510235e-07, "loss": 2.4592, "step": 20410 }, { "epoch": 0.93, "learning_rate": 2.755083673680392e-07, "loss": 2.4702, "step": 20420 }, { "epoch": 0.93, "learning_rate": 2.721841356387245e-07, "loss": 2.4666, "step": 20430 }, { "epoch": 0.93, "learning_rate": 2.6887980388746515e-07, "loss": 2.467, "step": 20440 }, { "epoch": 0.93, "learning_rate": 2.6559537887385966e-07, "loss": 2.4946, "step": 20450 }, { "epoch": 0.93, "learning_rate": 2.6233086731678234e-07, "loss": 2.4656, "step": 20460 }, { "epoch": 0.93, "learning_rate": 2.5908627589437575e-07, "loss": 2.4694, "step": 20470 }, { "epoch": 0.93, "learning_rate": 2.55861611244026e-07, "loss": 2.4681, "step": 20480 }, { "epoch": 0.93, "learning_rate": 2.5265687996235854e-07, "loss": 2.459, "step": 20490 }, { "epoch": 0.93, "learning_rate": 2.4947208860522376e-07, "loss": 2.4609, "step": 20500 }, { "epoch": 0.93, "learning_rate": 2.4630724368767565e-07, "loss": 2.4642, "step": 20510 }, { "epoch": 0.93, "learning_rate": 2.4316235168397097e-07, "loss": 2.4708, "step": 20520 }, { "epoch": 0.93, "learning_rate": 2.400374190275423e-07, "loss": 2.487, "step": 20530 }, { "epoch": 0.93, "learning_rate": 2.3693245211099837e-07, "loss": 2.4803, "step": 20540 }, { "epoch": 0.93, "learning_rate": 2.3384745728610047e-07, "loss": 2.4987, "step": 20550 }, { "epoch": 0.93, "learning_rate": 2.307824408637538e-07, "loss": 2.4644, "step": 20560 }, { "epoch": 0.93, "learning_rate": 2.27737409113995e-07, "loss": 2.4638, "step": 20570 }, { "epoch": 0.93, "learning_rate": 2.2471236826597797e-07, "loss": 2.4529, "step": 20580 }, { "epoch": 0.93, "learning_rate": 2.217073245079626e-07, "loss": 2.4804, "step": 20590 }, { "epoch": 0.93, "learning_rate": 2.1872228398730045e-07, "loss": 2.4638, "step": 20600 }, { "epoch": 0.93, "learning_rate": 2.1575725281042236e-07, "loss": 2.4938, "step": 20610 }, { "epoch": 0.93, "learning_rate": 2.128122370428287e-07, "loss": 2.4722, "step": 20620 }, { "epoch": 0.93, "learning_rate": 2.0988724270907146e-07, "loss": 2.4603, "step": 20630 }, { "epoch": 0.94, "learning_rate": 2.0698227579274643e-07, "loss": 2.4643, "step": 20640 }, { "epoch": 0.94, "learning_rate": 2.0409734223648337e-07, "loss": 2.4598, "step": 20650 }, { "epoch": 0.94, "learning_rate": 2.0123244794192366e-07, "loss": 2.4557, "step": 20660 }, { "epoch": 0.94, "learning_rate": 1.9838759876972035e-07, "loss": 2.4597, "step": 20670 }, { "epoch": 0.94, "learning_rate": 1.9556280053951714e-07, "loss": 2.4743, "step": 20680 }, { "epoch": 0.94, "learning_rate": 1.927580590299427e-07, "loss": 2.4815, "step": 20690 }, { "epoch": 0.94, "learning_rate": 1.8997337997859522e-07, "loss": 2.4572, "step": 20700 }, { "epoch": 0.94, "learning_rate": 1.87208769082029e-07, "loss": 2.4441, "step": 20710 }, { "epoch": 0.94, "learning_rate": 1.8446423199574903e-07, "loss": 2.4685, "step": 20720 }, { "epoch": 0.94, "learning_rate": 1.8173977433419421e-07, "loss": 2.4815, "step": 20730 }, { "epoch": 0.94, "learning_rate": 1.7903540167072742e-07, "loss": 2.4675, "step": 20740 }, { "epoch": 0.94, "learning_rate": 1.7635111953762218e-07, "loss": 2.4717, "step": 20750 }, { "epoch": 0.94, "learning_rate": 1.7368693342605824e-07, "loss": 2.469, "step": 20760 }, { "epoch": 0.94, "learning_rate": 1.7104284878609934e-07, "loss": 2.4535, "step": 20770 }, { "epoch": 0.94, "learning_rate": 1.684188710266943e-07, "loss": 2.5172, "step": 20780 }, { "epoch": 0.94, "learning_rate": 1.6581500551565267e-07, "loss": 2.4841, "step": 20790 }, { "epoch": 0.94, "learning_rate": 1.6323125757964799e-07, "loss": 2.451, "step": 20800 }, { "epoch": 0.94, "learning_rate": 1.6066763250419338e-07, "loss": 2.449, "step": 20810 }, { "epoch": 0.94, "learning_rate": 1.5812413553364381e-07, "loss": 2.4326, "step": 20820 }, { "epoch": 0.94, "learning_rate": 1.5560077187117162e-07, "loss": 2.4364, "step": 20830 }, { "epoch": 0.94, "learning_rate": 1.5309754667876652e-07, "loss": 2.4445, "step": 20840 }, { "epoch": 0.94, "learning_rate": 1.5061446507722232e-07, "loss": 2.4838, "step": 20850 }, { "epoch": 0.95, "learning_rate": 1.4815153214612243e-07, "loss": 2.4709, "step": 20860 }, { "epoch": 0.95, "learning_rate": 1.4570875292383657e-07, "loss": 2.4599, "step": 20870 }, { "epoch": 0.95, "learning_rate": 1.432861324075019e-07, "loss": 2.4633, "step": 20880 }, { "epoch": 0.95, "learning_rate": 1.4088367555302185e-07, "loss": 2.4856, "step": 20890 }, { "epoch": 0.95, "learning_rate": 1.3850138727504735e-07, "loss": 2.4685, "step": 20900 }, { "epoch": 0.95, "learning_rate": 1.3613927244697567e-07, "loss": 2.4809, "step": 20910 }, { "epoch": 0.95, "learning_rate": 1.3379733590092924e-07, "loss": 2.4867, "step": 20920 }, { "epoch": 0.95, "learning_rate": 1.3147558242776027e-07, "loss": 2.4943, "step": 20930 }, { "epoch": 0.95, "learning_rate": 1.291740167770239e-07, "loss": 2.4762, "step": 20940 }, { "epoch": 0.95, "learning_rate": 1.2689264365698506e-07, "loss": 2.4526, "step": 20950 }, { "epoch": 0.95, "learning_rate": 1.246314677345972e-07, "loss": 2.4894, "step": 20960 }, { "epoch": 0.95, "learning_rate": 1.2239049363549916e-07, "loss": 2.4844, "step": 20970 }, { "epoch": 0.95, "learning_rate": 1.2016972594400155e-07, "loss": 2.4698, "step": 20980 }, { "epoch": 0.95, "learning_rate": 1.1796916920308043e-07, "loss": 2.4719, "step": 20990 }, { "epoch": 0.95, "learning_rate": 1.1578882791436374e-07, "loss": 2.5049, "step": 21000 }, { "epoch": 0.95, "learning_rate": 1.136287065381303e-07, "loss": 2.4826, "step": 21010 }, { "epoch": 0.95, "learning_rate": 1.11488809493292e-07, "loss": 2.4676, "step": 21020 }, { "epoch": 0.95, "learning_rate": 1.0936914115738717e-07, "loss": 2.4727, "step": 21030 }, { "epoch": 0.95, "learning_rate": 1.0726970586657726e-07, "loss": 2.4444, "step": 21040 }, { "epoch": 0.95, "learning_rate": 1.051905079156279e-07, "loss": 2.4453, "step": 21050 }, { "epoch": 0.95, "learning_rate": 1.0313155155791122e-07, "loss": 2.4894, "step": 21060 }, { "epoch": 0.95, "learning_rate": 1.0109284100538575e-07, "loss": 2.4744, "step": 21070 }, { "epoch": 0.96, "learning_rate": 9.907438042859874e-08, "loss": 2.4806, "step": 21080 }, { "epoch": 0.96, "learning_rate": 9.707617395666946e-08, "loss": 2.469, "step": 21090 }, { "epoch": 0.96, "learning_rate": 9.509822567728588e-08, "loss": 2.4879, "step": 21100 }, { "epoch": 0.96, "learning_rate": 9.314053963669245e-08, "loss": 2.461, "step": 21110 }, { "epoch": 0.96, "learning_rate": 9.120311983968455e-08, "loss": 2.4678, "step": 21120 }, { "epoch": 0.96, "learning_rate": 8.92859702495974e-08, "loss": 2.4626, "step": 21130 }, { "epoch": 0.96, "learning_rate": 8.738909478830382e-08, "loss": 2.4806, "step": 21140 }, { "epoch": 0.96, "learning_rate": 8.551249733619981e-08, "loss": 2.4655, "step": 21150 }, { "epoch": 0.96, "learning_rate": 8.365618173219791e-08, "loss": 2.4862, "step": 21160 }, { "epoch": 0.96, "learning_rate": 8.182015177372271e-08, "loss": 2.4743, "step": 21170 }, { "epoch": 0.96, "learning_rate": 8.000441121669978e-08, "loss": 2.486, "step": 21180 }, { "epoch": 0.96, "learning_rate": 7.82089637755501e-08, "loss": 2.4695, "step": 21190 }, { "epoch": 0.96, "learning_rate": 7.643381312318121e-08, "loss": 2.4692, "step": 21200 }, { "epoch": 0.96, "learning_rate": 7.467896289097831e-08, "loss": 2.4777, "step": 21210 }, { "epoch": 0.96, "learning_rate": 7.294441666880092e-08, "loss": 2.4675, "step": 21220 }, { "epoch": 0.96, "learning_rate": 7.123017800497067e-08, "loss": 2.4658, "step": 21230 }, { "epoch": 0.96, "learning_rate": 6.95362504062691e-08, "loss": 2.4707, "step": 21240 }, { "epoch": 0.96, "learning_rate": 6.786263733792542e-08, "loss": 2.4809, "step": 21250 }, { "epoch": 0.96, "learning_rate": 6.62093422236132e-08, "loss": 2.4608, "step": 21260 }, { "epoch": 0.96, "learning_rate": 6.457636844544258e-08, "loss": 2.453, "step": 21270 }, { "epoch": 0.96, "learning_rate": 6.29637193439503e-08, "loss": 2.4788, "step": 21280 }, { "epoch": 0.96, "learning_rate": 6.137139821809857e-08, "loss": 2.4687, "step": 21290 }, { "epoch": 0.97, "learning_rate": 5.979940832526399e-08, "loss": 2.4851, "step": 21300 }, { "epoch": 0.97, "learning_rate": 5.8247752881228634e-08, "loss": 2.4515, "step": 21310 }, { "epoch": 0.97, "learning_rate": 5.671643506018343e-08, "loss": 2.4553, "step": 21320 }, { "epoch": 0.97, "learning_rate": 5.520545799470811e-08, "loss": 2.4838, "step": 21330 }, { "epoch": 0.97, "learning_rate": 5.371482477577683e-08, "loss": 2.4465, "step": 21340 }, { "epoch": 0.97, "learning_rate": 5.224453845274591e-08, "loss": 2.4926, "step": 21350 }, { "epoch": 0.97, "learning_rate": 5.079460203334608e-08, "loss": 2.4768, "step": 21360 }, { "epoch": 0.97, "learning_rate": 4.936501848368025e-08, "loss": 2.5071, "step": 21370 }, { "epoch": 0.97, "learning_rate": 4.795579072821688e-08, "loss": 2.454, "step": 21380 }, { "epoch": 0.97, "learning_rate": 4.656692164978216e-08, "loss": 2.4656, "step": 21390 }, { "epoch": 0.97, "learning_rate": 4.519841408955339e-08, "loss": 2.467, "step": 21400 }, { "epoch": 0.97, "learning_rate": 4.385027084705673e-08, "loss": 2.454, "step": 21410 }, { "epoch": 0.97, "learning_rate": 4.252249468016056e-08, "loss": 2.4797, "step": 21420 }, { "epoch": 0.97, "learning_rate": 4.1215088305065484e-08, "loss": 2.4861, "step": 21430 }, { "epoch": 0.97, "learning_rate": 3.992805439630432e-08, "loss": 2.4652, "step": 21440 }, { "epoch": 0.97, "learning_rate": 3.866139558673432e-08, "loss": 2.4647, "step": 21450 }, { "epoch": 0.97, "learning_rate": 3.741511446753165e-08, "loss": 2.4553, "step": 21460 }, { "epoch": 0.97, "learning_rate": 3.618921358818473e-08, "loss": 2.4962, "step": 21470 }, { "epoch": 0.97, "learning_rate": 3.498369545649416e-08, "loss": 2.4711, "step": 21480 }, { "epoch": 0.97, "learning_rate": 3.379856253855951e-08, "loss": 2.4661, "step": 21490 }, { "epoch": 0.97, "learning_rate": 3.2633817258782565e-08, "loss": 2.4791, "step": 21500 }, { "epoch": 0.97, "learning_rate": 3.1489461999855144e-08, "loss": 2.4574, "step": 21510 }, { "epoch": 0.98, "learning_rate": 3.036549910276243e-08, "loss": 2.4967, "step": 21520 }, { "epoch": 0.98, "learning_rate": 2.926193086676743e-08, "loss": 2.4959, "step": 21530 }, { "epoch": 0.98, "learning_rate": 2.817875954941762e-08, "loss": 2.4495, "step": 21540 }, { "epoch": 0.98, "learning_rate": 2.7115987366530537e-08, "loss": 2.4738, "step": 21550 }, { "epoch": 0.98, "learning_rate": 2.6073616492198196e-08, "loss": 2.4714, "step": 21560 }, { "epoch": 0.98, "learning_rate": 2.5051649058773775e-08, "loss": 2.472, "step": 21570 }, { "epoch": 0.98, "learning_rate": 2.4050087156874957e-08, "loss": 2.4784, "step": 21580 }, { "epoch": 0.98, "learning_rate": 2.3068932835373924e-08, "loss": 2.4815, "step": 21590 }, { "epoch": 0.98, "learning_rate": 2.2108188101395147e-08, "loss": 2.4427, "step": 21600 }, { "epoch": 0.98, "learning_rate": 2.1167854920315367e-08, "loss": 2.4588, "step": 21610 }, { "epoch": 0.98, "learning_rate": 2.0247935215751414e-08, "loss": 2.4685, "step": 21620 }, { "epoch": 0.98, "learning_rate": 1.9348430869564616e-08, "loss": 2.4702, "step": 21630 }, { "epoch": 0.98, "learning_rate": 1.846934372184861e-08, "loss": 2.4726, "step": 21640 }, { "epoch": 0.98, "learning_rate": 1.7610675570934876e-08, "loss": 2.4615, "step": 21650 }, { "epoch": 0.98, "learning_rate": 1.677242817338165e-08, "loss": 2.4679, "step": 21660 }, { "epoch": 0.98, "learning_rate": 1.595460324397169e-08, "loss": 2.4779, "step": 21670 }, { "epoch": 0.98, "learning_rate": 1.5157202455712285e-08, "loss": 2.4525, "step": 21680 }, { "epoch": 0.98, "learning_rate": 1.4380227439828586e-08, "loss": 2.4903, "step": 21690 }, { "epoch": 0.98, "learning_rate": 1.3623679785761402e-08, "loss": 2.4855, "step": 21700 }, { "epoch": 0.98, "learning_rate": 1.2887561041162732e-08, "loss": 2.4625, "step": 21710 }, { "epoch": 0.98, "learning_rate": 1.2171872711895794e-08, "loss": 2.4875, "step": 21720 }, { "epoch": 0.98, "learning_rate": 1.147661626202723e-08, "loss": 2.4627, "step": 21730 }, { "epoch": 0.99, "learning_rate": 1.080179311382823e-08, "loss": 2.4736, "step": 21740 }, { "epoch": 0.99, "learning_rate": 1.0147404647770087e-08, "loss": 2.4471, "step": 21750 }, { "epoch": 0.99, "learning_rate": 9.513452202519757e-09, "loss": 2.4759, "step": 21760 }, { "epoch": 0.99, "learning_rate": 8.899937074940967e-09, "loss": 2.4866, "step": 21770 }, { "epoch": 0.99, "learning_rate": 8.306860520086446e-09, "loss": 2.4661, "step": 21780 }, { "epoch": 0.99, "learning_rate": 7.734223751201253e-09, "loss": 2.4702, "step": 21790 }, { "epoch": 0.99, "learning_rate": 7.182027939715008e-09, "loss": 2.4996, "step": 21800 }, { "epoch": 0.99, "learning_rate": 6.650274215243002e-09, "loss": 2.4813, "step": 21810 }, { "epoch": 0.99, "learning_rate": 6.138963665581754e-09, "loss": 2.4799, "step": 21820 }, { "epoch": 0.99, "learning_rate": 5.648097336709013e-09, "loss": 2.4832, "step": 21830 }, { "epoch": 0.99, "learning_rate": 5.177676232777096e-09, "loss": 2.4554, "step": 21840 }, { "epoch": 0.99, "learning_rate": 4.727701316117328e-09, "loss": 2.4958, "step": 21850 }, { "epoch": 0.99, "learning_rate": 4.298173507233383e-09, "loss": 2.4828, "step": 21860 }, { "epoch": 0.99, "learning_rate": 3.889093684801282e-09, "loss": 2.4568, "step": 21870 }, { "epoch": 0.99, "learning_rate": 3.5004626856660615e-09, "loss": 2.4764, "step": 21880 }, { "epoch": 0.99, "learning_rate": 3.1322813048406674e-09, "loss": 2.4882, "step": 21890 }, { "epoch": 0.99, "learning_rate": 2.78455029550706e-09, "loss": 2.4632, "step": 21900 }, { "epoch": 0.99, "learning_rate": 2.457270369010667e-09, "loss": 2.4827, "step": 21910 }, { "epoch": 0.99, "learning_rate": 2.1504421948603804e-09, "loss": 2.4838, "step": 21920 }, { "epoch": 0.99, "learning_rate": 1.864066400727449e-09, "loss": 2.4774, "step": 21930 }, { "epoch": 0.99, "learning_rate": 1.5981435724454764e-09, "loss": 2.4562, "step": 21940 }, { "epoch": 0.99, "learning_rate": 1.3526742540070913e-09, "loss": 2.4619, "step": 21950 }, { "epoch": 1.0, "learning_rate": 1.1276589475617272e-09, "loss": 2.4786, "step": 21960 }, { "epoch": 1.0, "learning_rate": 9.230981134200623e-10, "loss": 2.4812, "step": 21970 }, { "epoch": 1.0, "learning_rate": 7.389921700462488e-10, "loss": 2.4755, "step": 21980 }, { "epoch": 1.0, "learning_rate": 5.753414940623536e-10, "loss": 2.4642, "step": 21990 }, { "epoch": 1.0, "learning_rate": 4.321464202450276e-10, "loss": 2.4525, "step": 22000 }, { "epoch": 1.0, "eval_accuracy": 0.5417711580252581, "eval_loss": 2.2337894439697266, "eval_runtime": 9.8291, "eval_samples_per_second": 139.076, "eval_steps_per_second": 1.119, "step": 22000 }, { "epoch": 1.0, "learning_rate": 3.094072415255056e-10, "loss": 2.4573, "step": 22010 }, { "epoch": 1.0, "learning_rate": 2.0712420898849616e-10, "loss": 2.4752, "step": 22020 }, { "epoch": 1.0, "learning_rate": 1.2529753187218164e-10, "loss": 2.4564, "step": 22030 }, { "epoch": 1.0, "learning_rate": 6.392737756710788e-11, "loss": 2.4718, "step": 22040 }, { "epoch": 1.0, "learning_rate": 2.3013871617294513e-11, "loss": 2.4656, "step": 22050 }, { "epoch": 1.0, "learning_rate": 2.5570977180144896e-12, "loss": 2.4693, "step": 22060 }, { "epoch": 1.0, "step": 22065, "total_flos": 1.2504673732201021e+20, "train_loss": 2.52519507538886, "train_runtime": 21683.5632, "train_samples_per_second": 130.248, "train_steps_per_second": 1.018 } ], "logging_steps": 10, "max_steps": 22065, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10000, "total_flos": 1.2504673732201021e+20, "train_batch_size": 4, "trial_name": null, "trial_params": null }