{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998272187985947, "eval_steps": 500, "global_step": 4340, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00023037493520704948, "grad_norm": 1594.4566650390625, "learning_rate": 1.1520737327188941e-07, "loss": 22.9062, "step": 1 }, { "epoch": 0.00046074987041409897, "grad_norm": 1567.3255615234375, "learning_rate": 2.3041474654377881e-07, "loss": 22.5337, "step": 2 }, { "epoch": 0.0006911248056211485, "grad_norm": 1452.3426513671875, "learning_rate": 3.456221198156682e-07, "loss": 17.3128, "step": 3 }, { "epoch": 0.0009214997408281979, "grad_norm": 1017.4400634765625, "learning_rate": 4.6082949308755763e-07, "loss": 13.3375, "step": 4 }, { "epoch": 0.0011518746760352473, "grad_norm": 932.8685302734375, "learning_rate": 5.760368663594471e-07, "loss": 13.7848, "step": 5 }, { "epoch": 0.001382249611242297, "grad_norm": 127.31488800048828, "learning_rate": 6.912442396313364e-07, "loss": 12.8038, "step": 6 }, { "epoch": 0.0016126245464493463, "grad_norm": 194.81959533691406, "learning_rate": 8.064516129032258e-07, "loss": 11.9004, "step": 7 }, { "epoch": 0.0018429994816563959, "grad_norm": 542.343505859375, "learning_rate": 9.216589861751153e-07, "loss": 12.7472, "step": 8 }, { "epoch": 0.0020733744168634455, "grad_norm": 505.3138732910156, "learning_rate": 1.0368663594470047e-06, "loss": 12.0547, "step": 9 }, { "epoch": 0.0023037493520704946, "grad_norm": 113.53895568847656, "learning_rate": 1.1520737327188942e-06, "loss": 10.6272, "step": 10 }, { "epoch": 0.002534124287277544, "grad_norm": 180.7011260986328, "learning_rate": 1.2672811059907836e-06, "loss": 10.1842, "step": 11 }, { "epoch": 0.002764499222484594, "grad_norm": 567.2562255859375, "learning_rate": 1.3824884792626729e-06, "loss": 10.3718, "step": 12 }, { "epoch": 0.002994874157691643, "grad_norm": 291.08685302734375, "learning_rate": 1.4976958525345621e-06, "loss": 9.2942, "step": 13 }, { "epoch": 0.0032252490928986926, "grad_norm": 115.84230041503906, "learning_rate": 1.6129032258064516e-06, "loss": 9.5537, "step": 14 }, { "epoch": 0.003455624028105742, "grad_norm": 101.48867797851562, "learning_rate": 1.7281105990783413e-06, "loss": 9.2961, "step": 15 }, { "epoch": 0.0036859989633127917, "grad_norm": 82.81599426269531, "learning_rate": 1.8433179723502305e-06, "loss": 8.8687, "step": 16 }, { "epoch": 0.003916373898519841, "grad_norm": 280.5905456542969, "learning_rate": 1.9585253456221198e-06, "loss": 8.986, "step": 17 }, { "epoch": 0.004146748833726891, "grad_norm": 94.63760375976562, "learning_rate": 2.0737327188940094e-06, "loss": 8.5743, "step": 18 }, { "epoch": 0.00437712376893394, "grad_norm": 84.24728393554688, "learning_rate": 2.1889400921658987e-06, "loss": 8.4601, "step": 19 }, { "epoch": 0.004607498704140989, "grad_norm": 49.04090118408203, "learning_rate": 2.3041474654377884e-06, "loss": 8.2599, "step": 20 }, { "epoch": 0.004837873639348039, "grad_norm": 399.87890625, "learning_rate": 2.4193548387096776e-06, "loss": 8.5471, "step": 21 }, { "epoch": 0.005068248574555088, "grad_norm": 227.25799560546875, "learning_rate": 2.5345622119815673e-06, "loss": 8.2131, "step": 22 }, { "epoch": 0.005298623509762138, "grad_norm": 59.213783264160156, "learning_rate": 2.6497695852534565e-06, "loss": 7.8614, "step": 23 }, { "epoch": 0.005528998444969188, "grad_norm": 48.93996047973633, "learning_rate": 2.7649769585253458e-06, "loss": 7.8214, "step": 24 }, { "epoch": 0.005759373380176237, "grad_norm": 172.89723205566406, "learning_rate": 2.880184331797235e-06, "loss": 7.7157, "step": 25 }, { "epoch": 0.005989748315383286, "grad_norm": 74.95767974853516, "learning_rate": 2.9953917050691243e-06, "loss": 7.5954, "step": 26 }, { "epoch": 0.006220123250590336, "grad_norm": 43.817466735839844, "learning_rate": 3.110599078341014e-06, "loss": 7.5198, "step": 27 }, { "epoch": 0.006450498185797385, "grad_norm": 36.20148849487305, "learning_rate": 3.225806451612903e-06, "loss": 7.449, "step": 28 }, { "epoch": 0.006680873121004435, "grad_norm": 158.88330078125, "learning_rate": 3.341013824884793e-06, "loss": 7.5168, "step": 29 }, { "epoch": 0.006911248056211484, "grad_norm": 193.55319213867188, "learning_rate": 3.4562211981566825e-06, "loss": 7.664, "step": 30 }, { "epoch": 0.0071416229914185335, "grad_norm": 96.09593200683594, "learning_rate": 3.5714285714285714e-06, "loss": 7.4555, "step": 31 }, { "epoch": 0.0073719979266255835, "grad_norm": 44.393035888671875, "learning_rate": 3.686635944700461e-06, "loss": 7.1901, "step": 32 }, { "epoch": 0.007602372861832633, "grad_norm": 86.88634490966797, "learning_rate": 3.8018433179723507e-06, "loss": 7.1674, "step": 33 }, { "epoch": 0.007832747797039682, "grad_norm": 39.123741149902344, "learning_rate": 3.9170506912442395e-06, "loss": 7.1132, "step": 34 }, { "epoch": 0.008063122732246731, "grad_norm": 69.69256591796875, "learning_rate": 4.032258064516129e-06, "loss": 7.0398, "step": 35 }, { "epoch": 0.008293497667453782, "grad_norm": 33.09981155395508, "learning_rate": 4.147465437788019e-06, "loss": 6.7979, "step": 36 }, { "epoch": 0.008523872602660831, "grad_norm": 56.03227233886719, "learning_rate": 4.2626728110599085e-06, "loss": 6.8593, "step": 37 }, { "epoch": 0.00875424753786788, "grad_norm": 88.85287475585938, "learning_rate": 4.377880184331797e-06, "loss": 6.9709, "step": 38 }, { "epoch": 0.00898462247307493, "grad_norm": 32.88846206665039, "learning_rate": 4.493087557603687e-06, "loss": 6.678, "step": 39 }, { "epoch": 0.009214997408281978, "grad_norm": 37.6388053894043, "learning_rate": 4.608294930875577e-06, "loss": 6.6369, "step": 40 }, { "epoch": 0.009445372343489028, "grad_norm": 43.54283905029297, "learning_rate": 4.7235023041474655e-06, "loss": 6.6535, "step": 41 }, { "epoch": 0.009675747278696079, "grad_norm": 99.49658966064453, "learning_rate": 4.838709677419355e-06, "loss": 6.9365, "step": 42 }, { "epoch": 0.009906122213903128, "grad_norm": 41.618350982666016, "learning_rate": 4.953917050691245e-06, "loss": 6.5855, "step": 43 }, { "epoch": 0.010136497149110177, "grad_norm": 45.60376739501953, "learning_rate": 5.0691244239631346e-06, "loss": 6.6531, "step": 44 }, { "epoch": 0.010366872084317226, "grad_norm": 33.11777114868164, "learning_rate": 5.184331797235023e-06, "loss": 6.6513, "step": 45 }, { "epoch": 0.010597247019524275, "grad_norm": 48.56146240234375, "learning_rate": 5.299539170506913e-06, "loss": 6.5863, "step": 46 }, { "epoch": 0.010827621954731326, "grad_norm": 34.525150299072266, "learning_rate": 5.414746543778802e-06, "loss": 6.5775, "step": 47 }, { "epoch": 0.011057996889938375, "grad_norm": 29.672636032104492, "learning_rate": 5.5299539170506915e-06, "loss": 6.4631, "step": 48 }, { "epoch": 0.011288371825145424, "grad_norm": 112.9600830078125, "learning_rate": 5.64516129032258e-06, "loss": 6.6074, "step": 49 }, { "epoch": 0.011518746760352474, "grad_norm": 39.32796859741211, "learning_rate": 5.76036866359447e-06, "loss": 6.5566, "step": 50 }, { "epoch": 0.011749121695559523, "grad_norm": 60.1692008972168, "learning_rate": 5.87557603686636e-06, "loss": 6.5016, "step": 51 }, { "epoch": 0.011979496630766572, "grad_norm": 25.19661521911621, "learning_rate": 5.9907834101382485e-06, "loss": 6.2654, "step": 52 }, { "epoch": 0.012209871565973623, "grad_norm": 65.47019958496094, "learning_rate": 6.105990783410138e-06, "loss": 6.4761, "step": 53 }, { "epoch": 0.012440246501180672, "grad_norm": 40.08528137207031, "learning_rate": 6.221198156682028e-06, "loss": 6.5496, "step": 54 }, { "epoch": 0.012670621436387721, "grad_norm": 32.989341735839844, "learning_rate": 6.3364055299539176e-06, "loss": 6.5793, "step": 55 }, { "epoch": 0.01290099637159477, "grad_norm": 26.665611267089844, "learning_rate": 6.451612903225806e-06, "loss": 6.2146, "step": 56 }, { "epoch": 0.01313137130680182, "grad_norm": 236.6200714111328, "learning_rate": 6.566820276497695e-06, "loss": 7.9681, "step": 57 }, { "epoch": 0.01336174624200887, "grad_norm": 367.0509033203125, "learning_rate": 6.682027649769586e-06, "loss": 8.8008, "step": 58 }, { "epoch": 0.01359212117721592, "grad_norm": 99.5822525024414, "learning_rate": 6.7972350230414745e-06, "loss": 7.0279, "step": 59 }, { "epoch": 0.013822496112422969, "grad_norm": 25.1353759765625, "learning_rate": 6.912442396313365e-06, "loss": 6.3138, "step": 60 }, { "epoch": 0.014052871047630018, "grad_norm": 39.076698303222656, "learning_rate": 7.027649769585254e-06, "loss": 6.6497, "step": 61 }, { "epoch": 0.014283245982837067, "grad_norm": 31.783367156982422, "learning_rate": 7.142857142857143e-06, "loss": 6.5285, "step": 62 }, { "epoch": 0.014513620918044116, "grad_norm": 34.101097106933594, "learning_rate": 7.258064516129033e-06, "loss": 6.6195, "step": 63 }, { "epoch": 0.014743995853251167, "grad_norm": 183.534912109375, "learning_rate": 7.373271889400922e-06, "loss": 7.3999, "step": 64 }, { "epoch": 0.014974370788458216, "grad_norm": 108.92210388183594, "learning_rate": 7.488479262672811e-06, "loss": 7.3254, "step": 65 }, { "epoch": 0.015204745723665265, "grad_norm": 44.97346115112305, "learning_rate": 7.603686635944701e-06, "loss": 6.548, "step": 66 }, { "epoch": 0.015435120658872314, "grad_norm": 20.90800666809082, "learning_rate": 7.71889400921659e-06, "loss": 6.2843, "step": 67 }, { "epoch": 0.015665495594079364, "grad_norm": 34.19648361206055, "learning_rate": 7.834101382488479e-06, "loss": 6.5051, "step": 68 }, { "epoch": 0.015895870529286413, "grad_norm": 138.57003784179688, "learning_rate": 7.949308755760369e-06, "loss": 6.9987, "step": 69 }, { "epoch": 0.016126245464493462, "grad_norm": 66.55327606201172, "learning_rate": 8.064516129032258e-06, "loss": 6.9552, "step": 70 }, { "epoch": 0.01635662039970051, "grad_norm": 23.039445877075195, "learning_rate": 8.179723502304148e-06, "loss": 6.6434, "step": 71 }, { "epoch": 0.016586995334907564, "grad_norm": 109.40170288085938, "learning_rate": 8.294930875576038e-06, "loss": 7.2498, "step": 72 }, { "epoch": 0.016817370270114613, "grad_norm": 68.2265853881836, "learning_rate": 8.410138248847926e-06, "loss": 7.0412, "step": 73 }, { "epoch": 0.017047745205321662, "grad_norm": 103.85686492919922, "learning_rate": 8.525345622119817e-06, "loss": 7.0934, "step": 74 }, { "epoch": 0.01727812014052871, "grad_norm": 23.992341995239258, "learning_rate": 8.640552995391705e-06, "loss": 6.9462, "step": 75 }, { "epoch": 0.01750849507573576, "grad_norm": 30.483966827392578, "learning_rate": 8.755760368663595e-06, "loss": 6.9441, "step": 76 }, { "epoch": 0.01773887001094281, "grad_norm": 110.04121398925781, "learning_rate": 8.870967741935484e-06, "loss": 7.2447, "step": 77 }, { "epoch": 0.01796924494614986, "grad_norm": 46.26778030395508, "learning_rate": 8.986175115207374e-06, "loss": 6.7643, "step": 78 }, { "epoch": 0.018199619881356908, "grad_norm": 32.12928009033203, "learning_rate": 9.101382488479262e-06, "loss": 6.7936, "step": 79 }, { "epoch": 0.018429994816563957, "grad_norm": 19.87050437927246, "learning_rate": 9.216589861751153e-06, "loss": 6.6529, "step": 80 }, { "epoch": 0.018660369751771006, "grad_norm": 51.821712493896484, "learning_rate": 9.331797235023041e-06, "loss": 6.7386, "step": 81 }, { "epoch": 0.018890744686978055, "grad_norm": 24.177959442138672, "learning_rate": 9.447004608294931e-06, "loss": 6.5528, "step": 82 }, { "epoch": 0.019121119622185108, "grad_norm": 32.943119049072266, "learning_rate": 9.56221198156682e-06, "loss": 6.6655, "step": 83 }, { "epoch": 0.019351494557392157, "grad_norm": 33.819091796875, "learning_rate": 9.67741935483871e-06, "loss": 6.5115, "step": 84 }, { "epoch": 0.019581869492599206, "grad_norm": 79.07975006103516, "learning_rate": 9.7926267281106e-06, "loss": 6.7879, "step": 85 }, { "epoch": 0.019812244427806255, "grad_norm": 20.27090072631836, "learning_rate": 9.90783410138249e-06, "loss": 6.7788, "step": 86 }, { "epoch": 0.020042619363013305, "grad_norm": 69.12643432617188, "learning_rate": 1.0023041474654378e-05, "loss": 6.8325, "step": 87 }, { "epoch": 0.020272994298220354, "grad_norm": 15.358186721801758, "learning_rate": 1.0138248847926269e-05, "loss": 6.5288, "step": 88 }, { "epoch": 0.020503369233427403, "grad_norm": 154.51055908203125, "learning_rate": 1.0253456221198157e-05, "loss": 7.978, "step": 89 }, { "epoch": 0.020733744168634452, "grad_norm": 32.67668533325195, "learning_rate": 1.0368663594470047e-05, "loss": 6.6559, "step": 90 }, { "epoch": 0.0209641191038415, "grad_norm": 20.563152313232422, "learning_rate": 1.0483870967741936e-05, "loss": 6.9109, "step": 91 }, { "epoch": 0.02119449403904855, "grad_norm": 15.178911209106445, "learning_rate": 1.0599078341013826e-05, "loss": 7.0306, "step": 92 }, { "epoch": 0.0214248689742556, "grad_norm": 18.797069549560547, "learning_rate": 1.0714285714285714e-05, "loss": 6.7414, "step": 93 }, { "epoch": 0.021655243909462652, "grad_norm": 25.08207893371582, "learning_rate": 1.0829493087557604e-05, "loss": 6.4629, "step": 94 }, { "epoch": 0.0218856188446697, "grad_norm": 147.73683166503906, "learning_rate": 1.0944700460829493e-05, "loss": 8.1874, "step": 95 }, { "epoch": 0.02211599377987675, "grad_norm": 120.57660675048828, "learning_rate": 1.1059907834101383e-05, "loss": 8.6597, "step": 96 }, { "epoch": 0.0223463687150838, "grad_norm": 119.59420776367188, "learning_rate": 1.1175115207373273e-05, "loss": 7.2706, "step": 97 }, { "epoch": 0.02257674365029085, "grad_norm": 25.834177017211914, "learning_rate": 1.129032258064516e-05, "loss": 6.8541, "step": 98 }, { "epoch": 0.022807118585497898, "grad_norm": 21.855329513549805, "learning_rate": 1.1405529953917052e-05, "loss": 6.6759, "step": 99 }, { "epoch": 0.023037493520704947, "grad_norm": 18.868379592895508, "learning_rate": 1.152073732718894e-05, "loss": 6.5239, "step": 100 }, { "epoch": 0.023267868455911996, "grad_norm": 41.770118713378906, "learning_rate": 1.163594470046083e-05, "loss": 6.7153, "step": 101 }, { "epoch": 0.023498243391119045, "grad_norm": 11.46066951751709, "learning_rate": 1.175115207373272e-05, "loss": 6.5468, "step": 102 }, { "epoch": 0.023728618326326095, "grad_norm": 20.574621200561523, "learning_rate": 1.1866359447004609e-05, "loss": 6.6786, "step": 103 }, { "epoch": 0.023958993261533144, "grad_norm": 15.287468910217285, "learning_rate": 1.1981566820276497e-05, "loss": 6.5862, "step": 104 }, { "epoch": 0.024189368196740196, "grad_norm": 28.37909507751465, "learning_rate": 1.2096774193548388e-05, "loss": 6.6368, "step": 105 }, { "epoch": 0.024419743131947245, "grad_norm": 29.917186737060547, "learning_rate": 1.2211981566820276e-05, "loss": 6.7996, "step": 106 }, { "epoch": 0.024650118067154295, "grad_norm": 13.021017074584961, "learning_rate": 1.2327188940092166e-05, "loss": 6.3875, "step": 107 }, { "epoch": 0.024880493002361344, "grad_norm": 16.882863998413086, "learning_rate": 1.2442396313364056e-05, "loss": 6.6672, "step": 108 }, { "epoch": 0.025110867937568393, "grad_norm": 15.507762908935547, "learning_rate": 1.2557603686635947e-05, "loss": 6.2909, "step": 109 }, { "epoch": 0.025341242872775442, "grad_norm": 28.54067039489746, "learning_rate": 1.2672811059907835e-05, "loss": 8.1596, "step": 110 }, { "epoch": 0.02557161780798249, "grad_norm": 60.45378875732422, "learning_rate": 1.2788018433179725e-05, "loss": 7.9875, "step": 111 }, { "epoch": 0.02580199274318954, "grad_norm": 69.33523559570312, "learning_rate": 1.2903225806451613e-05, "loss": 8.7867, "step": 112 }, { "epoch": 0.02603236767839659, "grad_norm": 43.21647262573242, "learning_rate": 1.3018433179723502e-05, "loss": 8.2251, "step": 113 }, { "epoch": 0.02626274261360364, "grad_norm": 16.18756866455078, "learning_rate": 1.313364055299539e-05, "loss": 7.3762, "step": 114 }, { "epoch": 0.026493117548810688, "grad_norm": 13.258502006530762, "learning_rate": 1.3248847926267283e-05, "loss": 7.3179, "step": 115 }, { "epoch": 0.02672349248401774, "grad_norm": 17.896329879760742, "learning_rate": 1.3364055299539171e-05, "loss": 7.1203, "step": 116 }, { "epoch": 0.02695386741922479, "grad_norm": 14.105202674865723, "learning_rate": 1.3479262672811061e-05, "loss": 6.8528, "step": 117 }, { "epoch": 0.02718424235443184, "grad_norm": 19.443058013916016, "learning_rate": 1.3594470046082949e-05, "loss": 6.5741, "step": 118 }, { "epoch": 0.027414617289638888, "grad_norm": 63.326812744140625, "learning_rate": 1.3709677419354839e-05, "loss": 7.2936, "step": 119 }, { "epoch": 0.027644992224845937, "grad_norm": 41.20466232299805, "learning_rate": 1.382488479262673e-05, "loss": 7.5243, "step": 120 }, { "epoch": 0.027875367160052986, "grad_norm": 28.661746978759766, "learning_rate": 1.3940092165898618e-05, "loss": 7.2676, "step": 121 }, { "epoch": 0.028105742095260036, "grad_norm": 24.014629364013672, "learning_rate": 1.4055299539170508e-05, "loss": 6.949, "step": 122 }, { "epoch": 0.028336117030467085, "grad_norm": 13.124433517456055, "learning_rate": 1.4170506912442397e-05, "loss": 6.721, "step": 123 }, { "epoch": 0.028566491965674134, "grad_norm": 8.793283462524414, "learning_rate": 1.4285714285714285e-05, "loss": 6.7842, "step": 124 }, { "epoch": 0.028796866900881183, "grad_norm": 26.413114547729492, "learning_rate": 1.4400921658986175e-05, "loss": 7.0163, "step": 125 }, { "epoch": 0.029027241836088232, "grad_norm": 38.37158966064453, "learning_rate": 1.4516129032258066e-05, "loss": 7.0974, "step": 126 }, { "epoch": 0.029257616771295285, "grad_norm": 26.630558013916016, "learning_rate": 1.4631336405529954e-05, "loss": 7.1387, "step": 127 }, { "epoch": 0.029487991706502334, "grad_norm": 18.00218391418457, "learning_rate": 1.4746543778801844e-05, "loss": 7.1257, "step": 128 }, { "epoch": 0.029718366641709383, "grad_norm": 11.482783317565918, "learning_rate": 1.4861751152073732e-05, "loss": 6.8459, "step": 129 }, { "epoch": 0.029948741576916432, "grad_norm": 8.87709903717041, "learning_rate": 1.4976958525345622e-05, "loss": 6.7509, "step": 130 }, { "epoch": 0.03017911651212348, "grad_norm": 11.119589805603027, "learning_rate": 1.5092165898617513e-05, "loss": 6.6887, "step": 131 }, { "epoch": 0.03040949144733053, "grad_norm": 32.762054443359375, "learning_rate": 1.5207373271889403e-05, "loss": 6.7217, "step": 132 }, { "epoch": 0.03063986638253758, "grad_norm": 35.915977478027344, "learning_rate": 1.5322580645161292e-05, "loss": 7.0565, "step": 133 }, { "epoch": 0.03087024131774463, "grad_norm": 54.48796081542969, "learning_rate": 1.543778801843318e-05, "loss": 7.2302, "step": 134 }, { "epoch": 0.031100616252951678, "grad_norm": 29.374467849731445, "learning_rate": 1.555299539170507e-05, "loss": 6.9438, "step": 135 }, { "epoch": 0.03133099118815873, "grad_norm": 18.620025634765625, "learning_rate": 1.5668202764976958e-05, "loss": 6.9608, "step": 136 }, { "epoch": 0.031561366123365776, "grad_norm": 10.405224800109863, "learning_rate": 1.578341013824885e-05, "loss": 6.8175, "step": 137 }, { "epoch": 0.031791741058572826, "grad_norm": 8.030860900878906, "learning_rate": 1.5898617511520737e-05, "loss": 6.7288, "step": 138 }, { "epoch": 0.032022115993779875, "grad_norm": 8.503053665161133, "learning_rate": 1.6013824884792627e-05, "loss": 6.6433, "step": 139 }, { "epoch": 0.032252490928986924, "grad_norm": 8.725685119628906, "learning_rate": 1.6129032258064517e-05, "loss": 6.4787, "step": 140 }, { "epoch": 0.03248286586419397, "grad_norm": 12.266093254089355, "learning_rate": 1.6244239631336406e-05, "loss": 6.3937, "step": 141 }, { "epoch": 0.03271324079940102, "grad_norm": 9.070500373840332, "learning_rate": 1.6359447004608296e-05, "loss": 6.5592, "step": 142 }, { "epoch": 0.03294361573460807, "grad_norm": 16.0535831451416, "learning_rate": 1.6474654377880186e-05, "loss": 6.4269, "step": 143 }, { "epoch": 0.03317399066981513, "grad_norm": 9.274794578552246, "learning_rate": 1.6589861751152075e-05, "loss": 6.4934, "step": 144 }, { "epoch": 0.03340436560502218, "grad_norm": 7.2650251388549805, "learning_rate": 1.6705069124423965e-05, "loss": 6.3559, "step": 145 }, { "epoch": 0.033634740540229226, "grad_norm": 10.921993255615234, "learning_rate": 1.682027649769585e-05, "loss": 6.2568, "step": 146 }, { "epoch": 0.033865115475436275, "grad_norm": 10.156475067138672, "learning_rate": 1.693548387096774e-05, "loss": 6.3112, "step": 147 }, { "epoch": 0.034095490410643324, "grad_norm": 7.286623954772949, "learning_rate": 1.7050691244239634e-05, "loss": 6.1908, "step": 148 }, { "epoch": 0.03432586534585037, "grad_norm": 4.7289228439331055, "learning_rate": 1.7165898617511524e-05, "loss": 6.2857, "step": 149 }, { "epoch": 0.03455624028105742, "grad_norm": 10.633949279785156, "learning_rate": 1.728110599078341e-05, "loss": 6.2565, "step": 150 }, { "epoch": 0.03478661521626447, "grad_norm": 9.130669593811035, "learning_rate": 1.73963133640553e-05, "loss": 6.2454, "step": 151 }, { "epoch": 0.03501699015147152, "grad_norm": 6.253342151641846, "learning_rate": 1.751152073732719e-05, "loss": 6.2446, "step": 152 }, { "epoch": 0.03524736508667857, "grad_norm": 9.536768913269043, "learning_rate": 1.762672811059908e-05, "loss": 6.2483, "step": 153 }, { "epoch": 0.03547774002188562, "grad_norm": 7.208560943603516, "learning_rate": 1.774193548387097e-05, "loss": 6.3481, "step": 154 }, { "epoch": 0.03570811495709267, "grad_norm": 7.844919681549072, "learning_rate": 1.785714285714286e-05, "loss": 6.0237, "step": 155 }, { "epoch": 0.03593848989229972, "grad_norm": 19.922014236450195, "learning_rate": 1.7972350230414748e-05, "loss": 6.3382, "step": 156 }, { "epoch": 0.036168864827506766, "grad_norm": 7.899311542510986, "learning_rate": 1.8087557603686638e-05, "loss": 6.5564, "step": 157 }, { "epoch": 0.036399239762713816, "grad_norm": 4.210349082946777, "learning_rate": 1.8202764976958524e-05, "loss": 6.2467, "step": 158 }, { "epoch": 0.036629614697920865, "grad_norm": 9.54643726348877, "learning_rate": 1.8317972350230417e-05, "loss": 6.3017, "step": 159 }, { "epoch": 0.036859989633127914, "grad_norm": 8.60799789428711, "learning_rate": 1.8433179723502307e-05, "loss": 6.1425, "step": 160 }, { "epoch": 0.03709036456833496, "grad_norm": 5.543302536010742, "learning_rate": 1.8548387096774193e-05, "loss": 6.0362, "step": 161 }, { "epoch": 0.03732073950354201, "grad_norm": 16.04932975769043, "learning_rate": 1.8663594470046083e-05, "loss": 6.3585, "step": 162 }, { "epoch": 0.03755111443874906, "grad_norm": 7.42307186126709, "learning_rate": 1.8778801843317972e-05, "loss": 6.2374, "step": 163 }, { "epoch": 0.03778148937395611, "grad_norm": 5.0764594078063965, "learning_rate": 1.8894009216589862e-05, "loss": 6.1595, "step": 164 }, { "epoch": 0.03801186430916316, "grad_norm": 9.386462211608887, "learning_rate": 1.9009216589861752e-05, "loss": 6.172, "step": 165 }, { "epoch": 0.038242239244370216, "grad_norm": 6.157299518585205, "learning_rate": 1.912442396313364e-05, "loss": 6.3213, "step": 166 }, { "epoch": 0.038472614179577265, "grad_norm": 9.257742881774902, "learning_rate": 1.923963133640553e-05, "loss": 6.5346, "step": 167 }, { "epoch": 0.038702989114784314, "grad_norm": 8.435046195983887, "learning_rate": 1.935483870967742e-05, "loss": 6.546, "step": 168 }, { "epoch": 0.03893336404999136, "grad_norm": 4.502070903778076, "learning_rate": 1.9470046082949307e-05, "loss": 6.2328, "step": 169 }, { "epoch": 0.03916373898519841, "grad_norm": 7.6476545333862305, "learning_rate": 1.95852534562212e-05, "loss": 6.2247, "step": 170 }, { "epoch": 0.03939411392040546, "grad_norm": 21.256046295166016, "learning_rate": 1.970046082949309e-05, "loss": 6.4694, "step": 171 }, { "epoch": 0.03962448885561251, "grad_norm": 19.301620483398438, "learning_rate": 1.981566820276498e-05, "loss": 6.6163, "step": 172 }, { "epoch": 0.03985486379081956, "grad_norm": 5.733494758605957, "learning_rate": 1.9930875576036866e-05, "loss": 6.2254, "step": 173 }, { "epoch": 0.04008523872602661, "grad_norm": 6.285529613494873, "learning_rate": 2.0046082949308755e-05, "loss": 6.272, "step": 174 }, { "epoch": 0.04031561366123366, "grad_norm": 4.591073036193848, "learning_rate": 2.0161290322580645e-05, "loss": 6.1812, "step": 175 }, { "epoch": 0.04054598859644071, "grad_norm": 5.485518932342529, "learning_rate": 2.0276497695852538e-05, "loss": 6.3558, "step": 176 }, { "epoch": 0.04077636353164776, "grad_norm": 4.753672122955322, "learning_rate": 2.0391705069124424e-05, "loss": 6.1454, "step": 177 }, { "epoch": 0.041006738466854806, "grad_norm": 5.435968399047852, "learning_rate": 2.0506912442396314e-05, "loss": 6.07, "step": 178 }, { "epoch": 0.041237113402061855, "grad_norm": 8.77924633026123, "learning_rate": 2.0622119815668204e-05, "loss": 6.1585, "step": 179 }, { "epoch": 0.041467488337268904, "grad_norm": 5.366504192352295, "learning_rate": 2.0737327188940094e-05, "loss": 6.1213, "step": 180 }, { "epoch": 0.04169786327247595, "grad_norm": 5.897524356842041, "learning_rate": 2.0852534562211983e-05, "loss": 6.0698, "step": 181 }, { "epoch": 0.041928238207683, "grad_norm": 4.4051337242126465, "learning_rate": 2.0967741935483873e-05, "loss": 6.2967, "step": 182 }, { "epoch": 0.04215861314289005, "grad_norm": 3.155621290206909, "learning_rate": 2.1082949308755763e-05, "loss": 6.0862, "step": 183 }, { "epoch": 0.0423889880780971, "grad_norm": 9.620055198669434, "learning_rate": 2.1198156682027652e-05, "loss": 6.2186, "step": 184 }, { "epoch": 0.04261936301330415, "grad_norm": 10.667745590209961, "learning_rate": 2.131336405529954e-05, "loss": 6.2051, "step": 185 }, { "epoch": 0.0428497379485112, "grad_norm": 5.259674072265625, "learning_rate": 2.1428571428571428e-05, "loss": 6.3328, "step": 186 }, { "epoch": 0.04308011288371825, "grad_norm": 3.1524181365966797, "learning_rate": 2.154377880184332e-05, "loss": 6.2196, "step": 187 }, { "epoch": 0.043310487818925304, "grad_norm": 3.635998487472534, "learning_rate": 2.1658986175115207e-05, "loss": 5.9951, "step": 188 }, { "epoch": 0.04354086275413235, "grad_norm": 6.7658281326293945, "learning_rate": 2.1774193548387097e-05, "loss": 6.0388, "step": 189 }, { "epoch": 0.0437712376893394, "grad_norm": 4.929965019226074, "learning_rate": 2.1889400921658987e-05, "loss": 6.1855, "step": 190 }, { "epoch": 0.04400161262454645, "grad_norm": 6.018585681915283, "learning_rate": 2.2004608294930877e-05, "loss": 6.0664, "step": 191 }, { "epoch": 0.0442319875597535, "grad_norm": 3.4982683658599854, "learning_rate": 2.2119815668202766e-05, "loss": 5.9201, "step": 192 }, { "epoch": 0.04446236249496055, "grad_norm": 7.258645057678223, "learning_rate": 2.2235023041474656e-05, "loss": 6.1048, "step": 193 }, { "epoch": 0.0446927374301676, "grad_norm": 3.8074333667755127, "learning_rate": 2.2350230414746546e-05, "loss": 5.8739, "step": 194 }, { "epoch": 0.04492311236537465, "grad_norm": 4.930709362030029, "learning_rate": 2.2465437788018435e-05, "loss": 5.8946, "step": 195 }, { "epoch": 0.0451534873005817, "grad_norm": 5.257072448730469, "learning_rate": 2.258064516129032e-05, "loss": 6.1072, "step": 196 }, { "epoch": 0.04538386223578875, "grad_norm": 3.3343987464904785, "learning_rate": 2.269585253456221e-05, "loss": 5.7407, "step": 197 }, { "epoch": 0.045614237170995796, "grad_norm": 11.439895629882812, "learning_rate": 2.2811059907834104e-05, "loss": 6.0228, "step": 198 }, { "epoch": 0.045844612106202845, "grad_norm": 8.961471557617188, "learning_rate": 2.2926267281105994e-05, "loss": 5.8788, "step": 199 }, { "epoch": 0.046074987041409894, "grad_norm": 4.506409168243408, "learning_rate": 2.304147465437788e-05, "loss": 5.867, "step": 200 }, { "epoch": 0.04630536197661694, "grad_norm": 4.064340114593506, "learning_rate": 2.315668202764977e-05, "loss": 5.7824, "step": 201 }, { "epoch": 0.04653573691182399, "grad_norm": 3.9953746795654297, "learning_rate": 2.327188940092166e-05, "loss": 5.7914, "step": 202 }, { "epoch": 0.04676611184703104, "grad_norm": 6.593684196472168, "learning_rate": 2.338709677419355e-05, "loss": 5.6312, "step": 203 }, { "epoch": 0.04699648678223809, "grad_norm": 14.462120056152344, "learning_rate": 2.350230414746544e-05, "loss": 5.853, "step": 204 }, { "epoch": 0.04722686171744514, "grad_norm": 4.630040645599365, "learning_rate": 2.361751152073733e-05, "loss": 5.6611, "step": 205 }, { "epoch": 0.04745723665265219, "grad_norm": 4.87545919418335, "learning_rate": 2.3732718894009218e-05, "loss": 5.8721, "step": 206 }, { "epoch": 0.04768761158785924, "grad_norm": 4.383213520050049, "learning_rate": 2.3847926267281108e-05, "loss": 5.7307, "step": 207 }, { "epoch": 0.04791798652306629, "grad_norm": 5.39447021484375, "learning_rate": 2.3963133640552994e-05, "loss": 5.7439, "step": 208 }, { "epoch": 0.04814836145827334, "grad_norm": 3.950260639190674, "learning_rate": 2.4078341013824887e-05, "loss": 5.813, "step": 209 }, { "epoch": 0.04837873639348039, "grad_norm": 4.496622085571289, "learning_rate": 2.4193548387096777e-05, "loss": 5.6824, "step": 210 }, { "epoch": 0.04860911132868744, "grad_norm": 10.339614868164062, "learning_rate": 2.4308755760368667e-05, "loss": 5.673, "step": 211 }, { "epoch": 0.04883948626389449, "grad_norm": 10.465367317199707, "learning_rate": 2.4423963133640553e-05, "loss": 5.9042, "step": 212 }, { "epoch": 0.04906986119910154, "grad_norm": 7.2779316902160645, "learning_rate": 2.4539170506912443e-05, "loss": 5.8795, "step": 213 }, { "epoch": 0.04930023613430859, "grad_norm": 9.472447395324707, "learning_rate": 2.4654377880184332e-05, "loss": 5.7785, "step": 214 }, { "epoch": 0.04953061106951564, "grad_norm": 7.958222389221191, "learning_rate": 2.4769585253456222e-05, "loss": 5.8912, "step": 215 }, { "epoch": 0.04976098600472269, "grad_norm": 3.428849220275879, "learning_rate": 2.488479262672811e-05, "loss": 5.954, "step": 216 }, { "epoch": 0.04999136093992974, "grad_norm": 4.390380859375, "learning_rate": 2.5e-05, "loss": 5.8049, "step": 217 }, { "epoch": 0.050221735875136786, "grad_norm": 4.296689033508301, "learning_rate": 2.5115207373271894e-05, "loss": 5.7523, "step": 218 }, { "epoch": 0.050452110810343835, "grad_norm": 4.756168842315674, "learning_rate": 2.523041474654378e-05, "loss": 5.5805, "step": 219 }, { "epoch": 0.050682485745550884, "grad_norm": 7.496289253234863, "learning_rate": 2.534562211981567e-05, "loss": 5.6795, "step": 220 }, { "epoch": 0.05091286068075793, "grad_norm": 21.85667610168457, "learning_rate": 2.5460829493087556e-05, "loss": 6.192, "step": 221 }, { "epoch": 0.05114323561596498, "grad_norm": 7.925740718841553, "learning_rate": 2.557603686635945e-05, "loss": 5.8755, "step": 222 }, { "epoch": 0.05137361055117203, "grad_norm": 5.8627214431762695, "learning_rate": 2.569124423963134e-05, "loss": 5.7962, "step": 223 }, { "epoch": 0.05160398548637908, "grad_norm": 4.581672668457031, "learning_rate": 2.5806451612903226e-05, "loss": 5.6927, "step": 224 }, { "epoch": 0.05183436042158613, "grad_norm": 3.8783817291259766, "learning_rate": 2.592165898617512e-05, "loss": 5.5172, "step": 225 }, { "epoch": 0.05206473535679318, "grad_norm": 4.383774280548096, "learning_rate": 2.6036866359447005e-05, "loss": 5.5652, "step": 226 }, { "epoch": 0.05229511029200023, "grad_norm": 5.264337539672852, "learning_rate": 2.6152073732718895e-05, "loss": 5.6639, "step": 227 }, { "epoch": 0.05252548522720728, "grad_norm": 5.456058502197266, "learning_rate": 2.626728110599078e-05, "loss": 5.4965, "step": 228 }, { "epoch": 0.05275586016241433, "grad_norm": 5.120140075683594, "learning_rate": 2.6382488479262674e-05, "loss": 5.4905, "step": 229 }, { "epoch": 0.052986235097621376, "grad_norm": 4.532288551330566, "learning_rate": 2.6497695852534567e-05, "loss": 5.471, "step": 230 }, { "epoch": 0.053216610032828425, "grad_norm": 3.1011741161346436, "learning_rate": 2.661290322580645e-05, "loss": 5.3674, "step": 231 }, { "epoch": 0.05344698496803548, "grad_norm": 2.872101306915283, "learning_rate": 2.6728110599078343e-05, "loss": 5.5139, "step": 232 }, { "epoch": 0.05367735990324253, "grad_norm": 3.0827088356018066, "learning_rate": 2.684331797235023e-05, "loss": 5.4256, "step": 233 }, { "epoch": 0.05390773483844958, "grad_norm": 2.915752410888672, "learning_rate": 2.6958525345622122e-05, "loss": 5.4456, "step": 234 }, { "epoch": 0.05413810977365663, "grad_norm": 3.222841739654541, "learning_rate": 2.7073732718894012e-05, "loss": 5.4395, "step": 235 }, { "epoch": 0.05436848470886368, "grad_norm": 4.253575325012207, "learning_rate": 2.7188940092165898e-05, "loss": 5.3481, "step": 236 }, { "epoch": 0.05459885964407073, "grad_norm": 8.239569664001465, "learning_rate": 2.730414746543779e-05, "loss": 5.4102, "step": 237 }, { "epoch": 0.054829234579277776, "grad_norm": 4.697526931762695, "learning_rate": 2.7419354838709678e-05, "loss": 5.3389, "step": 238 }, { "epoch": 0.055059609514484825, "grad_norm": 3.892936944961548, "learning_rate": 2.7534562211981567e-05, "loss": 5.4552, "step": 239 }, { "epoch": 0.055289984449691874, "grad_norm": 2.3581833839416504, "learning_rate": 2.764976958525346e-05, "loss": 5.2796, "step": 240 }, { "epoch": 0.055520359384898924, "grad_norm": 6.906960964202881, "learning_rate": 2.7764976958525347e-05, "loss": 5.3193, "step": 241 }, { "epoch": 0.05575073432010597, "grad_norm": 4.946208953857422, "learning_rate": 2.7880184331797236e-05, "loss": 5.4414, "step": 242 }, { "epoch": 0.05598110925531302, "grad_norm": 4.640061378479004, "learning_rate": 2.7995391705069123e-05, "loss": 5.3629, "step": 243 }, { "epoch": 0.05621148419052007, "grad_norm": 4.520419120788574, "learning_rate": 2.8110599078341016e-05, "loss": 5.359, "step": 244 }, { "epoch": 0.05644185912572712, "grad_norm": 5.221437931060791, "learning_rate": 2.822580645161291e-05, "loss": 5.3849, "step": 245 }, { "epoch": 0.05667223406093417, "grad_norm": 3.349203586578369, "learning_rate": 2.8341013824884795e-05, "loss": 5.2438, "step": 246 }, { "epoch": 0.05690260899614122, "grad_norm": 2.9473555088043213, "learning_rate": 2.8456221198156685e-05, "loss": 5.3257, "step": 247 }, { "epoch": 0.05713298393134827, "grad_norm": 3.000087261199951, "learning_rate": 2.857142857142857e-05, "loss": 5.4129, "step": 248 }, { "epoch": 0.05736335886655532, "grad_norm": 4.294400691986084, "learning_rate": 2.8686635944700464e-05, "loss": 5.2456, "step": 249 }, { "epoch": 0.057593733801762366, "grad_norm": 2.668947458267212, "learning_rate": 2.880184331797235e-05, "loss": 5.1572, "step": 250 }, { "epoch": 0.057824108736969415, "grad_norm": 2.7505874633789062, "learning_rate": 2.891705069124424e-05, "loss": 5.0642, "step": 251 }, { "epoch": 0.058054483672176464, "grad_norm": 4.551009654998779, "learning_rate": 2.9032258064516133e-05, "loss": 5.3784, "step": 252 }, { "epoch": 0.05828485860738351, "grad_norm": 3.4702234268188477, "learning_rate": 2.914746543778802e-05, "loss": 5.1284, "step": 253 }, { "epoch": 0.05851523354259057, "grad_norm": 2.5467958450317383, "learning_rate": 2.926267281105991e-05, "loss": 5.1852, "step": 254 }, { "epoch": 0.05874560847779762, "grad_norm": 2.4651448726654053, "learning_rate": 2.9377880184331795e-05, "loss": 5.1707, "step": 255 }, { "epoch": 0.05897598341300467, "grad_norm": 4.5291337966918945, "learning_rate": 2.9493087557603688e-05, "loss": 5.1818, "step": 256 }, { "epoch": 0.05920635834821172, "grad_norm": 2.7584426403045654, "learning_rate": 2.960829493087558e-05, "loss": 5.1762, "step": 257 }, { "epoch": 0.059436733283418766, "grad_norm": 3.0259592533111572, "learning_rate": 2.9723502304147464e-05, "loss": 5.1813, "step": 258 }, { "epoch": 0.059667108218625815, "grad_norm": 3.501207113265991, "learning_rate": 2.9838709677419357e-05, "loss": 5.1446, "step": 259 }, { "epoch": 0.059897483153832864, "grad_norm": 3.3434364795684814, "learning_rate": 2.9953917050691244e-05, "loss": 5.0949, "step": 260 }, { "epoch": 0.060127858089039914, "grad_norm": 2.8805439472198486, "learning_rate": 3.0069124423963137e-05, "loss": 5.1788, "step": 261 }, { "epoch": 0.06035823302424696, "grad_norm": 2.448179006576538, "learning_rate": 3.0184331797235026e-05, "loss": 5.0815, "step": 262 }, { "epoch": 0.06058860795945401, "grad_norm": 4.603321075439453, "learning_rate": 3.0299539170506913e-05, "loss": 5.1289, "step": 263 }, { "epoch": 0.06081898289466106, "grad_norm": 3.2496676445007324, "learning_rate": 3.0414746543778806e-05, "loss": 5.0371, "step": 264 }, { "epoch": 0.06104935782986811, "grad_norm": 3.0860774517059326, "learning_rate": 3.052995391705069e-05, "loss": 5.0348, "step": 265 }, { "epoch": 0.06127973276507516, "grad_norm": 3.4716455936431885, "learning_rate": 3.0645161290322585e-05, "loss": 5.2508, "step": 266 }, { "epoch": 0.06151010770028221, "grad_norm": 2.695411205291748, "learning_rate": 3.076036866359447e-05, "loss": 5.1875, "step": 267 }, { "epoch": 0.06174048263548926, "grad_norm": 2.168516159057617, "learning_rate": 3.087557603686636e-05, "loss": 5.2362, "step": 268 }, { "epoch": 0.06197085757069631, "grad_norm": 3.2687554359436035, "learning_rate": 3.0990783410138254e-05, "loss": 5.1398, "step": 269 }, { "epoch": 0.062201232505903356, "grad_norm": 3.612806558609009, "learning_rate": 3.110599078341014e-05, "loss": 5.0379, "step": 270 }, { "epoch": 0.062431607441110405, "grad_norm": 3.084611654281616, "learning_rate": 3.122119815668203e-05, "loss": 5.0965, "step": 271 }, { "epoch": 0.06266198237631745, "grad_norm": 3.216107130050659, "learning_rate": 3.1336405529953916e-05, "loss": 4.9937, "step": 272 }, { "epoch": 0.06289235731152451, "grad_norm": 2.387368679046631, "learning_rate": 3.1451612903225806e-05, "loss": 5.0171, "step": 273 }, { "epoch": 0.06312273224673155, "grad_norm": 2.9483132362365723, "learning_rate": 3.15668202764977e-05, "loss": 5.0565, "step": 274 }, { "epoch": 0.06335310718193861, "grad_norm": 2.0053296089172363, "learning_rate": 3.1682027649769585e-05, "loss": 5.0101, "step": 275 }, { "epoch": 0.06358348211714565, "grad_norm": 2.746181011199951, "learning_rate": 3.1797235023041475e-05, "loss": 5.0298, "step": 276 }, { "epoch": 0.06381385705235271, "grad_norm": 1.8751425743103027, "learning_rate": 3.1912442396313365e-05, "loss": 5.0216, "step": 277 }, { "epoch": 0.06404423198755975, "grad_norm": 2.0997838973999023, "learning_rate": 3.2027649769585254e-05, "loss": 4.888, "step": 278 }, { "epoch": 0.0642746069227668, "grad_norm": 2.2946183681488037, "learning_rate": 3.2142857142857144e-05, "loss": 4.8875, "step": 279 }, { "epoch": 0.06450498185797385, "grad_norm": 2.5832936763763428, "learning_rate": 3.2258064516129034e-05, "loss": 4.8033, "step": 280 }, { "epoch": 0.0647353567931809, "grad_norm": 3.909541606903076, "learning_rate": 3.237327188940092e-05, "loss": 5.0186, "step": 281 }, { "epoch": 0.06496573172838795, "grad_norm": 2.605252981185913, "learning_rate": 3.248847926267281e-05, "loss": 4.9028, "step": 282 }, { "epoch": 0.065196106663595, "grad_norm": 2.41758131980896, "learning_rate": 3.26036866359447e-05, "loss": 4.9259, "step": 283 }, { "epoch": 0.06542648159880204, "grad_norm": 2.5665547847747803, "learning_rate": 3.271889400921659e-05, "loss": 4.8866, "step": 284 }, { "epoch": 0.0656568565340091, "grad_norm": 4.012719631195068, "learning_rate": 3.283410138248848e-05, "loss": 4.7964, "step": 285 }, { "epoch": 0.06588723146921614, "grad_norm": 3.18152117729187, "learning_rate": 3.294930875576037e-05, "loss": 4.9071, "step": 286 }, { "epoch": 0.0661176064044232, "grad_norm": 2.7538106441497803, "learning_rate": 3.306451612903226e-05, "loss": 4.8512, "step": 287 }, { "epoch": 0.06634798133963025, "grad_norm": 1.9454307556152344, "learning_rate": 3.317972350230415e-05, "loss": 4.7769, "step": 288 }, { "epoch": 0.0665783562748373, "grad_norm": 2.0154223442077637, "learning_rate": 3.3294930875576034e-05, "loss": 4.8521, "step": 289 }, { "epoch": 0.06680873121004435, "grad_norm": 2.0577802658081055, "learning_rate": 3.341013824884793e-05, "loss": 4.687, "step": 290 }, { "epoch": 0.0670391061452514, "grad_norm": 2.255734443664551, "learning_rate": 3.352534562211982e-05, "loss": 4.6875, "step": 291 }, { "epoch": 0.06726948108045845, "grad_norm": 3.3694067001342773, "learning_rate": 3.36405529953917e-05, "loss": 4.609, "step": 292 }, { "epoch": 0.0674998560156655, "grad_norm": 2.2380590438842773, "learning_rate": 3.37557603686636e-05, "loss": 4.7347, "step": 293 }, { "epoch": 0.06773023095087255, "grad_norm": 2.684077024459839, "learning_rate": 3.387096774193548e-05, "loss": 4.668, "step": 294 }, { "epoch": 0.06796060588607959, "grad_norm": 2.7844464778900146, "learning_rate": 3.398617511520737e-05, "loss": 4.6853, "step": 295 }, { "epoch": 0.06819098082128665, "grad_norm": 1.7667138576507568, "learning_rate": 3.410138248847927e-05, "loss": 4.7349, "step": 296 }, { "epoch": 0.06842135575649369, "grad_norm": 2.4507548809051514, "learning_rate": 3.421658986175115e-05, "loss": 4.7376, "step": 297 }, { "epoch": 0.06865173069170075, "grad_norm": 1.7319035530090332, "learning_rate": 3.433179723502305e-05, "loss": 4.6561, "step": 298 }, { "epoch": 0.06888210562690779, "grad_norm": 3.340327501296997, "learning_rate": 3.444700460829493e-05, "loss": 4.6483, "step": 299 }, { "epoch": 0.06911248056211484, "grad_norm": 3.1018238067626953, "learning_rate": 3.456221198156682e-05, "loss": 4.7905, "step": 300 }, { "epoch": 0.06934285549732189, "grad_norm": 2.019655466079712, "learning_rate": 3.467741935483872e-05, "loss": 4.6308, "step": 301 }, { "epoch": 0.06957323043252894, "grad_norm": 2.313497543334961, "learning_rate": 3.47926267281106e-05, "loss": 4.6812, "step": 302 }, { "epoch": 0.06980360536773599, "grad_norm": 2.370356559753418, "learning_rate": 3.490783410138249e-05, "loss": 4.7006, "step": 303 }, { "epoch": 0.07003398030294304, "grad_norm": 2.242685556411743, "learning_rate": 3.502304147465438e-05, "loss": 4.651, "step": 304 }, { "epoch": 0.07026435523815008, "grad_norm": 2.6164326667785645, "learning_rate": 3.513824884792627e-05, "loss": 4.7071, "step": 305 }, { "epoch": 0.07049473017335714, "grad_norm": 2.491487979888916, "learning_rate": 3.525345622119816e-05, "loss": 4.6408, "step": 306 }, { "epoch": 0.07072510510856418, "grad_norm": 3.0500853061676025, "learning_rate": 3.536866359447005e-05, "loss": 4.6631, "step": 307 }, { "epoch": 0.07095548004377124, "grad_norm": 3.121361017227173, "learning_rate": 3.548387096774194e-05, "loss": 4.5441, "step": 308 }, { "epoch": 0.0711858549789783, "grad_norm": 1.835186243057251, "learning_rate": 3.559907834101383e-05, "loss": 4.5763, "step": 309 }, { "epoch": 0.07141622991418534, "grad_norm": 3.9447991847991943, "learning_rate": 3.571428571428572e-05, "loss": 4.6204, "step": 310 }, { "epoch": 0.07164660484939239, "grad_norm": 2.690244436264038, "learning_rate": 3.58294930875576e-05, "loss": 4.7332, "step": 311 }, { "epoch": 0.07187697978459943, "grad_norm": 2.4133219718933105, "learning_rate": 3.5944700460829496e-05, "loss": 4.5912, "step": 312 }, { "epoch": 0.07210735471980649, "grad_norm": 1.5372670888900757, "learning_rate": 3.6059907834101386e-05, "loss": 4.6339, "step": 313 }, { "epoch": 0.07233772965501353, "grad_norm": 2.3969459533691406, "learning_rate": 3.6175115207373276e-05, "loss": 4.7821, "step": 314 }, { "epoch": 0.07256810459022059, "grad_norm": 2.096712112426758, "learning_rate": 3.6290322580645165e-05, "loss": 4.6636, "step": 315 }, { "epoch": 0.07279847952542763, "grad_norm": 1.723876953125, "learning_rate": 3.640552995391705e-05, "loss": 4.554, "step": 316 }, { "epoch": 0.07302885446063469, "grad_norm": 3.1935081481933594, "learning_rate": 3.6520737327188945e-05, "loss": 4.601, "step": 317 }, { "epoch": 0.07325922939584173, "grad_norm": 2.553666830062866, "learning_rate": 3.6635944700460834e-05, "loss": 4.5053, "step": 318 }, { "epoch": 0.07348960433104879, "grad_norm": 1.9630941152572632, "learning_rate": 3.675115207373272e-05, "loss": 4.5374, "step": 319 }, { "epoch": 0.07371997926625583, "grad_norm": 1.6277803182601929, "learning_rate": 3.6866359447004614e-05, "loss": 4.5016, "step": 320 }, { "epoch": 0.07395035420146288, "grad_norm": 2.011202573776245, "learning_rate": 3.6981566820276497e-05, "loss": 4.5516, "step": 321 }, { "epoch": 0.07418072913666993, "grad_norm": 1.664764165878296, "learning_rate": 3.7096774193548386e-05, "loss": 4.6432, "step": 322 }, { "epoch": 0.07441110407187698, "grad_norm": 1.6655915975570679, "learning_rate": 3.721198156682028e-05, "loss": 4.4517, "step": 323 }, { "epoch": 0.07464147900708402, "grad_norm": 2.366396188735962, "learning_rate": 3.7327188940092166e-05, "loss": 4.4171, "step": 324 }, { "epoch": 0.07487185394229108, "grad_norm": 2.5583620071411133, "learning_rate": 3.744239631336406e-05, "loss": 4.5928, "step": 325 }, { "epoch": 0.07510222887749812, "grad_norm": 1.6068898439407349, "learning_rate": 3.7557603686635945e-05, "loss": 4.4136, "step": 326 }, { "epoch": 0.07533260381270518, "grad_norm": 2.4570958614349365, "learning_rate": 3.7672811059907835e-05, "loss": 4.4999, "step": 327 }, { "epoch": 0.07556297874791222, "grad_norm": 2.3231022357940674, "learning_rate": 3.7788018433179724e-05, "loss": 4.4208, "step": 328 }, { "epoch": 0.07579335368311928, "grad_norm": 2.1784634590148926, "learning_rate": 3.7903225806451614e-05, "loss": 4.43, "step": 329 }, { "epoch": 0.07602372861832632, "grad_norm": 1.6127572059631348, "learning_rate": 3.8018433179723504e-05, "loss": 4.3376, "step": 330 }, { "epoch": 0.07625410355353338, "grad_norm": 45.16930389404297, "learning_rate": 3.813364055299539e-05, "loss": 4.5538, "step": 331 }, { "epoch": 0.07648447848874043, "grad_norm": 3.786637783050537, "learning_rate": 3.824884792626728e-05, "loss": 4.615, "step": 332 }, { "epoch": 0.07671485342394747, "grad_norm": 2.0460128784179688, "learning_rate": 3.836405529953917e-05, "loss": 4.5226, "step": 333 }, { "epoch": 0.07694522835915453, "grad_norm": 1.8169045448303223, "learning_rate": 3.847926267281106e-05, "loss": 4.523, "step": 334 }, { "epoch": 0.07717560329436157, "grad_norm": 1.839950442314148, "learning_rate": 3.859447004608295e-05, "loss": 4.3564, "step": 335 }, { "epoch": 0.07740597822956863, "grad_norm": 2.0956501960754395, "learning_rate": 3.870967741935484e-05, "loss": 4.4445, "step": 336 }, { "epoch": 0.07763635316477567, "grad_norm": 2.33282732963562, "learning_rate": 3.882488479262673e-05, "loss": 4.338, "step": 337 }, { "epoch": 0.07786672809998273, "grad_norm": 1.7841521501541138, "learning_rate": 3.8940092165898614e-05, "loss": 4.5268, "step": 338 }, { "epoch": 0.07809710303518977, "grad_norm": 1.7240924835205078, "learning_rate": 3.905529953917051e-05, "loss": 4.3695, "step": 339 }, { "epoch": 0.07832747797039682, "grad_norm": 1.9093135595321655, "learning_rate": 3.91705069124424e-05, "loss": 4.3781, "step": 340 }, { "epoch": 0.07855785290560387, "grad_norm": 1.5652649402618408, "learning_rate": 3.928571428571429e-05, "loss": 4.3809, "step": 341 }, { "epoch": 0.07878822784081092, "grad_norm": 1.7985944747924805, "learning_rate": 3.940092165898618e-05, "loss": 4.3109, "step": 342 }, { "epoch": 0.07901860277601797, "grad_norm": 1.6924302577972412, "learning_rate": 3.951612903225806e-05, "loss": 4.3512, "step": 343 }, { "epoch": 0.07924897771122502, "grad_norm": 1.4189538955688477, "learning_rate": 3.963133640552996e-05, "loss": 4.3398, "step": 344 }, { "epoch": 0.07947935264643206, "grad_norm": 2.7740564346313477, "learning_rate": 3.974654377880185e-05, "loss": 4.3673, "step": 345 }, { "epoch": 0.07970972758163912, "grad_norm": 1.4598958492279053, "learning_rate": 3.986175115207373e-05, "loss": 4.2282, "step": 346 }, { "epoch": 0.07994010251684616, "grad_norm": 1.778603434562683, "learning_rate": 3.997695852534563e-05, "loss": 4.2708, "step": 347 }, { "epoch": 0.08017047745205322, "grad_norm": 1.7756723165512085, "learning_rate": 4.009216589861751e-05, "loss": 4.2242, "step": 348 }, { "epoch": 0.08040085238726026, "grad_norm": 1.512681007385254, "learning_rate": 4.02073732718894e-05, "loss": 4.3123, "step": 349 }, { "epoch": 0.08063122732246732, "grad_norm": 2.1157407760620117, "learning_rate": 4.032258064516129e-05, "loss": 4.2753, "step": 350 }, { "epoch": 0.08086160225767436, "grad_norm": 2.7157623767852783, "learning_rate": 4.043778801843318e-05, "loss": 4.3553, "step": 351 }, { "epoch": 0.08109197719288141, "grad_norm": 1.722152590751648, "learning_rate": 4.0552995391705076e-05, "loss": 4.2139, "step": 352 }, { "epoch": 0.08132235212808847, "grad_norm": 2.1604456901550293, "learning_rate": 4.066820276497696e-05, "loss": 4.3053, "step": 353 }, { "epoch": 0.08155272706329551, "grad_norm": 1.6570841073989868, "learning_rate": 4.078341013824885e-05, "loss": 4.3399, "step": 354 }, { "epoch": 0.08178310199850257, "grad_norm": 2.2384395599365234, "learning_rate": 4.089861751152074e-05, "loss": 4.2907, "step": 355 }, { "epoch": 0.08201347693370961, "grad_norm": 1.7170519828796387, "learning_rate": 4.101382488479263e-05, "loss": 4.2607, "step": 356 }, { "epoch": 0.08224385186891667, "grad_norm": 1.547389030456543, "learning_rate": 4.112903225806452e-05, "loss": 4.1436, "step": 357 }, { "epoch": 0.08247422680412371, "grad_norm": 1.8681570291519165, "learning_rate": 4.124423963133641e-05, "loss": 4.2229, "step": 358 }, { "epoch": 0.08270460173933077, "grad_norm": 1.6170064210891724, "learning_rate": 4.13594470046083e-05, "loss": 4.1295, "step": 359 }, { "epoch": 0.08293497667453781, "grad_norm": 1.9492337703704834, "learning_rate": 4.147465437788019e-05, "loss": 4.153, "step": 360 }, { "epoch": 0.08316535160974486, "grad_norm": 2.27461314201355, "learning_rate": 4.158986175115208e-05, "loss": 4.1871, "step": 361 }, { "epoch": 0.0833957265449519, "grad_norm": 1.722580075263977, "learning_rate": 4.1705069124423966e-05, "loss": 4.1824, "step": 362 }, { "epoch": 0.08362610148015896, "grad_norm": 1.4347522258758545, "learning_rate": 4.1820276497695856e-05, "loss": 4.206, "step": 363 }, { "epoch": 0.083856476415366, "grad_norm": 2.0974271297454834, "learning_rate": 4.1935483870967746e-05, "loss": 4.2403, "step": 364 }, { "epoch": 0.08408685135057306, "grad_norm": 1.3827341794967651, "learning_rate": 4.205069124423963e-05, "loss": 4.0936, "step": 365 }, { "epoch": 0.0843172262857801, "grad_norm": 1.3549107313156128, "learning_rate": 4.2165898617511525e-05, "loss": 4.0989, "step": 366 }, { "epoch": 0.08454760122098716, "grad_norm": 2.243731737136841, "learning_rate": 4.228110599078341e-05, "loss": 4.1313, "step": 367 }, { "epoch": 0.0847779761561942, "grad_norm": 1.6700782775878906, "learning_rate": 4.2396313364055304e-05, "loss": 4.0952, "step": 368 }, { "epoch": 0.08500835109140126, "grad_norm": 1.3244996070861816, "learning_rate": 4.2511520737327194e-05, "loss": 4.0729, "step": 369 }, { "epoch": 0.0852387260266083, "grad_norm": 2.1057326793670654, "learning_rate": 4.262672811059908e-05, "loss": 4.1979, "step": 370 }, { "epoch": 0.08546910096181536, "grad_norm": 1.5440417528152466, "learning_rate": 4.2741935483870973e-05, "loss": 4.1251, "step": 371 }, { "epoch": 0.0856994758970224, "grad_norm": 1.6285864114761353, "learning_rate": 4.2857142857142856e-05, "loss": 4.1043, "step": 372 }, { "epoch": 0.08592985083222945, "grad_norm": 1.7524313926696777, "learning_rate": 4.2972350230414746e-05, "loss": 4.1038, "step": 373 }, { "epoch": 0.0861602257674365, "grad_norm": 1.6570454835891724, "learning_rate": 4.308755760368664e-05, "loss": 4.155, "step": 374 }, { "epoch": 0.08639060070264355, "grad_norm": 47.79402542114258, "learning_rate": 4.3202764976958525e-05, "loss": 4.4206, "step": 375 }, { "epoch": 0.08662097563785061, "grad_norm": 2.9920742511749268, "learning_rate": 4.3317972350230415e-05, "loss": 4.3379, "step": 376 }, { "epoch": 0.08685135057305765, "grad_norm": 1.5256212949752808, "learning_rate": 4.3433179723502305e-05, "loss": 4.0817, "step": 377 }, { "epoch": 0.0870817255082647, "grad_norm": 1.7618952989578247, "learning_rate": 4.3548387096774194e-05, "loss": 4.085, "step": 378 }, { "epoch": 0.08731210044347175, "grad_norm": 1.6105235815048218, "learning_rate": 4.366359447004609e-05, "loss": 4.0581, "step": 379 }, { "epoch": 0.0875424753786788, "grad_norm": 1.7078181505203247, "learning_rate": 4.3778801843317974e-05, "loss": 4.1479, "step": 380 }, { "epoch": 0.08777285031388585, "grad_norm": 1.9367135763168335, "learning_rate": 4.389400921658986e-05, "loss": 4.098, "step": 381 }, { "epoch": 0.0880032252490929, "grad_norm": 2.0167195796966553, "learning_rate": 4.400921658986175e-05, "loss": 4.1421, "step": 382 }, { "epoch": 0.08823360018429995, "grad_norm": 1.5214768648147583, "learning_rate": 4.412442396313364e-05, "loss": 4.1083, "step": 383 }, { "epoch": 0.088463975119507, "grad_norm": 2.1924123764038086, "learning_rate": 4.423963133640553e-05, "loss": 4.099, "step": 384 }, { "epoch": 0.08869435005471404, "grad_norm": 1.5435683727264404, "learning_rate": 4.435483870967742e-05, "loss": 4.1404, "step": 385 }, { "epoch": 0.0889247249899211, "grad_norm": 1.559342384338379, "learning_rate": 4.447004608294931e-05, "loss": 4.1689, "step": 386 }, { "epoch": 0.08915509992512814, "grad_norm": 1.3777494430541992, "learning_rate": 4.45852534562212e-05, "loss": 4.0269, "step": 387 }, { "epoch": 0.0893854748603352, "grad_norm": 1.5149104595184326, "learning_rate": 4.470046082949309e-05, "loss": 4.0251, "step": 388 }, { "epoch": 0.08961584979554224, "grad_norm": 1.3710771799087524, "learning_rate": 4.4815668202764974e-05, "loss": 4.0016, "step": 389 }, { "epoch": 0.0898462247307493, "grad_norm": 1.2723767757415771, "learning_rate": 4.493087557603687e-05, "loss": 3.9478, "step": 390 }, { "epoch": 0.09007659966595634, "grad_norm": 1.4704326391220093, "learning_rate": 4.504608294930876e-05, "loss": 3.9671, "step": 391 }, { "epoch": 0.0903069746011634, "grad_norm": 1.4796208143234253, "learning_rate": 4.516129032258064e-05, "loss": 4.0111, "step": 392 }, { "epoch": 0.09053734953637044, "grad_norm": 1.2395745515823364, "learning_rate": 4.527649769585254e-05, "loss": 3.9096, "step": 393 }, { "epoch": 0.0907677244715775, "grad_norm": 1.16193425655365, "learning_rate": 4.539170506912442e-05, "loss": 3.9909, "step": 394 }, { "epoch": 0.09099809940678454, "grad_norm": 1.4799085855484009, "learning_rate": 4.550691244239632e-05, "loss": 3.9123, "step": 395 }, { "epoch": 0.09122847434199159, "grad_norm": 1.3686784505844116, "learning_rate": 4.562211981566821e-05, "loss": 3.8244, "step": 396 }, { "epoch": 0.09145884927719863, "grad_norm": 1.8400416374206543, "learning_rate": 4.573732718894009e-05, "loss": 3.9392, "step": 397 }, { "epoch": 0.09168922421240569, "grad_norm": 1.6548770666122437, "learning_rate": 4.585253456221199e-05, "loss": 4.0477, "step": 398 }, { "epoch": 0.09191959914761275, "grad_norm": 1.2030049562454224, "learning_rate": 4.596774193548387e-05, "loss": 3.8756, "step": 399 }, { "epoch": 0.09214997408281979, "grad_norm": 1.4500328302383423, "learning_rate": 4.608294930875576e-05, "loss": 3.8607, "step": 400 }, { "epoch": 0.09238034901802684, "grad_norm": 1.4373520612716675, "learning_rate": 4.619815668202766e-05, "loss": 3.9254, "step": 401 }, { "epoch": 0.09261072395323389, "grad_norm": 2.107468843460083, "learning_rate": 4.631336405529954e-05, "loss": 3.8863, "step": 402 }, { "epoch": 0.09284109888844094, "grad_norm": 1.4776005744934082, "learning_rate": 4.642857142857143e-05, "loss": 3.8914, "step": 403 }, { "epoch": 0.09307147382364798, "grad_norm": 2.1544620990753174, "learning_rate": 4.654377880184332e-05, "loss": 3.8637, "step": 404 }, { "epoch": 0.09330184875885504, "grad_norm": 1.5037600994110107, "learning_rate": 4.665898617511521e-05, "loss": 3.8106, "step": 405 }, { "epoch": 0.09353222369406208, "grad_norm": 1.762795090675354, "learning_rate": 4.67741935483871e-05, "loss": 3.9003, "step": 406 }, { "epoch": 0.09376259862926914, "grad_norm": 1.334369421005249, "learning_rate": 4.688940092165899e-05, "loss": 3.9405, "step": 407 }, { "epoch": 0.09399297356447618, "grad_norm": 1.509394884109497, "learning_rate": 4.700460829493088e-05, "loss": 3.9704, "step": 408 }, { "epoch": 0.09422334849968324, "grad_norm": 1.3410989046096802, "learning_rate": 4.711981566820277e-05, "loss": 3.7945, "step": 409 }, { "epoch": 0.09445372343489028, "grad_norm": 1.4017417430877686, "learning_rate": 4.723502304147466e-05, "loss": 3.8051, "step": 410 }, { "epoch": 0.09468409837009734, "grad_norm": 2.700751781463623, "learning_rate": 4.735023041474655e-05, "loss": 3.892, "step": 411 }, { "epoch": 0.09491447330530438, "grad_norm": 1.9442697763442993, "learning_rate": 4.7465437788018436e-05, "loss": 3.8253, "step": 412 }, { "epoch": 0.09514484824051143, "grad_norm": 1.257071852684021, "learning_rate": 4.7580645161290326e-05, "loss": 3.8696, "step": 413 }, { "epoch": 0.09537522317571848, "grad_norm": 1.903388261795044, "learning_rate": 4.7695852534562216e-05, "loss": 3.7929, "step": 414 }, { "epoch": 0.09560559811092553, "grad_norm": 1.5545810461044312, "learning_rate": 4.7811059907834105e-05, "loss": 3.7642, "step": 415 }, { "epoch": 0.09583597304613257, "grad_norm": 1.389573574066162, "learning_rate": 4.792626728110599e-05, "loss": 3.8, "step": 416 }, { "epoch": 0.09606634798133963, "grad_norm": 1.4427945613861084, "learning_rate": 4.8041474654377885e-05, "loss": 3.7985, "step": 417 }, { "epoch": 0.09629672291654667, "grad_norm": 1.060282826423645, "learning_rate": 4.8156682027649774e-05, "loss": 3.7601, "step": 418 }, { "epoch": 0.09652709785175373, "grad_norm": 1.9607473611831665, "learning_rate": 4.827188940092166e-05, "loss": 3.7996, "step": 419 }, { "epoch": 0.09675747278696079, "grad_norm": 1.4580037593841553, "learning_rate": 4.8387096774193554e-05, "loss": 3.7477, "step": 420 }, { "epoch": 0.09698784772216783, "grad_norm": 1.4635933637619019, "learning_rate": 4.850230414746544e-05, "loss": 3.8056, "step": 421 }, { "epoch": 0.09721822265737488, "grad_norm": 1.3334156274795532, "learning_rate": 4.861751152073733e-05, "loss": 3.7636, "step": 422 }, { "epoch": 0.09744859759258193, "grad_norm": 1.3015316724777222, "learning_rate": 4.873271889400922e-05, "loss": 3.7296, "step": 423 }, { "epoch": 0.09767897252778898, "grad_norm": 1.6161762475967407, "learning_rate": 4.8847926267281106e-05, "loss": 3.6642, "step": 424 }, { "epoch": 0.09790934746299602, "grad_norm": 1.3481590747833252, "learning_rate": 4.8963133640553e-05, "loss": 3.7277, "step": 425 }, { "epoch": 0.09813972239820308, "grad_norm": 1.4616525173187256, "learning_rate": 4.9078341013824885e-05, "loss": 3.718, "step": 426 }, { "epoch": 0.09837009733341012, "grad_norm": 1.130948543548584, "learning_rate": 4.9193548387096775e-05, "loss": 3.6084, "step": 427 }, { "epoch": 0.09860047226861718, "grad_norm": 1.3699579238891602, "learning_rate": 4.9308755760368664e-05, "loss": 3.7219, "step": 428 }, { "epoch": 0.09883084720382422, "grad_norm": 1.3362312316894531, "learning_rate": 4.9423963133640554e-05, "loss": 3.6989, "step": 429 }, { "epoch": 0.09906122213903128, "grad_norm": 1.1975277662277222, "learning_rate": 4.9539170506912444e-05, "loss": 3.7152, "step": 430 }, { "epoch": 0.09929159707423832, "grad_norm": 1.5545814037322998, "learning_rate": 4.965437788018433e-05, "loss": 3.6748, "step": 431 }, { "epoch": 0.09952197200944538, "grad_norm": 1.3987513780593872, "learning_rate": 4.976958525345622e-05, "loss": 3.7189, "step": 432 }, { "epoch": 0.09975234694465242, "grad_norm": 1.3021868467330933, "learning_rate": 4.988479262672811e-05, "loss": 3.6829, "step": 433 }, { "epoch": 0.09998272187985947, "grad_norm": 1.6729682683944702, "learning_rate": 5e-05, "loss": 3.5428, "step": 434 }, { "epoch": 0.10021309681506652, "grad_norm": 2.0897364616394043, "learning_rate": 4.999999191378551e-05, "loss": 3.6177, "step": 435 }, { "epoch": 0.10044347175027357, "grad_norm": 1.5938087701797485, "learning_rate": 4.999996765514727e-05, "loss": 3.6451, "step": 436 }, { "epoch": 0.10067384668548061, "grad_norm": 1.7185041904449463, "learning_rate": 4.999992722410096e-05, "loss": 3.55, "step": 437 }, { "epoch": 0.10090422162068767, "grad_norm": 1.4308032989501953, "learning_rate": 4.999987062067275e-05, "loss": 3.6945, "step": 438 }, { "epoch": 0.10113459655589471, "grad_norm": 1.2790651321411133, "learning_rate": 4.999979784489925e-05, "loss": 3.5771, "step": 439 }, { "epoch": 0.10136497149110177, "grad_norm": 1.44058096408844, "learning_rate": 4.9999708896827545e-05, "loss": 3.6951, "step": 440 }, { "epoch": 0.10159534642630881, "grad_norm": 1.6534777879714966, "learning_rate": 4.999960377651517e-05, "loss": 3.6594, "step": 441 }, { "epoch": 0.10182572136151587, "grad_norm": 1.2747985124588013, "learning_rate": 4.9999482484030126e-05, "loss": 3.6363, "step": 442 }, { "epoch": 0.10205609629672292, "grad_norm": 1.319456696510315, "learning_rate": 4.999934501945087e-05, "loss": 3.5272, "step": 443 }, { "epoch": 0.10228647123192997, "grad_norm": 1.5695756673812866, "learning_rate": 4.999919138286634e-05, "loss": 3.5904, "step": 444 }, { "epoch": 0.10251684616713702, "grad_norm": 1.5203303098678589, "learning_rate": 4.999902157437592e-05, "loss": 3.5228, "step": 445 }, { "epoch": 0.10274722110234406, "grad_norm": 1.5462837219238281, "learning_rate": 4.999883559408946e-05, "loss": 3.5019, "step": 446 }, { "epoch": 0.10297759603755112, "grad_norm": 1.412718653678894, "learning_rate": 4.999863344212726e-05, "loss": 3.4981, "step": 447 }, { "epoch": 0.10320797097275816, "grad_norm": 1.4553322792053223, "learning_rate": 4.99984151186201e-05, "loss": 3.5582, "step": 448 }, { "epoch": 0.10343834590796522, "grad_norm": 1.3562270402908325, "learning_rate": 4.999818062370922e-05, "loss": 3.4977, "step": 449 }, { "epoch": 0.10366872084317226, "grad_norm": 1.144981026649475, "learning_rate": 4.9997929957546295e-05, "loss": 3.5125, "step": 450 }, { "epoch": 0.10389909577837932, "grad_norm": 1.415742039680481, "learning_rate": 4.9997663120293494e-05, "loss": 3.5127, "step": 451 }, { "epoch": 0.10412947071358636, "grad_norm": 1.4453996419906616, "learning_rate": 4.999738011212344e-05, "loss": 3.4373, "step": 452 }, { "epoch": 0.10435984564879341, "grad_norm": 1.4047281742095947, "learning_rate": 4.999708093321919e-05, "loss": 3.3869, "step": 453 }, { "epoch": 0.10459022058400046, "grad_norm": 1.4768929481506348, "learning_rate": 4.9996765583774294e-05, "loss": 3.453, "step": 454 }, { "epoch": 0.10482059551920751, "grad_norm": 1.270439624786377, "learning_rate": 4.999643406399275e-05, "loss": 3.3983, "step": 455 }, { "epoch": 0.10505097045441456, "grad_norm": 1.3315300941467285, "learning_rate": 4.9996086374089014e-05, "loss": 3.3892, "step": 456 }, { "epoch": 0.10528134538962161, "grad_norm": 1.494204044342041, "learning_rate": 4.9995722514288014e-05, "loss": 3.4972, "step": 457 }, { "epoch": 0.10551172032482865, "grad_norm": 1.673117756843567, "learning_rate": 4.999534248482511e-05, "loss": 3.485, "step": 458 }, { "epoch": 0.10574209526003571, "grad_norm": 1.2268890142440796, "learning_rate": 4.9994946285946164e-05, "loss": 3.3377, "step": 459 }, { "epoch": 0.10597247019524275, "grad_norm": 1.2153892517089844, "learning_rate": 4.9994533917907474e-05, "loss": 3.3896, "step": 460 }, { "epoch": 0.10620284513044981, "grad_norm": 1.1724520921707153, "learning_rate": 4.9994105380975785e-05, "loss": 3.3628, "step": 461 }, { "epoch": 0.10643322006565685, "grad_norm": 1.2380563020706177, "learning_rate": 4.999366067542833e-05, "loss": 3.3411, "step": 462 }, { "epoch": 0.1066635950008639, "grad_norm": 1.1636296510696411, "learning_rate": 4.999319980155278e-05, "loss": 3.3526, "step": 463 }, { "epoch": 0.10689396993607096, "grad_norm": 1.2142494916915894, "learning_rate": 4.999272275964727e-05, "loss": 3.3376, "step": 464 }, { "epoch": 0.107124344871278, "grad_norm": 1.1903728246688843, "learning_rate": 4.999222955002041e-05, "loss": 3.3495, "step": 465 }, { "epoch": 0.10735471980648506, "grad_norm": 1.2283053398132324, "learning_rate": 4.999172017299124e-05, "loss": 3.3379, "step": 466 }, { "epoch": 0.1075850947416921, "grad_norm": 1.6733195781707764, "learning_rate": 4.9991194628889295e-05, "loss": 3.3276, "step": 467 }, { "epoch": 0.10781546967689916, "grad_norm": 1.542602777481079, "learning_rate": 4.999065291805452e-05, "loss": 3.3173, "step": 468 }, { "epoch": 0.1080458446121062, "grad_norm": 1.116407036781311, "learning_rate": 4.9990095040837384e-05, "loss": 3.2757, "step": 469 }, { "epoch": 0.10827621954731326, "grad_norm": 1.2951315641403198, "learning_rate": 4.998952099759874e-05, "loss": 3.299, "step": 470 }, { "epoch": 0.1085065944825203, "grad_norm": 1.2223730087280273, "learning_rate": 4.9988930788709945e-05, "loss": 3.3317, "step": 471 }, { "epoch": 0.10873696941772736, "grad_norm": 1.3285932540893555, "learning_rate": 4.9988324414552815e-05, "loss": 3.3844, "step": 472 }, { "epoch": 0.1089673443529344, "grad_norm": 1.1416820287704468, "learning_rate": 4.99877018755196e-05, "loss": 3.264, "step": 473 }, { "epoch": 0.10919771928814145, "grad_norm": 1.3511673212051392, "learning_rate": 4.9987063172013025e-05, "loss": 3.3395, "step": 474 }, { "epoch": 0.1094280942233485, "grad_norm": 1.5644711256027222, "learning_rate": 4.998640830444627e-05, "loss": 3.2815, "step": 475 }, { "epoch": 0.10965846915855555, "grad_norm": 1.316091775894165, "learning_rate": 4.998573727324295e-05, "loss": 3.2345, "step": 476 }, { "epoch": 0.1098888440937626, "grad_norm": 1.4375779628753662, "learning_rate": 4.998505007883717e-05, "loss": 3.1693, "step": 477 }, { "epoch": 0.11011921902896965, "grad_norm": 1.0675535202026367, "learning_rate": 4.998434672167347e-05, "loss": 3.1893, "step": 478 }, { "epoch": 0.11034959396417669, "grad_norm": 1.335480809211731, "learning_rate": 4.998362720220684e-05, "loss": 3.2589, "step": 479 }, { "epoch": 0.11057996889938375, "grad_norm": 1.2530442476272583, "learning_rate": 4.998289152090275e-05, "loss": 3.1996, "step": 480 }, { "epoch": 0.11081034383459079, "grad_norm": 1.2685022354125977, "learning_rate": 4.99821396782371e-05, "loss": 3.2309, "step": 481 }, { "epoch": 0.11104071876979785, "grad_norm": 1.1864712238311768, "learning_rate": 4.998137167469625e-05, "loss": 3.2285, "step": 482 }, { "epoch": 0.11127109370500489, "grad_norm": 1.2745239734649658, "learning_rate": 4.998058751077705e-05, "loss": 3.1204, "step": 483 }, { "epoch": 0.11150146864021195, "grad_norm": 1.1159403324127197, "learning_rate": 4.997978718698673e-05, "loss": 3.1841, "step": 484 }, { "epoch": 0.11173184357541899, "grad_norm": 1.1317418813705444, "learning_rate": 4.997897070384304e-05, "loss": 3.1207, "step": 485 }, { "epoch": 0.11196221851062604, "grad_norm": 1.3020508289337158, "learning_rate": 4.997813806187416e-05, "loss": 3.1465, "step": 486 }, { "epoch": 0.1121925934458331, "grad_norm": 1.3162682056427002, "learning_rate": 4.997728926161872e-05, "loss": 3.1314, "step": 487 }, { "epoch": 0.11242296838104014, "grad_norm": 1.150675654411316, "learning_rate": 4.9976424303625815e-05, "loss": 3.2067, "step": 488 }, { "epoch": 0.1126533433162472, "grad_norm": 1.2268855571746826, "learning_rate": 4.997554318845497e-05, "loss": 3.1353, "step": 489 }, { "epoch": 0.11288371825145424, "grad_norm": 1.543678879737854, "learning_rate": 4.997464591667619e-05, "loss": 3.1538, "step": 490 }, { "epoch": 0.1131140931866613, "grad_norm": 1.0117371082305908, "learning_rate": 4.9973732488869904e-05, "loss": 3.1005, "step": 491 }, { "epoch": 0.11334446812186834, "grad_norm": 1.1045228242874146, "learning_rate": 4.9972802905627016e-05, "loss": 3.0636, "step": 492 }, { "epoch": 0.1135748430570754, "grad_norm": 1.5203276872634888, "learning_rate": 4.997185716754887e-05, "loss": 3.1568, "step": 493 }, { "epoch": 0.11380521799228244, "grad_norm": 1.5281258821487427, "learning_rate": 4.997089527524725e-05, "loss": 3.0939, "step": 494 }, { "epoch": 0.11403559292748949, "grad_norm": 1.1460622549057007, "learning_rate": 4.996991722934442e-05, "loss": 3.1042, "step": 495 }, { "epoch": 0.11426596786269654, "grad_norm": 1.2073392868041992, "learning_rate": 4.996892303047306e-05, "loss": 3.1116, "step": 496 }, { "epoch": 0.11449634279790359, "grad_norm": 1.2669086456298828, "learning_rate": 4.9967912679276316e-05, "loss": 3.1108, "step": 497 }, { "epoch": 0.11472671773311063, "grad_norm": 1.09354829788208, "learning_rate": 4.99668861764078e-05, "loss": 3.0616, "step": 498 }, { "epoch": 0.11495709266831769, "grad_norm": 1.497175693511963, "learning_rate": 4.996584352253153e-05, "loss": 3.0822, "step": 499 }, { "epoch": 0.11518746760352473, "grad_norm": 1.4674029350280762, "learning_rate": 4.9964784718322e-05, "loss": 3.0658, "step": 500 }, { "epoch": 0.11541784253873179, "grad_norm": 1.2889704704284668, "learning_rate": 4.996370976446415e-05, "loss": 3.0911, "step": 501 }, { "epoch": 0.11564821747393883, "grad_norm": 1.1823532581329346, "learning_rate": 4.9962618661653374e-05, "loss": 3.034, "step": 502 }, { "epoch": 0.11587859240914589, "grad_norm": 1.1607518196105957, "learning_rate": 4.99615114105955e-05, "loss": 3.0247, "step": 503 }, { "epoch": 0.11610896734435293, "grad_norm": 1.2165247201919556, "learning_rate": 4.9960388012006784e-05, "loss": 2.9751, "step": 504 }, { "epoch": 0.11633934227955998, "grad_norm": 1.3248049020767212, "learning_rate": 4.995924846661398e-05, "loss": 3.0553, "step": 505 }, { "epoch": 0.11656971721476703, "grad_norm": 0.9891802668571472, "learning_rate": 4.995809277515424e-05, "loss": 3.0277, "step": 506 }, { "epoch": 0.11680009214997408, "grad_norm": 1.3908474445343018, "learning_rate": 4.995692093837518e-05, "loss": 3.0772, "step": 507 }, { "epoch": 0.11703046708518114, "grad_norm": 1.1450340747833252, "learning_rate": 4.995573295703487e-05, "loss": 3.0131, "step": 508 }, { "epoch": 0.11726084202038818, "grad_norm": 1.2157433032989502, "learning_rate": 4.9954528831901795e-05, "loss": 2.9749, "step": 509 }, { "epoch": 0.11749121695559524, "grad_norm": 1.081217885017395, "learning_rate": 4.99533085637549e-05, "loss": 2.9403, "step": 510 }, { "epoch": 0.11772159189080228, "grad_norm": 1.0841819047927856, "learning_rate": 4.9952072153383575e-05, "loss": 2.9517, "step": 511 }, { "epoch": 0.11795196682600934, "grad_norm": 1.330822229385376, "learning_rate": 4.995081960158766e-05, "loss": 3.0223, "step": 512 }, { "epoch": 0.11818234176121638, "grad_norm": 1.1728214025497437, "learning_rate": 4.994955090917742e-05, "loss": 2.9141, "step": 513 }, { "epoch": 0.11841271669642343, "grad_norm": 1.323855996131897, "learning_rate": 4.994826607697358e-05, "loss": 2.9322, "step": 514 }, { "epoch": 0.11864309163163048, "grad_norm": 0.9319844841957092, "learning_rate": 4.9946965105807275e-05, "loss": 2.9206, "step": 515 }, { "epoch": 0.11887346656683753, "grad_norm": 1.2361253499984741, "learning_rate": 4.994564799652011e-05, "loss": 2.9634, "step": 516 }, { "epoch": 0.11910384150204457, "grad_norm": 1.2407070398330688, "learning_rate": 4.994431474996412e-05, "loss": 2.9582, "step": 517 }, { "epoch": 0.11933421643725163, "grad_norm": 1.2262343168258667, "learning_rate": 4.994296536700177e-05, "loss": 2.8793, "step": 518 }, { "epoch": 0.11956459137245867, "grad_norm": 1.3256343603134155, "learning_rate": 4.9941599848505985e-05, "loss": 2.9273, "step": 519 }, { "epoch": 0.11979496630766573, "grad_norm": 1.1419692039489746, "learning_rate": 4.99402181953601e-05, "loss": 2.9052, "step": 520 }, { "epoch": 0.12002534124287277, "grad_norm": 1.1616865396499634, "learning_rate": 4.993882040845792e-05, "loss": 2.9021, "step": 521 }, { "epoch": 0.12025571617807983, "grad_norm": 1.2539517879486084, "learning_rate": 4.993740648870365e-05, "loss": 2.9061, "step": 522 }, { "epoch": 0.12048609111328687, "grad_norm": 1.1511468887329102, "learning_rate": 4.993597643701198e-05, "loss": 2.8316, "step": 523 }, { "epoch": 0.12071646604849393, "grad_norm": 1.22184419631958, "learning_rate": 4.993453025430797e-05, "loss": 2.8681, "step": 524 }, { "epoch": 0.12094684098370097, "grad_norm": 1.0296560525894165, "learning_rate": 4.993306794152717e-05, "loss": 2.8283, "step": 525 }, { "epoch": 0.12117721591890802, "grad_norm": 1.2369314432144165, "learning_rate": 4.993158949961555e-05, "loss": 2.8632, "step": 526 }, { "epoch": 0.12140759085411507, "grad_norm": 1.2746834754943848, "learning_rate": 4.9930094929529506e-05, "loss": 2.8795, "step": 527 }, { "epoch": 0.12163796578932212, "grad_norm": 1.4031105041503906, "learning_rate": 4.9928584232235866e-05, "loss": 2.8351, "step": 528 }, { "epoch": 0.12186834072452916, "grad_norm": 1.3131935596466064, "learning_rate": 4.9927057408711904e-05, "loss": 2.8936, "step": 529 }, { "epoch": 0.12209871565973622, "grad_norm": 1.5465071201324463, "learning_rate": 4.992551445994531e-05, "loss": 2.8346, "step": 530 }, { "epoch": 0.12232909059494328, "grad_norm": 1.4071685075759888, "learning_rate": 4.992395538693422e-05, "loss": 2.7942, "step": 531 }, { "epoch": 0.12255946553015032, "grad_norm": 1.239355444908142, "learning_rate": 4.992238019068718e-05, "loss": 2.7974, "step": 532 }, { "epoch": 0.12278984046535737, "grad_norm": 0.9831039309501648, "learning_rate": 4.99207888722232e-05, "loss": 2.7854, "step": 533 }, { "epoch": 0.12302021540056442, "grad_norm": 1.167510747909546, "learning_rate": 4.9919181432571686e-05, "loss": 2.7574, "step": 534 }, { "epoch": 0.12325059033577147, "grad_norm": 1.2628921270370483, "learning_rate": 4.991755787277249e-05, "loss": 2.823, "step": 535 }, { "epoch": 0.12348096527097852, "grad_norm": 1.273445725440979, "learning_rate": 4.991591819387589e-05, "loss": 2.7734, "step": 536 }, { "epoch": 0.12371134020618557, "grad_norm": 1.336830735206604, "learning_rate": 4.991426239694259e-05, "loss": 2.7779, "step": 537 }, { "epoch": 0.12394171514139261, "grad_norm": 1.0944422483444214, "learning_rate": 4.991259048304372e-05, "loss": 2.7131, "step": 538 }, { "epoch": 0.12417209007659967, "grad_norm": 1.1006290912628174, "learning_rate": 4.9910902453260824e-05, "loss": 2.7407, "step": 539 }, { "epoch": 0.12440246501180671, "grad_norm": 1.2868058681488037, "learning_rate": 4.99091983086859e-05, "loss": 2.7633, "step": 540 }, { "epoch": 0.12463283994701377, "grad_norm": 1.5736297369003296, "learning_rate": 4.990747805042135e-05, "loss": 2.7985, "step": 541 }, { "epoch": 0.12486321488222081, "grad_norm": 1.593061089515686, "learning_rate": 4.9905741679580007e-05, "loss": 2.717, "step": 542 }, { "epoch": 0.12509358981742785, "grad_norm": 2.191934585571289, "learning_rate": 4.990398919728511e-05, "loss": 2.8053, "step": 543 }, { "epoch": 0.1253239647526349, "grad_norm": 1.1305919885635376, "learning_rate": 4.990222060467035e-05, "loss": 2.7147, "step": 544 }, { "epoch": 0.12555433968784196, "grad_norm": 1.8921200037002563, "learning_rate": 4.9900435902879825e-05, "loss": 2.764, "step": 545 }, { "epoch": 0.12578471462304902, "grad_norm": 2.211972951889038, "learning_rate": 4.9898635093068036e-05, "loss": 2.7865, "step": 546 }, { "epoch": 0.12601508955825605, "grad_norm": 1.0959868431091309, "learning_rate": 4.989681817639994e-05, "loss": 2.747, "step": 547 }, { "epoch": 0.1262454644934631, "grad_norm": 1.8587756156921387, "learning_rate": 4.9894985154050887e-05, "loss": 2.7143, "step": 548 }, { "epoch": 0.12647583942867016, "grad_norm": 1.9631823301315308, "learning_rate": 4.989313602720666e-05, "loss": 2.7737, "step": 549 }, { "epoch": 0.12670621436387722, "grad_norm": 1.4554961919784546, "learning_rate": 4.989127079706345e-05, "loss": 2.6923, "step": 550 }, { "epoch": 0.12693658929908425, "grad_norm": 1.3169633150100708, "learning_rate": 4.988938946482786e-05, "loss": 2.6665, "step": 551 }, { "epoch": 0.1271669642342913, "grad_norm": 1.1875041723251343, "learning_rate": 4.988749203171693e-05, "loss": 2.6689, "step": 552 }, { "epoch": 0.12739733916949836, "grad_norm": 1.7975043058395386, "learning_rate": 4.988557849895811e-05, "loss": 2.7185, "step": 553 }, { "epoch": 0.12762771410470541, "grad_norm": 1.5742747783660889, "learning_rate": 4.988364886778925e-05, "loss": 2.5844, "step": 554 }, { "epoch": 0.12785808903991247, "grad_norm": 1.1302361488342285, "learning_rate": 4.988170313945861e-05, "loss": 2.5981, "step": 555 }, { "epoch": 0.1280884639751195, "grad_norm": 1.9412686824798584, "learning_rate": 4.98797413152249e-05, "loss": 2.6428, "step": 556 }, { "epoch": 0.12831883891032655, "grad_norm": 2.3457326889038086, "learning_rate": 4.98777633963572e-05, "loss": 2.6634, "step": 557 }, { "epoch": 0.1285492138455336, "grad_norm": 3.9170572757720947, "learning_rate": 4.987576938413504e-05, "loss": 2.6306, "step": 558 }, { "epoch": 0.12877958878074067, "grad_norm": 7.513334274291992, "learning_rate": 4.987375927984832e-05, "loss": 2.7079, "step": 559 }, { "epoch": 0.1290099637159477, "grad_norm": 2.340204954147339, "learning_rate": 4.987173308479738e-05, "loss": 2.6856, "step": 560 }, { "epoch": 0.12924033865115475, "grad_norm": 2.6999058723449707, "learning_rate": 4.9869690800292965e-05, "loss": 2.6142, "step": 561 }, { "epoch": 0.1294707135863618, "grad_norm": 2.237706184387207, "learning_rate": 4.9867632427656206e-05, "loss": 2.6379, "step": 562 }, { "epoch": 0.12970108852156886, "grad_norm": 1.8714499473571777, "learning_rate": 4.986555796821868e-05, "loss": 2.6603, "step": 563 }, { "epoch": 0.1299314634567759, "grad_norm": 1.3226529359817505, "learning_rate": 4.986346742332234e-05, "loss": 2.5705, "step": 564 }, { "epoch": 0.13016183839198295, "grad_norm": 1.4232512712478638, "learning_rate": 4.9861360794319546e-05, "loss": 2.5757, "step": 565 }, { "epoch": 0.13039221332719, "grad_norm": 1.312693476676941, "learning_rate": 4.985923808257308e-05, "loss": 2.5911, "step": 566 }, { "epoch": 0.13062258826239706, "grad_norm": 1.4455422163009644, "learning_rate": 4.985709928945611e-05, "loss": 2.6071, "step": 567 }, { "epoch": 0.1308529631976041, "grad_norm": 1.5549144744873047, "learning_rate": 4.9854944416352225e-05, "loss": 2.5684, "step": 568 }, { "epoch": 0.13108333813281114, "grad_norm": 1.2311406135559082, "learning_rate": 4.98527734646554e-05, "loss": 2.5054, "step": 569 }, { "epoch": 0.1313137130680182, "grad_norm": 3.2267868518829346, "learning_rate": 4.985058643577002e-05, "loss": 2.4995, "step": 570 }, { "epoch": 0.13154408800322526, "grad_norm": 2.401266574859619, "learning_rate": 4.9848383331110867e-05, "loss": 2.5772, "step": 571 }, { "epoch": 0.13177446293843229, "grad_norm": 1.0504907369613647, "learning_rate": 4.984616415210312e-05, "loss": 2.5479, "step": 572 }, { "epoch": 0.13200483787363934, "grad_norm": 1.7297791242599487, "learning_rate": 4.984392890018236e-05, "loss": 2.5015, "step": 573 }, { "epoch": 0.1322352128088464, "grad_norm": 1.463789939880371, "learning_rate": 4.984167757679458e-05, "loss": 2.5321, "step": 574 }, { "epoch": 0.13246558774405345, "grad_norm": 1.2236517667770386, "learning_rate": 4.983941018339613e-05, "loss": 2.5032, "step": 575 }, { "epoch": 0.1326959626792605, "grad_norm": 1.2850890159606934, "learning_rate": 4.983712672145379e-05, "loss": 2.5357, "step": 576 }, { "epoch": 0.13292633761446754, "grad_norm": 1.280199646949768, "learning_rate": 4.9834827192444733e-05, "loss": 2.4562, "step": 577 }, { "epoch": 0.1331567125496746, "grad_norm": 1.2631950378417969, "learning_rate": 4.983251159785651e-05, "loss": 2.4846, "step": 578 }, { "epoch": 0.13338708748488165, "grad_norm": 1.3578568696975708, "learning_rate": 4.983017993918708e-05, "loss": 2.4877, "step": 579 }, { "epoch": 0.1336174624200887, "grad_norm": 1.338599681854248, "learning_rate": 4.982783221794478e-05, "loss": 2.5567, "step": 580 }, { "epoch": 0.13384783735529573, "grad_norm": 1.8400907516479492, "learning_rate": 4.982546843564834e-05, "loss": 2.5778, "step": 581 }, { "epoch": 0.1340782122905028, "grad_norm": 1.0268421173095703, "learning_rate": 4.9823088593826896e-05, "loss": 2.4514, "step": 582 }, { "epoch": 0.13430858722570985, "grad_norm": 1.3791072368621826, "learning_rate": 4.9820692694019956e-05, "loss": 2.5332, "step": 583 }, { "epoch": 0.1345389621609169, "grad_norm": 1.0470119714736938, "learning_rate": 4.981828073777741e-05, "loss": 2.4758, "step": 584 }, { "epoch": 0.13476933709612393, "grad_norm": 1.4773871898651123, "learning_rate": 4.9815852726659564e-05, "loss": 2.4755, "step": 585 }, { "epoch": 0.134999712031331, "grad_norm": 1.2702157497406006, "learning_rate": 4.981340866223707e-05, "loss": 2.4263, "step": 586 }, { "epoch": 0.13523008696653804, "grad_norm": 1.0748322010040283, "learning_rate": 4.981094854609101e-05, "loss": 2.4342, "step": 587 }, { "epoch": 0.1354604619017451, "grad_norm": 1.3905810117721558, "learning_rate": 4.980847237981281e-05, "loss": 2.4427, "step": 588 }, { "epoch": 0.13569083683695213, "grad_norm": 1.0650360584259033, "learning_rate": 4.9805980165004304e-05, "loss": 2.4359, "step": 589 }, { "epoch": 0.13592121177215918, "grad_norm": 1.243291974067688, "learning_rate": 4.9803471903277696e-05, "loss": 2.4253, "step": 590 }, { "epoch": 0.13615158670736624, "grad_norm": 1.1497853994369507, "learning_rate": 4.980094759625556e-05, "loss": 2.4613, "step": 591 }, { "epoch": 0.1363819616425733, "grad_norm": 1.1312801837921143, "learning_rate": 4.979840724557089e-05, "loss": 2.4168, "step": 592 }, { "epoch": 0.13661233657778032, "grad_norm": 1.2346746921539307, "learning_rate": 4.9795850852867004e-05, "loss": 2.4698, "step": 593 }, { "epoch": 0.13684271151298738, "grad_norm": 1.1483747959136963, "learning_rate": 4.979327841979764e-05, "loss": 2.4247, "step": 594 }, { "epoch": 0.13707308644819444, "grad_norm": 0.9811630845069885, "learning_rate": 4.97906899480269e-05, "loss": 2.4614, "step": 595 }, { "epoch": 0.1373034613834015, "grad_norm": 1.1860783100128174, "learning_rate": 4.978808543922925e-05, "loss": 2.4393, "step": 596 }, { "epoch": 0.13753383631860855, "grad_norm": 1.069810390472412, "learning_rate": 4.978546489508955e-05, "loss": 2.4205, "step": 597 }, { "epoch": 0.13776421125381558, "grad_norm": 1.1588655710220337, "learning_rate": 4.978282831730301e-05, "loss": 2.4084, "step": 598 }, { "epoch": 0.13799458618902263, "grad_norm": 0.9729504585266113, "learning_rate": 4.9780175707575234e-05, "loss": 2.3952, "step": 599 }, { "epoch": 0.1382249611242297, "grad_norm": 1.0078094005584717, "learning_rate": 4.9777507067622186e-05, "loss": 2.4064, "step": 600 }, { "epoch": 0.13845533605943675, "grad_norm": 1.0245592594146729, "learning_rate": 4.9774822399170196e-05, "loss": 2.4058, "step": 601 }, { "epoch": 0.13868571099464377, "grad_norm": 1.0388532876968384, "learning_rate": 4.977212170395598e-05, "loss": 2.4131, "step": 602 }, { "epoch": 0.13891608592985083, "grad_norm": 1.072117805480957, "learning_rate": 4.976940498372659e-05, "loss": 2.3742, "step": 603 }, { "epoch": 0.13914646086505789, "grad_norm": 1.363053798675537, "learning_rate": 4.9766672240239485e-05, "loss": 2.4335, "step": 604 }, { "epoch": 0.13937683580026494, "grad_norm": 0.9123207330703735, "learning_rate": 4.9763923475262464e-05, "loss": 2.3597, "step": 605 }, { "epoch": 0.13960721073547197, "grad_norm": 1.0962797403335571, "learning_rate": 4.976115869057368e-05, "loss": 2.3566, "step": 606 }, { "epoch": 0.13983758567067903, "grad_norm": 1.0829681158065796, "learning_rate": 4.975837788796169e-05, "loss": 2.4351, "step": 607 }, { "epoch": 0.14006796060588608, "grad_norm": 0.9290595054626465, "learning_rate": 4.975558106922538e-05, "loss": 2.3775, "step": 608 }, { "epoch": 0.14029833554109314, "grad_norm": 1.0513983964920044, "learning_rate": 4.9752768236173983e-05, "loss": 2.4134, "step": 609 }, { "epoch": 0.14052871047630017, "grad_norm": 0.8620465397834778, "learning_rate": 4.974993939062713e-05, "loss": 2.3663, "step": 610 }, { "epoch": 0.14075908541150722, "grad_norm": 1.1166764497756958, "learning_rate": 4.97470945344148e-05, "loss": 2.3906, "step": 611 }, { "epoch": 0.14098946034671428, "grad_norm": 10.560090065002441, "learning_rate": 4.974423366937731e-05, "loss": 2.451, "step": 612 }, { "epoch": 0.14121983528192134, "grad_norm": 1.534456491470337, "learning_rate": 4.9741356797365345e-05, "loss": 2.4159, "step": 613 }, { "epoch": 0.14145021021712836, "grad_norm": 1.037190556526184, "learning_rate": 4.9738463920239955e-05, "loss": 2.4027, "step": 614 }, { "epoch": 0.14168058515233542, "grad_norm": 1.0326586961746216, "learning_rate": 4.973555503987252e-05, "loss": 2.3827, "step": 615 }, { "epoch": 0.14191096008754248, "grad_norm": 0.9757535457611084, "learning_rate": 4.97326301581448e-05, "loss": 2.4046, "step": 616 }, { "epoch": 0.14214133502274953, "grad_norm": 1.0059373378753662, "learning_rate": 4.972968927694889e-05, "loss": 2.328, "step": 617 }, { "epoch": 0.1423717099579566, "grad_norm": 0.858684241771698, "learning_rate": 4.9726732398187234e-05, "loss": 2.3644, "step": 618 }, { "epoch": 0.14260208489316362, "grad_norm": 0.7890450954437256, "learning_rate": 4.972375952377263e-05, "loss": 2.3704, "step": 619 }, { "epoch": 0.14283245982837067, "grad_norm": 0.8138490319252014, "learning_rate": 4.972077065562821e-05, "loss": 2.376, "step": 620 }, { "epoch": 0.14306283476357773, "grad_norm": 0.7049270868301392, "learning_rate": 4.9717765795687495e-05, "loss": 2.4188, "step": 621 }, { "epoch": 0.14329320969878478, "grad_norm": 0.764702558517456, "learning_rate": 4.971474494589429e-05, "loss": 2.3425, "step": 622 }, { "epoch": 0.1435235846339918, "grad_norm": 0.75132155418396, "learning_rate": 4.971170810820279e-05, "loss": 2.3559, "step": 623 }, { "epoch": 0.14375395956919887, "grad_norm": 0.7658779621124268, "learning_rate": 4.970865528457751e-05, "loss": 2.3178, "step": 624 }, { "epoch": 0.14398433450440593, "grad_norm": 0.7456948161125183, "learning_rate": 4.970558647699332e-05, "loss": 2.301, "step": 625 }, { "epoch": 0.14421470943961298, "grad_norm": 0.7142807841300964, "learning_rate": 4.970250168743541e-05, "loss": 2.293, "step": 626 }, { "epoch": 0.14444508437482, "grad_norm": 0.713147759437561, "learning_rate": 4.969940091789933e-05, "loss": 2.3126, "step": 627 }, { "epoch": 0.14467545931002707, "grad_norm": 0.7684802412986755, "learning_rate": 4.969628417039096e-05, "loss": 2.306, "step": 628 }, { "epoch": 0.14490583424523412, "grad_norm": 0.8344357013702393, "learning_rate": 4.969315144692651e-05, "loss": 2.2223, "step": 629 }, { "epoch": 0.14513620918044118, "grad_norm": 0.8178191184997559, "learning_rate": 4.969000274953254e-05, "loss": 2.3257, "step": 630 }, { "epoch": 0.1453665841156482, "grad_norm": 0.9177209138870239, "learning_rate": 4.968683808024593e-05, "loss": 2.3001, "step": 631 }, { "epoch": 0.14559695905085526, "grad_norm": 0.957196831703186, "learning_rate": 4.9683657441113884e-05, "loss": 2.3463, "step": 632 }, { "epoch": 0.14582733398606232, "grad_norm": 0.9687195420265198, "learning_rate": 4.9680460834193964e-05, "loss": 2.3094, "step": 633 }, { "epoch": 0.14605770892126937, "grad_norm": 0.8334886431694031, "learning_rate": 4.967724826155404e-05, "loss": 2.3037, "step": 634 }, { "epoch": 0.1462880838564764, "grad_norm": 0.8597080111503601, "learning_rate": 4.967401972527231e-05, "loss": 2.3541, "step": 635 }, { "epoch": 0.14651845879168346, "grad_norm": 0.8464825749397278, "learning_rate": 4.967077522743731e-05, "loss": 2.3293, "step": 636 }, { "epoch": 0.14674883372689052, "grad_norm": 0.8050293922424316, "learning_rate": 4.96675147701479e-05, "loss": 2.2974, "step": 637 }, { "epoch": 0.14697920866209757, "grad_norm": 0.8256909251213074, "learning_rate": 4.9664238355513255e-05, "loss": 2.2813, "step": 638 }, { "epoch": 0.1472095835973046, "grad_norm": 0.7616457939147949, "learning_rate": 4.966094598565288e-05, "loss": 2.2242, "step": 639 }, { "epoch": 0.14743995853251166, "grad_norm": 0.7411653399467468, "learning_rate": 4.96576376626966e-05, "loss": 2.2182, "step": 640 }, { "epoch": 0.1476703334677187, "grad_norm": 0.6901570558547974, "learning_rate": 4.965431338878456e-05, "loss": 2.2387, "step": 641 }, { "epoch": 0.14790070840292577, "grad_norm": 1.0186954736709595, "learning_rate": 4.965097316606722e-05, "loss": 2.3118, "step": 642 }, { "epoch": 0.14813108333813282, "grad_norm": 0.972894012928009, "learning_rate": 4.964761699670537e-05, "loss": 2.2718, "step": 643 }, { "epoch": 0.14836145827333985, "grad_norm": 1.0664464235305786, "learning_rate": 4.964424488287009e-05, "loss": 2.234, "step": 644 }, { "epoch": 0.1485918332085469, "grad_norm": 0.7914363145828247, "learning_rate": 4.9640856826742816e-05, "loss": 2.3304, "step": 645 }, { "epoch": 0.14882220814375396, "grad_norm": 0.9038671851158142, "learning_rate": 4.9637452830515245e-05, "loss": 2.2944, "step": 646 }, { "epoch": 0.14905258307896102, "grad_norm": 0.8845005631446838, "learning_rate": 4.963403289638943e-05, "loss": 2.2336, "step": 647 }, { "epoch": 0.14928295801416805, "grad_norm": 0.8158465623855591, "learning_rate": 4.963059702657771e-05, "loss": 2.2773, "step": 648 }, { "epoch": 0.1495133329493751, "grad_norm": 0.777198076248169, "learning_rate": 4.962714522330274e-05, "loss": 2.323, "step": 649 }, { "epoch": 0.14974370788458216, "grad_norm": 0.6728597283363342, "learning_rate": 4.962367748879748e-05, "loss": 2.2618, "step": 650 }, { "epoch": 0.14997408281978922, "grad_norm": 0.7147519588470459, "learning_rate": 4.962019382530521e-05, "loss": 2.256, "step": 651 }, { "epoch": 0.15020445775499625, "grad_norm": 0.6552104949951172, "learning_rate": 4.961669423507947e-05, "loss": 2.2673, "step": 652 }, { "epoch": 0.1504348326902033, "grad_norm": 0.7152547240257263, "learning_rate": 4.961317872038418e-05, "loss": 2.2902, "step": 653 }, { "epoch": 0.15066520762541036, "grad_norm": 0.6445720195770264, "learning_rate": 4.960964728349348e-05, "loss": 2.2082, "step": 654 }, { "epoch": 0.15089558256061741, "grad_norm": 0.6637941002845764, "learning_rate": 4.960609992669186e-05, "loss": 2.2409, "step": 655 }, { "epoch": 0.15112595749582444, "grad_norm": 0.6085172891616821, "learning_rate": 4.96025366522741e-05, "loss": 2.2739, "step": 656 }, { "epoch": 0.1513563324310315, "grad_norm": 0.8219171166419983, "learning_rate": 4.959895746254527e-05, "loss": 2.2355, "step": 657 }, { "epoch": 0.15158670736623855, "grad_norm": 0.6857849359512329, "learning_rate": 4.9595362359820727e-05, "loss": 2.2757, "step": 658 }, { "epoch": 0.1518170823014456, "grad_norm": 0.6388952136039734, "learning_rate": 4.959175134642614e-05, "loss": 2.2578, "step": 659 }, { "epoch": 0.15204745723665264, "grad_norm": 0.8845183849334717, "learning_rate": 4.958812442469747e-05, "loss": 2.1651, "step": 660 }, { "epoch": 0.1522778321718597, "grad_norm": 0.9298895597457886, "learning_rate": 4.9584481596980955e-05, "loss": 2.2929, "step": 661 }, { "epoch": 0.15250820710706675, "grad_norm": 0.8152828216552734, "learning_rate": 4.9580822865633125e-05, "loss": 2.2463, "step": 662 }, { "epoch": 0.1527385820422738, "grad_norm": 0.8429036140441895, "learning_rate": 4.957714823302081e-05, "loss": 2.2816, "step": 663 }, { "epoch": 0.15296895697748086, "grad_norm": 0.8883467316627502, "learning_rate": 4.957345770152113e-05, "loss": 2.2754, "step": 664 }, { "epoch": 0.1531993319126879, "grad_norm": 1.0276157855987549, "learning_rate": 4.9569751273521454e-05, "loss": 2.2802, "step": 665 }, { "epoch": 0.15342970684789495, "grad_norm": 0.9960843324661255, "learning_rate": 4.9566028951419474e-05, "loss": 2.2163, "step": 666 }, { "epoch": 0.153660081783102, "grad_norm": 1.1139601469039917, "learning_rate": 4.956229073762315e-05, "loss": 2.2817, "step": 667 }, { "epoch": 0.15389045671830906, "grad_norm": 1.0659136772155762, "learning_rate": 4.955853663455072e-05, "loss": 2.3005, "step": 668 }, { "epoch": 0.1541208316535161, "grad_norm": 0.8750963807106018, "learning_rate": 4.9554766644630713e-05, "loss": 2.269, "step": 669 }, { "epoch": 0.15435120658872314, "grad_norm": 0.8922746777534485, "learning_rate": 4.9550980770301905e-05, "loss": 2.2577, "step": 670 }, { "epoch": 0.1545815815239302, "grad_norm": 0.8554069995880127, "learning_rate": 4.954717901401338e-05, "loss": 2.2603, "step": 671 }, { "epoch": 0.15481195645913726, "grad_norm": 0.8567933440208435, "learning_rate": 4.954336137822449e-05, "loss": 2.2478, "step": 672 }, { "epoch": 0.15504233139434428, "grad_norm": 0.8024277091026306, "learning_rate": 4.953952786540483e-05, "loss": 2.2385, "step": 673 }, { "epoch": 0.15527270632955134, "grad_norm": 1.150817632675171, "learning_rate": 4.9535678478034307e-05, "loss": 2.2122, "step": 674 }, { "epoch": 0.1555030812647584, "grad_norm": 0.7103257179260254, "learning_rate": 4.953181321860307e-05, "loss": 2.2366, "step": 675 }, { "epoch": 0.15573345619996545, "grad_norm": 1.3318164348602295, "learning_rate": 4.952793208961156e-05, "loss": 2.3029, "step": 676 }, { "epoch": 0.15596383113517248, "grad_norm": 0.7148452401161194, "learning_rate": 4.952403509357044e-05, "loss": 2.2697, "step": 677 }, { "epoch": 0.15619420607037954, "grad_norm": 0.7653288841247559, "learning_rate": 4.95201222330007e-05, "loss": 2.2357, "step": 678 }, { "epoch": 0.1564245810055866, "grad_norm": 0.7247732877731323, "learning_rate": 4.9516193510433526e-05, "loss": 2.2355, "step": 679 }, { "epoch": 0.15665495594079365, "grad_norm": 0.7918069362640381, "learning_rate": 4.951224892841042e-05, "loss": 2.2545, "step": 680 }, { "epoch": 0.15688533087600068, "grad_norm": 0.7208665609359741, "learning_rate": 4.950828848948311e-05, "loss": 2.2278, "step": 681 }, { "epoch": 0.15711570581120773, "grad_norm": 0.7559713125228882, "learning_rate": 4.9504312196213596e-05, "loss": 2.255, "step": 682 }, { "epoch": 0.1573460807464148, "grad_norm": 0.9552430510520935, "learning_rate": 4.950032005117413e-05, "loss": 2.2696, "step": 683 }, { "epoch": 0.15757645568162185, "grad_norm": 0.9840549826622009, "learning_rate": 4.9496312056947226e-05, "loss": 2.2213, "step": 684 }, { "epoch": 0.1578068306168289, "grad_norm": 0.7679042816162109, "learning_rate": 4.9492288216125635e-05, "loss": 2.2328, "step": 685 }, { "epoch": 0.15803720555203593, "grad_norm": 0.6872347593307495, "learning_rate": 4.948824853131236e-05, "loss": 2.2168, "step": 686 }, { "epoch": 0.158267580487243, "grad_norm": 0.724403977394104, "learning_rate": 4.9484193005120684e-05, "loss": 2.2502, "step": 687 }, { "epoch": 0.15849795542245004, "grad_norm": 0.8040013313293457, "learning_rate": 4.94801216401741e-05, "loss": 2.2093, "step": 688 }, { "epoch": 0.1587283303576571, "grad_norm": 0.8830114603042603, "learning_rate": 4.947603443910637e-05, "loss": 2.2248, "step": 689 }, { "epoch": 0.15895870529286413, "grad_norm": 0.756533682346344, "learning_rate": 4.947193140456149e-05, "loss": 2.2078, "step": 690 }, { "epoch": 0.15918908022807118, "grad_norm": 0.6929740309715271, "learning_rate": 4.94678125391937e-05, "loss": 2.1737, "step": 691 }, { "epoch": 0.15941945516327824, "grad_norm": 0.7126024961471558, "learning_rate": 4.946367784566748e-05, "loss": 2.2657, "step": 692 }, { "epoch": 0.1596498300984853, "grad_norm": 0.6481022238731384, "learning_rate": 4.945952732665755e-05, "loss": 2.1506, "step": 693 }, { "epoch": 0.15988020503369232, "grad_norm": 0.5727748870849609, "learning_rate": 4.945536098484888e-05, "loss": 2.1775, "step": 694 }, { "epoch": 0.16011057996889938, "grad_norm": 0.6148141026496887, "learning_rate": 4.945117882293666e-05, "loss": 2.2352, "step": 695 }, { "epoch": 0.16034095490410644, "grad_norm": 0.5967652797698975, "learning_rate": 4.944698084362631e-05, "loss": 2.1842, "step": 696 }, { "epoch": 0.1605713298393135, "grad_norm": 0.5928661227226257, "learning_rate": 4.94427670496335e-05, "loss": 2.2163, "step": 697 }, { "epoch": 0.16080170477452052, "grad_norm": 0.5838789343833923, "learning_rate": 4.943853744368412e-05, "loss": 2.1837, "step": 698 }, { "epoch": 0.16103207970972758, "grad_norm": 0.6125690340995789, "learning_rate": 4.9434292028514294e-05, "loss": 2.1929, "step": 699 }, { "epoch": 0.16126245464493463, "grad_norm": 0.6911262273788452, "learning_rate": 4.943003080687035e-05, "loss": 2.1687, "step": 700 }, { "epoch": 0.1614928295801417, "grad_norm": 0.686281144618988, "learning_rate": 4.9425753781508886e-05, "loss": 2.1516, "step": 701 }, { "epoch": 0.16172320451534872, "grad_norm": 0.8202507495880127, "learning_rate": 4.942146095519669e-05, "loss": 2.197, "step": 702 }, { "epoch": 0.16195357945055577, "grad_norm": 0.9710200428962708, "learning_rate": 4.941715233071077e-05, "loss": 2.2108, "step": 703 }, { "epoch": 0.16218395438576283, "grad_norm": 1.1825032234191895, "learning_rate": 4.941282791083836e-05, "loss": 2.2635, "step": 704 }, { "epoch": 0.16241432932096989, "grad_norm": 0.7789471745491028, "learning_rate": 4.940848769837693e-05, "loss": 2.2122, "step": 705 }, { "epoch": 0.16264470425617694, "grad_norm": 0.9164131879806519, "learning_rate": 4.9404131696134145e-05, "loss": 2.2536, "step": 706 }, { "epoch": 0.16287507919138397, "grad_norm": 0.8298353552818298, "learning_rate": 4.939975990692789e-05, "loss": 2.2359, "step": 707 }, { "epoch": 0.16310545412659103, "grad_norm": 0.939236581325531, "learning_rate": 4.9395372333586255e-05, "loss": 2.2594, "step": 708 }, { "epoch": 0.16333582906179808, "grad_norm": 0.8118873238563538, "learning_rate": 4.939096897894756e-05, "loss": 2.2377, "step": 709 }, { "epoch": 0.16356620399700514, "grad_norm": 0.7632454633712769, "learning_rate": 4.938654984586032e-05, "loss": 2.1891, "step": 710 }, { "epoch": 0.16379657893221217, "grad_norm": 0.7089853882789612, "learning_rate": 4.9382114937183254e-05, "loss": 2.1991, "step": 711 }, { "epoch": 0.16402695386741922, "grad_norm": 0.7485292553901672, "learning_rate": 4.93776642557853e-05, "loss": 2.1994, "step": 712 }, { "epoch": 0.16425732880262628, "grad_norm": 0.6641004681587219, "learning_rate": 4.937319780454559e-05, "loss": 2.1503, "step": 713 }, { "epoch": 0.16448770373783334, "grad_norm": 0.8115439414978027, "learning_rate": 4.936871558635346e-05, "loss": 2.1661, "step": 714 }, { "epoch": 0.16471807867304036, "grad_norm": 0.6381528973579407, "learning_rate": 4.936421760410843e-05, "loss": 2.1787, "step": 715 }, { "epoch": 0.16494845360824742, "grad_norm": 0.9702723026275635, "learning_rate": 4.9359703860720243e-05, "loss": 2.183, "step": 716 }, { "epoch": 0.16517882854345448, "grad_norm": 0.6373147964477539, "learning_rate": 4.9355174359108834e-05, "loss": 2.2376, "step": 717 }, { "epoch": 0.16540920347866153, "grad_norm": 0.7607632875442505, "learning_rate": 4.935062910220431e-05, "loss": 2.2182, "step": 718 }, { "epoch": 0.16563957841386856, "grad_norm": 0.818124532699585, "learning_rate": 4.9346068092946996e-05, "loss": 2.2342, "step": 719 }, { "epoch": 0.16586995334907562, "grad_norm": 0.8324174284934998, "learning_rate": 4.9341491334287384e-05, "loss": 2.1724, "step": 720 }, { "epoch": 0.16610032828428267, "grad_norm": 0.9065958261489868, "learning_rate": 4.933689882918618e-05, "loss": 2.2077, "step": 721 }, { "epoch": 0.16633070321948973, "grad_norm": 0.7437676191329956, "learning_rate": 4.933229058061425e-05, "loss": 2.1565, "step": 722 }, { "epoch": 0.16656107815469676, "grad_norm": 0.6887902617454529, "learning_rate": 4.932766659155267e-05, "loss": 2.1618, "step": 723 }, { "epoch": 0.1667914530899038, "grad_norm": 0.7176859378814697, "learning_rate": 4.9323026864992675e-05, "loss": 2.1759, "step": 724 }, { "epoch": 0.16702182802511087, "grad_norm": 0.6586791276931763, "learning_rate": 4.9318371403935694e-05, "loss": 2.1271, "step": 725 }, { "epoch": 0.16725220296031792, "grad_norm": 0.6681697368621826, "learning_rate": 4.931370021139333e-05, "loss": 2.15, "step": 726 }, { "epoch": 0.16748257789552495, "grad_norm": 0.6991308927536011, "learning_rate": 4.930901329038737e-05, "loss": 2.2119, "step": 727 }, { "epoch": 0.167712952830732, "grad_norm": 0.6316118240356445, "learning_rate": 4.930431064394977e-05, "loss": 2.1505, "step": 728 }, { "epoch": 0.16794332776593907, "grad_norm": 0.5830152630805969, "learning_rate": 4.929959227512265e-05, "loss": 2.148, "step": 729 }, { "epoch": 0.16817370270114612, "grad_norm": 0.6181976199150085, "learning_rate": 4.929485818695831e-05, "loss": 2.2141, "step": 730 }, { "epoch": 0.16840407763635318, "grad_norm": 0.5857372879981995, "learning_rate": 4.929010838251923e-05, "loss": 2.1529, "step": 731 }, { "epoch": 0.1686344525715602, "grad_norm": 0.5849147439002991, "learning_rate": 4.928534286487802e-05, "loss": 2.1892, "step": 732 }, { "epoch": 0.16886482750676726, "grad_norm": 0.6379908323287964, "learning_rate": 4.928056163711752e-05, "loss": 2.1663, "step": 733 }, { "epoch": 0.16909520244197432, "grad_norm": 0.685780942440033, "learning_rate": 4.927576470233065e-05, "loss": 2.1259, "step": 734 }, { "epoch": 0.16932557737718137, "grad_norm": 0.5891405940055847, "learning_rate": 4.927095206362057e-05, "loss": 2.1828, "step": 735 }, { "epoch": 0.1695559523123884, "grad_norm": 0.5975565314292908, "learning_rate": 4.926612372410053e-05, "loss": 2.1444, "step": 736 }, { "epoch": 0.16978632724759546, "grad_norm": 0.6588759422302246, "learning_rate": 4.9261279686893994e-05, "loss": 2.1415, "step": 737 }, { "epoch": 0.17001670218280251, "grad_norm": 0.6326754093170166, "learning_rate": 4.925641995513455e-05, "loss": 2.1528, "step": 738 }, { "epoch": 0.17024707711800957, "grad_norm": 0.5835785269737244, "learning_rate": 4.925154453196593e-05, "loss": 2.1425, "step": 739 }, { "epoch": 0.1704774520532166, "grad_norm": 0.5475502610206604, "learning_rate": 4.924665342054204e-05, "loss": 2.1626, "step": 740 }, { "epoch": 0.17070782698842366, "grad_norm": 0.6812952756881714, "learning_rate": 4.924174662402694e-05, "loss": 2.1614, "step": 741 }, { "epoch": 0.1709382019236307, "grad_norm": 0.5551726818084717, "learning_rate": 4.9236824145594814e-05, "loss": 2.1588, "step": 742 }, { "epoch": 0.17116857685883777, "grad_norm": 0.6677431464195251, "learning_rate": 4.9231885988429984e-05, "loss": 2.1097, "step": 743 }, { "epoch": 0.1713989517940448, "grad_norm": 0.7117226719856262, "learning_rate": 4.922693215572695e-05, "loss": 2.1257, "step": 744 }, { "epoch": 0.17162932672925185, "grad_norm": 0.710252046585083, "learning_rate": 4.9221962650690324e-05, "loss": 2.1352, "step": 745 }, { "epoch": 0.1718597016644589, "grad_norm": 0.6591752767562866, "learning_rate": 4.9216977476534865e-05, "loss": 2.1181, "step": 746 }, { "epoch": 0.17209007659966596, "grad_norm": 0.5880843997001648, "learning_rate": 4.921197663648547e-05, "loss": 2.0865, "step": 747 }, { "epoch": 0.172320451534873, "grad_norm": 0.5900591015815735, "learning_rate": 4.920696013377716e-05, "loss": 2.097, "step": 748 }, { "epoch": 0.17255082647008005, "grad_norm": 0.6271031498908997, "learning_rate": 4.920192797165511e-05, "loss": 2.1257, "step": 749 }, { "epoch": 0.1727812014052871, "grad_norm": 0.684492826461792, "learning_rate": 4.91968801533746e-05, "loss": 2.0553, "step": 750 }, { "epoch": 0.17301157634049416, "grad_norm": 0.622170090675354, "learning_rate": 4.919181668220106e-05, "loss": 2.1407, "step": 751 }, { "epoch": 0.17324195127570122, "grad_norm": 0.8506683111190796, "learning_rate": 4.9186737561410025e-05, "loss": 2.0906, "step": 752 }, { "epoch": 0.17347232621090825, "grad_norm": 0.706804096698761, "learning_rate": 4.918164279428716e-05, "loss": 2.1594, "step": 753 }, { "epoch": 0.1737027011461153, "grad_norm": 0.6147509813308716, "learning_rate": 4.917653238412827e-05, "loss": 2.1643, "step": 754 }, { "epoch": 0.17393307608132236, "grad_norm": 0.658263087272644, "learning_rate": 4.917140633423926e-05, "loss": 2.1299, "step": 755 }, { "epoch": 0.1741634510165294, "grad_norm": 0.65981525182724, "learning_rate": 4.916626464793616e-05, "loss": 2.1534, "step": 756 }, { "epoch": 0.17439382595173644, "grad_norm": 0.6437305212020874, "learning_rate": 4.91611073285451e-05, "loss": 2.1466, "step": 757 }, { "epoch": 0.1746242008869435, "grad_norm": 0.6281527280807495, "learning_rate": 4.9155934379402335e-05, "loss": 2.1053, "step": 758 }, { "epoch": 0.17485457582215055, "grad_norm": 0.7242773771286011, "learning_rate": 4.915074580385425e-05, "loss": 2.1625, "step": 759 }, { "epoch": 0.1750849507573576, "grad_norm": 0.7637112736701965, "learning_rate": 4.9145541605257304e-05, "loss": 2.1081, "step": 760 }, { "epoch": 0.17531532569256464, "grad_norm": 0.6448433995246887, "learning_rate": 4.9140321786978086e-05, "loss": 2.0646, "step": 761 }, { "epoch": 0.1755457006277717, "grad_norm": 0.6671368479728699, "learning_rate": 4.913508635239327e-05, "loss": 2.1013, "step": 762 }, { "epoch": 0.17577607556297875, "grad_norm": 0.6111666560173035, "learning_rate": 4.912983530488966e-05, "loss": 2.1699, "step": 763 }, { "epoch": 0.1760064504981858, "grad_norm": 0.5790660977363586, "learning_rate": 4.9124568647864134e-05, "loss": 2.1449, "step": 764 }, { "epoch": 0.17623682543339284, "grad_norm": 0.5977033972740173, "learning_rate": 4.911928638472368e-05, "loss": 2.0924, "step": 765 }, { "epoch": 0.1764672003685999, "grad_norm": 0.6603258848190308, "learning_rate": 4.9113988518885375e-05, "loss": 2.1765, "step": 766 }, { "epoch": 0.17669757530380695, "grad_norm": 0.6226357817649841, "learning_rate": 4.91086750537764e-05, "loss": 2.0929, "step": 767 }, { "epoch": 0.176927950239014, "grad_norm": 0.6008521318435669, "learning_rate": 4.9103345992834014e-05, "loss": 2.0989, "step": 768 }, { "epoch": 0.17715832517422103, "grad_norm": 0.6286969780921936, "learning_rate": 4.909800133950557e-05, "loss": 2.15, "step": 769 }, { "epoch": 0.1773887001094281, "grad_norm": 0.5760012865066528, "learning_rate": 4.909264109724853e-05, "loss": 2.112, "step": 770 }, { "epoch": 0.17761907504463514, "grad_norm": 0.6309198141098022, "learning_rate": 4.908726526953039e-05, "loss": 2.113, "step": 771 }, { "epoch": 0.1778494499798422, "grad_norm": 0.6377184987068176, "learning_rate": 4.908187385982877e-05, "loss": 2.11, "step": 772 }, { "epoch": 0.17807982491504926, "grad_norm": 0.589045524597168, "learning_rate": 4.907646687163136e-05, "loss": 2.0826, "step": 773 }, { "epoch": 0.17831019985025628, "grad_norm": 0.6895807385444641, "learning_rate": 4.9071044308435927e-05, "loss": 2.1504, "step": 774 }, { "epoch": 0.17854057478546334, "grad_norm": 0.6089166402816772, "learning_rate": 4.90656061737503e-05, "loss": 2.1267, "step": 775 }, { "epoch": 0.1787709497206704, "grad_norm": 0.8886992335319519, "learning_rate": 4.9060152471092414e-05, "loss": 2.1391, "step": 776 }, { "epoch": 0.17900132465587745, "grad_norm": 0.6812739372253418, "learning_rate": 4.905468320399023e-05, "loss": 2.1352, "step": 777 }, { "epoch": 0.17923169959108448, "grad_norm": 0.7204179167747498, "learning_rate": 4.904919837598182e-05, "loss": 2.142, "step": 778 }, { "epoch": 0.17946207452629154, "grad_norm": 0.9123693704605103, "learning_rate": 4.904369799061529e-05, "loss": 2.142, "step": 779 }, { "epoch": 0.1796924494614986, "grad_norm": 0.9489081501960754, "learning_rate": 4.9038182051448834e-05, "loss": 2.1836, "step": 780 }, { "epoch": 0.17992282439670565, "grad_norm": 0.7568516731262207, "learning_rate": 4.903265056205069e-05, "loss": 2.192, "step": 781 }, { "epoch": 0.18015319933191268, "grad_norm": 0.6307718753814697, "learning_rate": 4.902710352599917e-05, "loss": 2.1024, "step": 782 }, { "epoch": 0.18038357426711973, "grad_norm": 0.6446706652641296, "learning_rate": 4.902154094688263e-05, "loss": 2.1005, "step": 783 }, { "epoch": 0.1806139492023268, "grad_norm": 0.6943814754486084, "learning_rate": 4.901596282829948e-05, "loss": 2.1337, "step": 784 }, { "epoch": 0.18084432413753385, "grad_norm": 0.7025820016860962, "learning_rate": 4.9010369173858204e-05, "loss": 2.1551, "step": 785 }, { "epoch": 0.18107469907274087, "grad_norm": 0.6562637090682983, "learning_rate": 4.9004759987177316e-05, "loss": 2.163, "step": 786 }, { "epoch": 0.18130507400794793, "grad_norm": 0.7729638814926147, "learning_rate": 4.8999135271885384e-05, "loss": 2.1723, "step": 787 }, { "epoch": 0.181535448943155, "grad_norm": 0.7694789171218872, "learning_rate": 4.899349503162102e-05, "loss": 2.1615, "step": 788 }, { "epoch": 0.18176582387836204, "grad_norm": 0.7201782464981079, "learning_rate": 4.898783927003286e-05, "loss": 2.1501, "step": 789 }, { "epoch": 0.18199619881356907, "grad_norm": 0.6488814949989319, "learning_rate": 4.898216799077964e-05, "loss": 2.1125, "step": 790 }, { "epoch": 0.18222657374877613, "grad_norm": 0.6475187540054321, "learning_rate": 4.897648119753006e-05, "loss": 2.1269, "step": 791 }, { "epoch": 0.18245694868398318, "grad_norm": 0.6358548998832703, "learning_rate": 4.89707788939629e-05, "loss": 2.1067, "step": 792 }, { "epoch": 0.18268732361919024, "grad_norm": 0.5440219640731812, "learning_rate": 4.896506108376697e-05, "loss": 2.0717, "step": 793 }, { "epoch": 0.18291769855439727, "grad_norm": 0.53034907579422, "learning_rate": 4.89593277706411e-05, "loss": 2.1126, "step": 794 }, { "epoch": 0.18314807348960432, "grad_norm": 0.589480996131897, "learning_rate": 4.895357895829415e-05, "loss": 2.1614, "step": 795 }, { "epoch": 0.18337844842481138, "grad_norm": 0.6854775547981262, "learning_rate": 4.894781465044502e-05, "loss": 2.146, "step": 796 }, { "epoch": 0.18360882336001844, "grad_norm": 0.657214343547821, "learning_rate": 4.8942034850822627e-05, "loss": 2.1392, "step": 797 }, { "epoch": 0.1838391982952255, "grad_norm": 0.5785201787948608, "learning_rate": 4.8936239563165895e-05, "loss": 2.0843, "step": 798 }, { "epoch": 0.18406957323043252, "grad_norm": 0.6410690546035767, "learning_rate": 4.893042879122378e-05, "loss": 2.1338, "step": 799 }, { "epoch": 0.18429994816563958, "grad_norm": 0.6272732615470886, "learning_rate": 4.892460253875526e-05, "loss": 2.1101, "step": 800 }, { "epoch": 0.18453032310084663, "grad_norm": 0.6744984984397888, "learning_rate": 4.891876080952932e-05, "loss": 2.1613, "step": 801 }, { "epoch": 0.1847606980360537, "grad_norm": 0.5647260546684265, "learning_rate": 4.891290360732495e-05, "loss": 2.1156, "step": 802 }, { "epoch": 0.18499107297126072, "grad_norm": 0.6140429377555847, "learning_rate": 4.890703093593118e-05, "loss": 2.0846, "step": 803 }, { "epoch": 0.18522144790646777, "grad_norm": 0.5427979230880737, "learning_rate": 4.8901142799147003e-05, "loss": 2.0972, "step": 804 }, { "epoch": 0.18545182284167483, "grad_norm": 0.5976928472518921, "learning_rate": 4.889523920078144e-05, "loss": 2.1406, "step": 805 }, { "epoch": 0.18568219777688189, "grad_norm": 0.5360948443412781, "learning_rate": 4.888932014465352e-05, "loss": 2.1067, "step": 806 }, { "epoch": 0.1859125727120889, "grad_norm": 0.5457767248153687, "learning_rate": 4.888338563459226e-05, "loss": 2.1416, "step": 807 }, { "epoch": 0.18614294764729597, "grad_norm": 0.5629678964614868, "learning_rate": 4.887743567443668e-05, "loss": 2.1153, "step": 808 }, { "epoch": 0.18637332258250303, "grad_norm": 0.5647518038749695, "learning_rate": 4.887147026803578e-05, "loss": 2.0925, "step": 809 }, { "epoch": 0.18660369751771008, "grad_norm": 0.5323279500007629, "learning_rate": 4.886548941924858e-05, "loss": 2.0536, "step": 810 }, { "epoch": 0.1868340724529171, "grad_norm": 0.5150105357170105, "learning_rate": 4.8859493131944067e-05, "loss": 2.0759, "step": 811 }, { "epoch": 0.18706444738812417, "grad_norm": 0.5062418580055237, "learning_rate": 4.885348141000122e-05, "loss": 2.0783, "step": 812 }, { "epoch": 0.18729482232333122, "grad_norm": 0.5212958455085754, "learning_rate": 4.8847454257309016e-05, "loss": 2.1124, "step": 813 }, { "epoch": 0.18752519725853828, "grad_norm": 0.7950560450553894, "learning_rate": 4.884141167776639e-05, "loss": 2.1401, "step": 814 }, { "epoch": 0.1877555721937453, "grad_norm": 0.4997531473636627, "learning_rate": 4.883535367528228e-05, "loss": 2.112, "step": 815 }, { "epoch": 0.18798594712895236, "grad_norm": 0.5138757228851318, "learning_rate": 4.882928025377558e-05, "loss": 2.0626, "step": 816 }, { "epoch": 0.18821632206415942, "grad_norm": 0.5386158227920532, "learning_rate": 4.882319141717517e-05, "loss": 2.1007, "step": 817 }, { "epoch": 0.18844669699936648, "grad_norm": 0.6067231893539429, "learning_rate": 4.881708716941992e-05, "loss": 2.1122, "step": 818 }, { "epoch": 0.18867707193457353, "grad_norm": 0.7217810153961182, "learning_rate": 4.881096751445863e-05, "loss": 2.1033, "step": 819 }, { "epoch": 0.18890744686978056, "grad_norm": 0.7960318922996521, "learning_rate": 4.880483245625009e-05, "loss": 2.1123, "step": 820 }, { "epoch": 0.18913782180498762, "grad_norm": 0.7623084783554077, "learning_rate": 4.879868199876305e-05, "loss": 2.1139, "step": 821 }, { "epoch": 0.18936819674019467, "grad_norm": 0.6472498178482056, "learning_rate": 4.879251614597624e-05, "loss": 2.1313, "step": 822 }, { "epoch": 0.18959857167540173, "grad_norm": 0.5661115646362305, "learning_rate": 4.8786334901878314e-05, "loss": 2.1198, "step": 823 }, { "epoch": 0.18982894661060876, "grad_norm": 0.6380001902580261, "learning_rate": 4.8780138270467915e-05, "loss": 2.1152, "step": 824 }, { "epoch": 0.1900593215458158, "grad_norm": 0.5145418643951416, "learning_rate": 4.8773926255753625e-05, "loss": 2.0781, "step": 825 }, { "epoch": 0.19028969648102287, "grad_norm": 0.5228146910667419, "learning_rate": 4.876769886175396e-05, "loss": 2.0868, "step": 826 }, { "epoch": 0.19052007141622992, "grad_norm": 0.5065976977348328, "learning_rate": 4.876145609249742e-05, "loss": 2.0738, "step": 827 }, { "epoch": 0.19075044635143695, "grad_norm": 0.5353624820709229, "learning_rate": 4.875519795202243e-05, "loss": 2.0297, "step": 828 }, { "epoch": 0.190980821286644, "grad_norm": 0.5345510840415955, "learning_rate": 4.874892444437737e-05, "loss": 2.0738, "step": 829 }, { "epoch": 0.19121119622185107, "grad_norm": 0.5596432685852051, "learning_rate": 4.874263557362056e-05, "loss": 2.0638, "step": 830 }, { "epoch": 0.19144157115705812, "grad_norm": 0.5331322550773621, "learning_rate": 4.873633134382022e-05, "loss": 2.066, "step": 831 }, { "epoch": 0.19167194609226515, "grad_norm": 0.6511186361312866, "learning_rate": 4.8730011759054575e-05, "loss": 2.1202, "step": 832 }, { "epoch": 0.1919023210274722, "grad_norm": 0.6131678223609924, "learning_rate": 4.872367682341173e-05, "loss": 2.1086, "step": 833 }, { "epoch": 0.19213269596267926, "grad_norm": 0.619877815246582, "learning_rate": 4.871732654098974e-05, "loss": 2.0882, "step": 834 }, { "epoch": 0.19236307089788632, "grad_norm": 0.6042771935462952, "learning_rate": 4.8710960915896576e-05, "loss": 2.0786, "step": 835 }, { "epoch": 0.19259344583309335, "grad_norm": 0.5842416286468506, "learning_rate": 4.870457995225015e-05, "loss": 2.1345, "step": 836 }, { "epoch": 0.1928238207683004, "grad_norm": 0.5702958106994629, "learning_rate": 4.86981836541783e-05, "loss": 2.0709, "step": 837 }, { "epoch": 0.19305419570350746, "grad_norm": 0.5143870711326599, "learning_rate": 4.869177202581875e-05, "loss": 2.0967, "step": 838 }, { "epoch": 0.19328457063871451, "grad_norm": 0.5121338963508606, "learning_rate": 4.868534507131919e-05, "loss": 2.1057, "step": 839 }, { "epoch": 0.19351494557392157, "grad_norm": 0.6078178882598877, "learning_rate": 4.867890279483717e-05, "loss": 2.0748, "step": 840 }, { "epoch": 0.1937453205091286, "grad_norm": 0.5417404770851135, "learning_rate": 4.867244520054021e-05, "loss": 2.1172, "step": 841 }, { "epoch": 0.19397569544433566, "grad_norm": 0.4854262173175812, "learning_rate": 4.866597229260568e-05, "loss": 2.0702, "step": 842 }, { "epoch": 0.1942060703795427, "grad_norm": 0.4625869393348694, "learning_rate": 4.865948407522091e-05, "loss": 2.0519, "step": 843 }, { "epoch": 0.19443644531474977, "grad_norm": 0.5693988800048828, "learning_rate": 4.86529805525831e-05, "loss": 2.0604, "step": 844 }, { "epoch": 0.1946668202499568, "grad_norm": 0.5548104643821716, "learning_rate": 4.864646172889936e-05, "loss": 2.0359, "step": 845 }, { "epoch": 0.19489719518516385, "grad_norm": 0.5638557076454163, "learning_rate": 4.8639927608386695e-05, "loss": 2.0972, "step": 846 }, { "epoch": 0.1951275701203709, "grad_norm": 0.6276601552963257, "learning_rate": 4.8633378195272015e-05, "loss": 2.1003, "step": 847 }, { "epoch": 0.19535794505557796, "grad_norm": 0.6544185876846313, "learning_rate": 4.862681349379212e-05, "loss": 2.1096, "step": 848 }, { "epoch": 0.195588319990785, "grad_norm": 0.6650116443634033, "learning_rate": 4.862023350819368e-05, "loss": 2.0877, "step": 849 }, { "epoch": 0.19581869492599205, "grad_norm": 0.6193546056747437, "learning_rate": 4.861363824273329e-05, "loss": 2.1327, "step": 850 }, { "epoch": 0.1960490698611991, "grad_norm": 0.5609481334686279, "learning_rate": 4.86070277016774e-05, "loss": 2.0672, "step": 851 }, { "epoch": 0.19627944479640616, "grad_norm": 0.5644036531448364, "learning_rate": 4.860040188930235e-05, "loss": 2.0972, "step": 852 }, { "epoch": 0.1965098197316132, "grad_norm": 0.560346245765686, "learning_rate": 4.8593760809894353e-05, "loss": 2.0654, "step": 853 }, { "epoch": 0.19674019466682025, "grad_norm": 0.5397983193397522, "learning_rate": 4.858710446774951e-05, "loss": 2.0562, "step": 854 }, { "epoch": 0.1969705696020273, "grad_norm": 0.5113317966461182, "learning_rate": 4.8580432867173786e-05, "loss": 2.0852, "step": 855 }, { "epoch": 0.19720094453723436, "grad_norm": 0.5536808967590332, "learning_rate": 4.8573746012483034e-05, "loss": 2.0851, "step": 856 }, { "epoch": 0.19743131947244139, "grad_norm": 0.5026911497116089, "learning_rate": 4.856704390800294e-05, "loss": 2.1035, "step": 857 }, { "epoch": 0.19766169440764844, "grad_norm": 0.5320426225662231, "learning_rate": 4.8560326558069095e-05, "loss": 2.0455, "step": 858 }, { "epoch": 0.1978920693428555, "grad_norm": 0.5496866106987, "learning_rate": 4.8553593967026924e-05, "loss": 2.0417, "step": 859 }, { "epoch": 0.19812244427806255, "grad_norm": 0.5826241970062256, "learning_rate": 4.8546846139231725e-05, "loss": 2.0546, "step": 860 }, { "epoch": 0.1983528192132696, "grad_norm": 0.5962859988212585, "learning_rate": 4.8540083079048645e-05, "loss": 2.0298, "step": 861 }, { "epoch": 0.19858319414847664, "grad_norm": 0.5451933741569519, "learning_rate": 4.85333047908527e-05, "loss": 2.0562, "step": 862 }, { "epoch": 0.1988135690836837, "grad_norm": 0.5343855023384094, "learning_rate": 4.852651127902872e-05, "loss": 2.112, "step": 863 }, { "epoch": 0.19904394401889075, "grad_norm": 0.5266100168228149, "learning_rate": 4.851970254797143e-05, "loss": 2.0229, "step": 864 }, { "epoch": 0.1992743189540978, "grad_norm": 0.4917639195919037, "learning_rate": 4.851287860208538e-05, "loss": 2.0371, "step": 865 }, { "epoch": 0.19950469388930483, "grad_norm": 0.5113179087638855, "learning_rate": 4.850603944578494e-05, "loss": 2.0426, "step": 866 }, { "epoch": 0.1997350688245119, "grad_norm": 0.5337914824485779, "learning_rate": 4.849918508349437e-05, "loss": 2.0666, "step": 867 }, { "epoch": 0.19996544375971895, "grad_norm": 0.4738094210624695, "learning_rate": 4.849231551964771e-05, "loss": 2.0442, "step": 868 }, { "epoch": 0.200195818694926, "grad_norm": 0.5399355292320251, "learning_rate": 4.8485430758688885e-05, "loss": 2.0973, "step": 869 }, { "epoch": 0.20042619363013303, "grad_norm": 0.5631855130195618, "learning_rate": 4.847853080507161e-05, "loss": 2.0381, "step": 870 }, { "epoch": 0.2006565685653401, "grad_norm": 0.576168954372406, "learning_rate": 4.847161566325945e-05, "loss": 2.0681, "step": 871 }, { "epoch": 0.20088694350054714, "grad_norm": 0.6625809669494629, "learning_rate": 4.846468533772579e-05, "loss": 2.0609, "step": 872 }, { "epoch": 0.2011173184357542, "grad_norm": 0.5283498167991638, "learning_rate": 4.845773983295385e-05, "loss": 2.1173, "step": 873 }, { "epoch": 0.20134769337096123, "grad_norm": 0.5141335725784302, "learning_rate": 4.845077915343664e-05, "loss": 2.0194, "step": 874 }, { "epoch": 0.20157806830616828, "grad_norm": 0.5351462364196777, "learning_rate": 4.844380330367701e-05, "loss": 2.0593, "step": 875 }, { "epoch": 0.20180844324137534, "grad_norm": 0.6282929182052612, "learning_rate": 4.843681228818763e-05, "loss": 1.9985, "step": 876 }, { "epoch": 0.2020388181765824, "grad_norm": 0.5234463810920715, "learning_rate": 4.842980611149095e-05, "loss": 2.0625, "step": 877 }, { "epoch": 0.20226919311178942, "grad_norm": 0.5697107911109924, "learning_rate": 4.8422784778119244e-05, "loss": 2.0524, "step": 878 }, { "epoch": 0.20249956804699648, "grad_norm": 0.5521575808525085, "learning_rate": 4.84157482926146e-05, "loss": 2.0694, "step": 879 }, { "epoch": 0.20272994298220354, "grad_norm": 0.6034829616546631, "learning_rate": 4.840869665952892e-05, "loss": 2.0637, "step": 880 }, { "epoch": 0.2029603179174106, "grad_norm": 0.6228094696998596, "learning_rate": 4.840162988342385e-05, "loss": 2.1203, "step": 881 }, { "epoch": 0.20319069285261762, "grad_norm": 0.5739889144897461, "learning_rate": 4.83945479688709e-05, "loss": 2.0898, "step": 882 }, { "epoch": 0.20342106778782468, "grad_norm": 0.5077548027038574, "learning_rate": 4.8387450920451304e-05, "loss": 2.078, "step": 883 }, { "epoch": 0.20365144272303173, "grad_norm": 0.5355992913246155, "learning_rate": 4.8380338742756157e-05, "loss": 2.1247, "step": 884 }, { "epoch": 0.2038818176582388, "grad_norm": 0.5421868562698364, "learning_rate": 4.837321144038629e-05, "loss": 2.0744, "step": 885 }, { "epoch": 0.20411219259344585, "grad_norm": 0.5539047122001648, "learning_rate": 4.8366069017952334e-05, "loss": 2.0912, "step": 886 }, { "epoch": 0.20434256752865287, "grad_norm": 0.5278409123420715, "learning_rate": 4.835891148007471e-05, "loss": 2.0872, "step": 887 }, { "epoch": 0.20457294246385993, "grad_norm": 0.5649670362472534, "learning_rate": 4.835173883138361e-05, "loss": 2.1018, "step": 888 }, { "epoch": 0.204803317399067, "grad_norm": 0.6509919762611389, "learning_rate": 4.834455107651898e-05, "loss": 2.0631, "step": 889 }, { "epoch": 0.20503369233427404, "grad_norm": 0.7405110001564026, "learning_rate": 4.833734822013058e-05, "loss": 2.0003, "step": 890 }, { "epoch": 0.20526406726948107, "grad_norm": 0.6573016047477722, "learning_rate": 4.833013026687791e-05, "loss": 2.054, "step": 891 }, { "epoch": 0.20549444220468813, "grad_norm": 0.6960479617118835, "learning_rate": 4.832289722143024e-05, "loss": 2.1094, "step": 892 }, { "epoch": 0.20572481713989518, "grad_norm": 0.5651811957359314, "learning_rate": 4.831564908846661e-05, "loss": 2.0439, "step": 893 }, { "epoch": 0.20595519207510224, "grad_norm": 0.533174455165863, "learning_rate": 4.830838587267582e-05, "loss": 2.06, "step": 894 }, { "epoch": 0.20618556701030927, "grad_norm": 0.5143818855285645, "learning_rate": 4.830110757875642e-05, "loss": 2.0373, "step": 895 }, { "epoch": 0.20641594194551632, "grad_norm": 0.5714979767799377, "learning_rate": 4.829381421141671e-05, "loss": 2.0758, "step": 896 }, { "epoch": 0.20664631688072338, "grad_norm": 0.5100910067558289, "learning_rate": 4.8286505775374766e-05, "loss": 2.0713, "step": 897 }, { "epoch": 0.20687669181593044, "grad_norm": 0.4907883405685425, "learning_rate": 4.8279182275358384e-05, "loss": 2.0687, "step": 898 }, { "epoch": 0.20710706675113746, "grad_norm": 0.5295329689979553, "learning_rate": 4.827184371610511e-05, "loss": 2.0115, "step": 899 }, { "epoch": 0.20733744168634452, "grad_norm": 0.5228695273399353, "learning_rate": 4.826449010236225e-05, "loss": 2.0198, "step": 900 }, { "epoch": 0.20756781662155158, "grad_norm": 0.5062867999076843, "learning_rate": 4.825712143888682e-05, "loss": 2.1208, "step": 901 }, { "epoch": 0.20779819155675863, "grad_norm": 0.6005653142929077, "learning_rate": 4.82497377304456e-05, "loss": 2.0875, "step": 902 }, { "epoch": 0.20802856649196566, "grad_norm": 0.4995572865009308, "learning_rate": 4.824233898181509e-05, "loss": 2.0447, "step": 903 }, { "epoch": 0.20825894142717272, "grad_norm": 0.5190582275390625, "learning_rate": 4.823492519778151e-05, "loss": 2.0637, "step": 904 }, { "epoch": 0.20848931636237977, "grad_norm": 0.49516943097114563, "learning_rate": 4.822749638314083e-05, "loss": 2.0805, "step": 905 }, { "epoch": 0.20871969129758683, "grad_norm": 0.5479497909545898, "learning_rate": 4.822005254269871e-05, "loss": 2.0357, "step": 906 }, { "epoch": 0.20895006623279389, "grad_norm": 0.5074241161346436, "learning_rate": 4.821259368127057e-05, "loss": 2.043, "step": 907 }, { "epoch": 0.2091804411680009, "grad_norm": 0.5113227367401123, "learning_rate": 4.820511980368152e-05, "loss": 2.0259, "step": 908 }, { "epoch": 0.20941081610320797, "grad_norm": 0.5276791453361511, "learning_rate": 4.819763091476638e-05, "loss": 2.0618, "step": 909 }, { "epoch": 0.20964119103841503, "grad_norm": 0.49579286575317383, "learning_rate": 4.81901270193697e-05, "loss": 2.0409, "step": 910 }, { "epoch": 0.20987156597362208, "grad_norm": 0.5343020558357239, "learning_rate": 4.818260812234572e-05, "loss": 2.0152, "step": 911 }, { "epoch": 0.2101019409088291, "grad_norm": 0.5289919972419739, "learning_rate": 4.8175074228558405e-05, "loss": 2.0145, "step": 912 }, { "epoch": 0.21033231584403617, "grad_norm": 0.5177943110466003, "learning_rate": 4.81675253428814e-05, "loss": 2.0718, "step": 913 }, { "epoch": 0.21056269077924322, "grad_norm": 0.5182514190673828, "learning_rate": 4.8159961470198065e-05, "loss": 2.0895, "step": 914 }, { "epoch": 0.21079306571445028, "grad_norm": 0.500887393951416, "learning_rate": 4.815238261540145e-05, "loss": 2.0088, "step": 915 }, { "epoch": 0.2110234406496573, "grad_norm": 0.5029407739639282, "learning_rate": 4.814478878339428e-05, "loss": 2.041, "step": 916 }, { "epoch": 0.21125381558486436, "grad_norm": 0.4836624264717102, "learning_rate": 4.8137179979088995e-05, "loss": 2.0685, "step": 917 }, { "epoch": 0.21148419052007142, "grad_norm": 0.49370288848876953, "learning_rate": 4.81295562074077e-05, "loss": 2.0656, "step": 918 }, { "epoch": 0.21171456545527848, "grad_norm": 0.5092511773109436, "learning_rate": 4.8121917473282204e-05, "loss": 2.0752, "step": 919 }, { "epoch": 0.2119449403904855, "grad_norm": 0.5204573273658752, "learning_rate": 4.811426378165398e-05, "loss": 2.0171, "step": 920 }, { "epoch": 0.21217531532569256, "grad_norm": 0.47680389881134033, "learning_rate": 4.810659513747416e-05, "loss": 2.0691, "step": 921 }, { "epoch": 0.21240569026089962, "grad_norm": 0.49312111735343933, "learning_rate": 4.8098911545703603e-05, "loss": 2.0473, "step": 922 }, { "epoch": 0.21263606519610667, "grad_norm": 0.5237171053886414, "learning_rate": 4.8091213011312776e-05, "loss": 2.0355, "step": 923 }, { "epoch": 0.2128664401313137, "grad_norm": 0.5351514220237732, "learning_rate": 4.808349953928184e-05, "loss": 2.0622, "step": 924 }, { "epoch": 0.21309681506652076, "grad_norm": 0.5261739492416382, "learning_rate": 4.807577113460063e-05, "loss": 2.0777, "step": 925 }, { "epoch": 0.2133271900017278, "grad_norm": 0.5481389164924622, "learning_rate": 4.806802780226862e-05, "loss": 2.0392, "step": 926 }, { "epoch": 0.21355756493693487, "grad_norm": 0.46465179324150085, "learning_rate": 4.8060269547294957e-05, "loss": 2.0853, "step": 927 }, { "epoch": 0.21378793987214192, "grad_norm": 0.483564168214798, "learning_rate": 4.8052496374698424e-05, "loss": 1.9709, "step": 928 }, { "epoch": 0.21401831480734895, "grad_norm": 0.4891057014465332, "learning_rate": 4.804470828950748e-05, "loss": 2.0403, "step": 929 }, { "epoch": 0.214248689742556, "grad_norm": 0.47752058506011963, "learning_rate": 4.803690529676019e-05, "loss": 2.0635, "step": 930 }, { "epoch": 0.21447906467776306, "grad_norm": 0.4904891550540924, "learning_rate": 4.802908740150431e-05, "loss": 2.0362, "step": 931 }, { "epoch": 0.21470943961297012, "grad_norm": 0.5039438605308533, "learning_rate": 4.8021254608797214e-05, "loss": 2.0504, "step": 932 }, { "epoch": 0.21493981454817715, "grad_norm": 0.5997321605682373, "learning_rate": 4.801340692370591e-05, "loss": 2.0369, "step": 933 }, { "epoch": 0.2151701894833842, "grad_norm": 0.5206987857818604, "learning_rate": 4.800554435130703e-05, "loss": 2.0494, "step": 934 }, { "epoch": 0.21540056441859126, "grad_norm": 0.48666244745254517, "learning_rate": 4.799766689668687e-05, "loss": 2.0442, "step": 935 }, { "epoch": 0.21563093935379832, "grad_norm": 0.5062952041625977, "learning_rate": 4.798977456494131e-05, "loss": 2.0776, "step": 936 }, { "epoch": 0.21586131428900535, "grad_norm": 0.5487467050552368, "learning_rate": 4.798186736117589e-05, "loss": 2.033, "step": 937 }, { "epoch": 0.2160916892242124, "grad_norm": 0.47973212599754333, "learning_rate": 4.7973945290505766e-05, "loss": 2.0227, "step": 938 }, { "epoch": 0.21632206415941946, "grad_norm": 0.8392747640609741, "learning_rate": 4.796600835805569e-05, "loss": 2.0427, "step": 939 }, { "epoch": 0.21655243909462651, "grad_norm": 0.6689661741256714, "learning_rate": 4.795805656896005e-05, "loss": 2.0664, "step": 940 }, { "epoch": 0.21678281402983354, "grad_norm": 0.7187620997428894, "learning_rate": 4.795008992836283e-05, "loss": 2.026, "step": 941 }, { "epoch": 0.2170131889650406, "grad_norm": 0.7304360866546631, "learning_rate": 4.794210844141763e-05, "loss": 2.023, "step": 942 }, { "epoch": 0.21724356390024765, "grad_norm": 1.2929189205169678, "learning_rate": 4.7934112113287647e-05, "loss": 2.1077, "step": 943 }, { "epoch": 0.2174739388354547, "grad_norm": 0.5875136256217957, "learning_rate": 4.7926100949145685e-05, "loss": 2.0453, "step": 944 }, { "epoch": 0.21770431377066174, "grad_norm": 0.7315645813941956, "learning_rate": 4.791807495417414e-05, "loss": 2.0729, "step": 945 }, { "epoch": 0.2179346887058688, "grad_norm": 1.6160253286361694, "learning_rate": 4.791003413356502e-05, "loss": 2.0722, "step": 946 }, { "epoch": 0.21816506364107585, "grad_norm": 8.241984367370605, "learning_rate": 4.7901978492519894e-05, "loss": 2.1946, "step": 947 }, { "epoch": 0.2183954385762829, "grad_norm": 0.9933608174324036, "learning_rate": 4.7893908036249935e-05, "loss": 2.1311, "step": 948 }, { "epoch": 0.21862581351148996, "grad_norm": 8.58627700805664, "learning_rate": 4.788582276997591e-05, "loss": 2.2089, "step": 949 }, { "epoch": 0.218856188446697, "grad_norm": 7.712833881378174, "learning_rate": 4.787772269892813e-05, "loss": 2.5341, "step": 950 }, { "epoch": 0.21908656338190405, "grad_norm": 1.9506646394729614, "learning_rate": 4.786960782834653e-05, "loss": 2.3956, "step": 951 }, { "epoch": 0.2193169383171111, "grad_norm": 11.799286842346191, "learning_rate": 4.78614781634806e-05, "loss": 2.4234, "step": 952 }, { "epoch": 0.21954731325231816, "grad_norm": 13.39142894744873, "learning_rate": 4.785333370958938e-05, "loss": 2.735, "step": 953 }, { "epoch": 0.2197776881875252, "grad_norm": 12.80130386352539, "learning_rate": 4.78451744719415e-05, "loss": 3.1358, "step": 954 }, { "epoch": 0.22000806312273224, "grad_norm": 7.136785507202148, "learning_rate": 4.783700045581515e-05, "loss": 2.6964, "step": 955 }, { "epoch": 0.2202384380579393, "grad_norm": 7.418359279632568, "learning_rate": 4.782881166649808e-05, "loss": 2.9413, "step": 956 }, { "epoch": 0.22046881299314636, "grad_norm": 5.814896106719971, "learning_rate": 4.782060810928759e-05, "loss": 2.7091, "step": 957 }, { "epoch": 0.22069918792835339, "grad_norm": 4.879452705383301, "learning_rate": 4.781238978949054e-05, "loss": 2.5247, "step": 958 }, { "epoch": 0.22092956286356044, "grad_norm": 9.947772979736328, "learning_rate": 4.780415671242334e-05, "loss": 2.5546, "step": 959 }, { "epoch": 0.2211599377987675, "grad_norm": 4.335864543914795, "learning_rate": 4.779590888341193e-05, "loss": 2.4045, "step": 960 }, { "epoch": 0.22139031273397455, "grad_norm": 3.079728603363037, "learning_rate": 4.778764630779183e-05, "loss": 2.3689, "step": 961 }, { "epoch": 0.22162068766918158, "grad_norm": 84.57737731933594, "learning_rate": 4.7779368990908064e-05, "loss": 5.3862, "step": 962 }, { "epoch": 0.22185106260438864, "grad_norm": 13.086831092834473, "learning_rate": 4.77710769381152e-05, "loss": 5.4628, "step": 963 }, { "epoch": 0.2220814375395957, "grad_norm": 10.120466232299805, "learning_rate": 4.7762770154777355e-05, "loss": 4.2093, "step": 964 }, { "epoch": 0.22231181247480275, "grad_norm": 6.079960823059082, "learning_rate": 4.775444864626816e-05, "loss": 3.9091, "step": 965 }, { "epoch": 0.22254218741000978, "grad_norm": 7.753759860992432, "learning_rate": 4.7746112417970766e-05, "loss": 3.721, "step": 966 }, { "epoch": 0.22277256234521683, "grad_norm": 11.135107040405273, "learning_rate": 4.7737761475277866e-05, "loss": 3.6669, "step": 967 }, { "epoch": 0.2230029372804239, "grad_norm": 6.873994827270508, "learning_rate": 4.772939582359166e-05, "loss": 3.494, "step": 968 }, { "epoch": 0.22323331221563095, "grad_norm": 4.145374774932861, "learning_rate": 4.772101546832386e-05, "loss": 2.7548, "step": 969 }, { "epoch": 0.22346368715083798, "grad_norm": 4.056282043457031, "learning_rate": 4.7712620414895693e-05, "loss": 2.6502, "step": 970 }, { "epoch": 0.22369406208604503, "grad_norm": 6.68391227722168, "learning_rate": 4.77042106687379e-05, "loss": 2.7443, "step": 971 }, { "epoch": 0.2239244370212521, "grad_norm": 2.9102392196655273, "learning_rate": 4.769578623529073e-05, "loss": 2.539, "step": 972 }, { "epoch": 0.22415481195645914, "grad_norm": 2.0660698413848877, "learning_rate": 4.76873471200039e-05, "loss": 2.4053, "step": 973 }, { "epoch": 0.2243851868916662, "grad_norm": 1.6773111820220947, "learning_rate": 4.767889332833667e-05, "loss": 2.3998, "step": 974 }, { "epoch": 0.22461556182687323, "grad_norm": 1.476398229598999, "learning_rate": 4.767042486575777e-05, "loss": 2.2869, "step": 975 }, { "epoch": 0.22484593676208028, "grad_norm": 1.44740891456604, "learning_rate": 4.766194173774543e-05, "loss": 2.227, "step": 976 }, { "epoch": 0.22507631169728734, "grad_norm": 1.517426609992981, "learning_rate": 4.7653443949787345e-05, "loss": 2.1922, "step": 977 }, { "epoch": 0.2253066866324944, "grad_norm": 2.1111059188842773, "learning_rate": 4.7644931507380725e-05, "loss": 2.2598, "step": 978 }, { "epoch": 0.22553706156770142, "grad_norm": 0.8521562218666077, "learning_rate": 4.7636404416032234e-05, "loss": 2.2054, "step": 979 }, { "epoch": 0.22576743650290848, "grad_norm": 1.4148415327072144, "learning_rate": 4.7627862681258037e-05, "loss": 2.2075, "step": 980 }, { "epoch": 0.22599781143811554, "grad_norm": 1.2091388702392578, "learning_rate": 4.761930630858374e-05, "loss": 2.1845, "step": 981 }, { "epoch": 0.2262281863733226, "grad_norm": 2.2387495040893555, "learning_rate": 4.761073530354445e-05, "loss": 2.1932, "step": 982 }, { "epoch": 0.22645856130852962, "grad_norm": 0.8243889212608337, "learning_rate": 4.760214967168472e-05, "loss": 2.1451, "step": 983 }, { "epoch": 0.22668893624373668, "grad_norm": 1.1334463357925415, "learning_rate": 4.759354941855857e-05, "loss": 2.1866, "step": 984 }, { "epoch": 0.22691931117894373, "grad_norm": 0.7079467177391052, "learning_rate": 4.7584934549729484e-05, "loss": 2.1209, "step": 985 }, { "epoch": 0.2271496861141508, "grad_norm": 0.704230785369873, "learning_rate": 4.7576305070770396e-05, "loss": 2.092, "step": 986 }, { "epoch": 0.22738006104935782, "grad_norm": 0.6850078701972961, "learning_rate": 4.756766098726368e-05, "loss": 2.1518, "step": 987 }, { "epoch": 0.22761043598456487, "grad_norm": 0.7150455713272095, "learning_rate": 4.755900230480119e-05, "loss": 2.0746, "step": 988 }, { "epoch": 0.22784081091977193, "grad_norm": 0.5962232947349548, "learning_rate": 4.7550329028984184e-05, "loss": 2.1416, "step": 989 }, { "epoch": 0.22807118585497899, "grad_norm": 0.6270462274551392, "learning_rate": 4.75416411654234e-05, "loss": 2.0475, "step": 990 }, { "epoch": 0.22830156079018601, "grad_norm": 0.6985633969306946, "learning_rate": 4.753293871973896e-05, "loss": 2.1416, "step": 991 }, { "epoch": 0.22853193572539307, "grad_norm": 0.7326110005378723, "learning_rate": 4.752422169756048e-05, "loss": 2.0612, "step": 992 }, { "epoch": 0.22876231066060013, "grad_norm": 0.6179402470588684, "learning_rate": 4.7515490104526964e-05, "loss": 2.0654, "step": 993 }, { "epoch": 0.22899268559580718, "grad_norm": 0.7471519112586975, "learning_rate": 4.750674394628687e-05, "loss": 2.021, "step": 994 }, { "epoch": 0.22922306053101424, "grad_norm": 0.6335756182670593, "learning_rate": 4.749798322849803e-05, "loss": 2.1516, "step": 995 }, { "epoch": 0.22945343546622127, "grad_norm": 0.6244813203811646, "learning_rate": 4.748920795682776e-05, "loss": 2.1022, "step": 996 }, { "epoch": 0.22968381040142832, "grad_norm": 0.5891891717910767, "learning_rate": 4.7480418136952755e-05, "loss": 2.0653, "step": 997 }, { "epoch": 0.22991418533663538, "grad_norm": 0.5867310762405396, "learning_rate": 4.7471613774559104e-05, "loss": 2.1058, "step": 998 }, { "epoch": 0.23014456027184244, "grad_norm": 0.5713858604431152, "learning_rate": 4.746279487534234e-05, "loss": 2.0689, "step": 999 }, { "epoch": 0.23037493520704946, "grad_norm": 0.5579650402069092, "learning_rate": 4.745396144500738e-05, "loss": 2.0643, "step": 1000 }, { "epoch": 0.23060531014225652, "grad_norm": 0.5547041893005371, "learning_rate": 4.7445113489268544e-05, "loss": 2.0634, "step": 1001 }, { "epoch": 0.23083568507746358, "grad_norm": 0.5505419373512268, "learning_rate": 4.7436251013849556e-05, "loss": 2.0568, "step": 1002 }, { "epoch": 0.23106606001267063, "grad_norm": 0.4974428713321686, "learning_rate": 4.742737402448351e-05, "loss": 2.0249, "step": 1003 }, { "epoch": 0.23129643494787766, "grad_norm": 0.5056861042976379, "learning_rate": 4.741848252691292e-05, "loss": 2.091, "step": 1004 }, { "epoch": 0.23152680988308472, "grad_norm": 0.5382207036018372, "learning_rate": 4.740957652688968e-05, "loss": 2.0606, "step": 1005 }, { "epoch": 0.23175718481829177, "grad_norm": 0.545324444770813, "learning_rate": 4.740065603017503e-05, "loss": 2.0455, "step": 1006 }, { "epoch": 0.23198755975349883, "grad_norm": 0.5591619610786438, "learning_rate": 4.739172104253962e-05, "loss": 2.0233, "step": 1007 }, { "epoch": 0.23221793468870586, "grad_norm": 0.46150362491607666, "learning_rate": 4.7382771569763485e-05, "loss": 2.0342, "step": 1008 }, { "epoch": 0.2324483096239129, "grad_norm": 0.6455093026161194, "learning_rate": 4.7373807617636004e-05, "loss": 2.056, "step": 1009 }, { "epoch": 0.23267868455911997, "grad_norm": 0.4814119338989258, "learning_rate": 4.736482919195593e-05, "loss": 2.0655, "step": 1010 }, { "epoch": 0.23290905949432703, "grad_norm": 0.5634877681732178, "learning_rate": 4.735583629853138e-05, "loss": 2.0934, "step": 1011 }, { "epoch": 0.23313943442953405, "grad_norm": 0.4708523750305176, "learning_rate": 4.7346828943179835e-05, "loss": 2.0705, "step": 1012 }, { "epoch": 0.2333698093647411, "grad_norm": 0.5644976496696472, "learning_rate": 4.7337807131728116e-05, "loss": 2.0142, "step": 1013 }, { "epoch": 0.23360018429994817, "grad_norm": 0.5271159410476685, "learning_rate": 4.732877087001243e-05, "loss": 1.9968, "step": 1014 }, { "epoch": 0.23383055923515522, "grad_norm": 0.5308985114097595, "learning_rate": 4.731972016387829e-05, "loss": 2.0506, "step": 1015 }, { "epoch": 0.23406093417036228, "grad_norm": 0.5247502326965332, "learning_rate": 4.7310655019180584e-05, "loss": 2.0583, "step": 1016 }, { "epoch": 0.2342913091055693, "grad_norm": 0.4533621370792389, "learning_rate": 4.7301575441783525e-05, "loss": 2.0807, "step": 1017 }, { "epoch": 0.23452168404077636, "grad_norm": 0.45207834243774414, "learning_rate": 4.7292481437560656e-05, "loss": 2.0368, "step": 1018 }, { "epoch": 0.23475205897598342, "grad_norm": 0.506277322769165, "learning_rate": 4.728337301239487e-05, "loss": 2.0541, "step": 1019 }, { "epoch": 0.23498243391119047, "grad_norm": 0.5450207591056824, "learning_rate": 4.727425017217839e-05, "loss": 2.0524, "step": 1020 }, { "epoch": 0.2352128088463975, "grad_norm": 0.9947420954704285, "learning_rate": 4.7265112922812745e-05, "loss": 2.0177, "step": 1021 }, { "epoch": 0.23544318378160456, "grad_norm": 0.4634150266647339, "learning_rate": 4.725596127020879e-05, "loss": 2.0954, "step": 1022 }, { "epoch": 0.23567355871681162, "grad_norm": 0.4866515100002289, "learning_rate": 4.724679522028672e-05, "loss": 2.0134, "step": 1023 }, { "epoch": 0.23590393365201867, "grad_norm": 0.7651410698890686, "learning_rate": 4.723761477897601e-05, "loss": 2.0339, "step": 1024 }, { "epoch": 0.2361343085872257, "grad_norm": 0.44987305998802185, "learning_rate": 4.7228419952215475e-05, "loss": 1.9922, "step": 1025 }, { "epoch": 0.23636468352243276, "grad_norm": 0.536471426486969, "learning_rate": 4.721921074595321e-05, "loss": 2.0278, "step": 1026 }, { "epoch": 0.2365950584576398, "grad_norm": 0.4902123212814331, "learning_rate": 4.720998716614663e-05, "loss": 2.0402, "step": 1027 }, { "epoch": 0.23682543339284687, "grad_norm": 0.4873715341091156, "learning_rate": 4.720074921876245e-05, "loss": 2.0202, "step": 1028 }, { "epoch": 0.2370558083280539, "grad_norm": 0.5346301794052124, "learning_rate": 4.719149690977666e-05, "loss": 2.0375, "step": 1029 }, { "epoch": 0.23728618326326095, "grad_norm": 0.49098095297813416, "learning_rate": 4.7182230245174567e-05, "loss": 2.0505, "step": 1030 }, { "epoch": 0.237516558198468, "grad_norm": 0.48706039786338806, "learning_rate": 4.717294923095073e-05, "loss": 2.0116, "step": 1031 }, { "epoch": 0.23774693313367506, "grad_norm": 0.6115936040878296, "learning_rate": 4.716365387310902e-05, "loss": 1.9638, "step": 1032 }, { "epoch": 0.2379773080688821, "grad_norm": 0.45162615180015564, "learning_rate": 4.7154344177662576e-05, "loss": 2.051, "step": 1033 }, { "epoch": 0.23820768300408915, "grad_norm": 0.48450273275375366, "learning_rate": 4.714502015063383e-05, "loss": 1.9951, "step": 1034 }, { "epoch": 0.2384380579392962, "grad_norm": 0.4850042462348938, "learning_rate": 4.7135681798054444e-05, "loss": 2.0191, "step": 1035 }, { "epoch": 0.23866843287450326, "grad_norm": 0.4766961932182312, "learning_rate": 4.712632912596538e-05, "loss": 1.9856, "step": 1036 }, { "epoch": 0.23889880780971032, "grad_norm": 0.5031050443649292, "learning_rate": 4.711696214041687e-05, "loss": 2.0517, "step": 1037 }, { "epoch": 0.23912918274491735, "grad_norm": 0.5670180916786194, "learning_rate": 4.710758084746837e-05, "loss": 2.0422, "step": 1038 }, { "epoch": 0.2393595576801244, "grad_norm": 0.49376407265663147, "learning_rate": 4.709818525318862e-05, "loss": 1.9816, "step": 1039 }, { "epoch": 0.23958993261533146, "grad_norm": 0.44807344675064087, "learning_rate": 4.7088775363655605e-05, "loss": 2.0387, "step": 1040 }, { "epoch": 0.23982030755053851, "grad_norm": 0.49379485845565796, "learning_rate": 4.707935118495656e-05, "loss": 2.0385, "step": 1041 }, { "epoch": 0.24005068248574554, "grad_norm": 0.4397232234477997, "learning_rate": 4.706991272318794e-05, "loss": 2.0022, "step": 1042 }, { "epoch": 0.2402810574209526, "grad_norm": 0.4754374325275421, "learning_rate": 4.706045998445548e-05, "loss": 1.9952, "step": 1043 }, { "epoch": 0.24051143235615965, "grad_norm": 0.47624948620796204, "learning_rate": 4.705099297487412e-05, "loss": 1.9878, "step": 1044 }, { "epoch": 0.2407418072913667, "grad_norm": 0.5898109078407288, "learning_rate": 4.704151170056804e-05, "loss": 1.9921, "step": 1045 }, { "epoch": 0.24097218222657374, "grad_norm": 0.48343077301979065, "learning_rate": 4.703201616767067e-05, "loss": 1.9645, "step": 1046 }, { "epoch": 0.2412025571617808, "grad_norm": 0.5166682004928589, "learning_rate": 4.7022506382324606e-05, "loss": 1.9908, "step": 1047 }, { "epoch": 0.24143293209698785, "grad_norm": 0.48362958431243896, "learning_rate": 4.701298235068173e-05, "loss": 1.9868, "step": 1048 }, { "epoch": 0.2416633070321949, "grad_norm": 0.4480111598968506, "learning_rate": 4.70034440789031e-05, "loss": 1.9922, "step": 1049 }, { "epoch": 0.24189368196740194, "grad_norm": 0.47226545214653015, "learning_rate": 4.6993891573159006e-05, "loss": 2.0396, "step": 1050 }, { "epoch": 0.242124056902609, "grad_norm": 0.4919982850551605, "learning_rate": 4.6984324839628926e-05, "loss": 2.0081, "step": 1051 }, { "epoch": 0.24235443183781605, "grad_norm": 0.5027483701705933, "learning_rate": 4.697474388450155e-05, "loss": 1.9677, "step": 1052 }, { "epoch": 0.2425848067730231, "grad_norm": 0.47407644987106323, "learning_rate": 4.696514871397479e-05, "loss": 1.992, "step": 1053 }, { "epoch": 0.24281518170823013, "grad_norm": 0.43526914715766907, "learning_rate": 4.6955539334255716e-05, "loss": 2.0272, "step": 1054 }, { "epoch": 0.2430455566434372, "grad_norm": 0.47365888953208923, "learning_rate": 4.694591575156061e-05, "loss": 2.0388, "step": 1055 }, { "epoch": 0.24327593157864424, "grad_norm": 0.4722292721271515, "learning_rate": 4.693627797211496e-05, "loss": 2.0091, "step": 1056 }, { "epoch": 0.2435063065138513, "grad_norm": 0.4864807426929474, "learning_rate": 4.692662600215339e-05, "loss": 1.9619, "step": 1057 }, { "epoch": 0.24373668144905833, "grad_norm": 0.42004841566085815, "learning_rate": 4.691695984791975e-05, "loss": 2.0005, "step": 1058 }, { "epoch": 0.24396705638426538, "grad_norm": 0.5233227014541626, "learning_rate": 4.690727951566704e-05, "loss": 2.0148, "step": 1059 }, { "epoch": 0.24419743131947244, "grad_norm": 0.4346723258495331, "learning_rate": 4.689758501165744e-05, "loss": 2.0135, "step": 1060 }, { "epoch": 0.2444278062546795, "grad_norm": 0.4188937246799469, "learning_rate": 4.688787634216231e-05, "loss": 2.037, "step": 1061 }, { "epoch": 0.24465818118988655, "grad_norm": 0.5445407629013062, "learning_rate": 4.687815351346214e-05, "loss": 2.0151, "step": 1062 }, { "epoch": 0.24488855612509358, "grad_norm": 0.517763614654541, "learning_rate": 4.686841653184662e-05, "loss": 2.0322, "step": 1063 }, { "epoch": 0.24511893106030064, "grad_norm": 0.48924747109413147, "learning_rate": 4.685866540361456e-05, "loss": 2.0327, "step": 1064 }, { "epoch": 0.2453493059955077, "grad_norm": 0.4441649317741394, "learning_rate": 4.6848900135073936e-05, "loss": 2.065, "step": 1065 }, { "epoch": 0.24557968093071475, "grad_norm": 0.47886329889297485, "learning_rate": 4.6839120732541894e-05, "loss": 2.0264, "step": 1066 }, { "epoch": 0.24581005586592178, "grad_norm": 0.4595479369163513, "learning_rate": 4.6829327202344675e-05, "loss": 1.9709, "step": 1067 }, { "epoch": 0.24604043080112883, "grad_norm": 0.42346882820129395, "learning_rate": 4.6819519550817706e-05, "loss": 2.0082, "step": 1068 }, { "epoch": 0.2462708057363359, "grad_norm": 0.4591730833053589, "learning_rate": 4.6809697784305514e-05, "loss": 2.0097, "step": 1069 }, { "epoch": 0.24650118067154295, "grad_norm": 0.48160478472709656, "learning_rate": 4.679986190916179e-05, "loss": 2.0095, "step": 1070 }, { "epoch": 0.24673155560674997, "grad_norm": 0.4517980217933655, "learning_rate": 4.6790011931749314e-05, "loss": 2.0457, "step": 1071 }, { "epoch": 0.24696193054195703, "grad_norm": 0.48921072483062744, "learning_rate": 4.678014785844002e-05, "loss": 1.9841, "step": 1072 }, { "epoch": 0.2471923054771641, "grad_norm": 0.45237547159194946, "learning_rate": 4.677026969561494e-05, "loss": 2.0069, "step": 1073 }, { "epoch": 0.24742268041237114, "grad_norm": 0.45617493987083435, "learning_rate": 4.676037744966425e-05, "loss": 2.0386, "step": 1074 }, { "epoch": 0.24765305534757817, "grad_norm": 0.49525871872901917, "learning_rate": 4.675047112698719e-05, "loss": 2.0346, "step": 1075 }, { "epoch": 0.24788343028278523, "grad_norm": 0.47472235560417175, "learning_rate": 4.674055073399215e-05, "loss": 2.0339, "step": 1076 }, { "epoch": 0.24811380521799228, "grad_norm": 0.4820954203605652, "learning_rate": 4.6730616277096596e-05, "loss": 1.9942, "step": 1077 }, { "epoch": 0.24834418015319934, "grad_norm": 0.49959203600883484, "learning_rate": 4.67206677627271e-05, "loss": 1.9899, "step": 1078 }, { "epoch": 0.24857455508840637, "grad_norm": 0.544087827205658, "learning_rate": 4.671070519731933e-05, "loss": 2.0513, "step": 1079 }, { "epoch": 0.24880493002361342, "grad_norm": 0.5082846879959106, "learning_rate": 4.670072858731804e-05, "loss": 2.0109, "step": 1080 }, { "epoch": 0.24903530495882048, "grad_norm": 0.513576328754425, "learning_rate": 4.6690737939177065e-05, "loss": 2.0296, "step": 1081 }, { "epoch": 0.24926567989402754, "grad_norm": 0.5070263743400574, "learning_rate": 4.6680733259359346e-05, "loss": 2.0493, "step": 1082 }, { "epoch": 0.2494960548292346, "grad_norm": 0.5043009519577026, "learning_rate": 4.6670714554336855e-05, "loss": 2.0505, "step": 1083 }, { "epoch": 0.24972642976444162, "grad_norm": 0.4745994210243225, "learning_rate": 4.666068183059068e-05, "loss": 2.0279, "step": 1084 }, { "epoch": 0.24995680469964868, "grad_norm": 0.46335962414741516, "learning_rate": 4.665063509461097e-05, "loss": 2.0011, "step": 1085 }, { "epoch": 0.2501871796348557, "grad_norm": 0.5431767702102661, "learning_rate": 4.6640574352896915e-05, "loss": 1.9867, "step": 1086 }, { "epoch": 0.25041755457006276, "grad_norm": 0.5184893608093262, "learning_rate": 4.663049961195678e-05, "loss": 1.9652, "step": 1087 }, { "epoch": 0.2506479295052698, "grad_norm": 0.4528964161872864, "learning_rate": 4.662041087830789e-05, "loss": 2.0153, "step": 1088 }, { "epoch": 0.2508783044404769, "grad_norm": 0.49012291431427, "learning_rate": 4.6610308158476624e-05, "loss": 1.9138, "step": 1089 }, { "epoch": 0.25110867937568393, "grad_norm": 0.44640523195266724, "learning_rate": 4.6600191458998396e-05, "loss": 1.96, "step": 1090 }, { "epoch": 0.251339054310891, "grad_norm": 0.41204601526260376, "learning_rate": 4.659006078641767e-05, "loss": 1.98, "step": 1091 }, { "epoch": 0.25156942924609804, "grad_norm": 0.4923611581325531, "learning_rate": 4.657991614728795e-05, "loss": 2.0084, "step": 1092 }, { "epoch": 0.2517998041813051, "grad_norm": 0.4453749358654022, "learning_rate": 4.6569757548171776e-05, "loss": 2.0035, "step": 1093 }, { "epoch": 0.2520301791165121, "grad_norm": 0.4155834913253784, "learning_rate": 4.655958499564072e-05, "loss": 1.9834, "step": 1094 }, { "epoch": 0.25226055405171915, "grad_norm": 0.4551054835319519, "learning_rate": 4.654939849627538e-05, "loss": 1.9988, "step": 1095 }, { "epoch": 0.2524909289869262, "grad_norm": 0.5109996795654297, "learning_rate": 4.653919805666535e-05, "loss": 1.9888, "step": 1096 }, { "epoch": 0.25272130392213327, "grad_norm": 0.45431843400001526, "learning_rate": 4.65289836834093e-05, "loss": 1.9685, "step": 1097 }, { "epoch": 0.2529516788573403, "grad_norm": 0.6104994416236877, "learning_rate": 4.651875538311484e-05, "loss": 2.0004, "step": 1098 }, { "epoch": 0.2531820537925474, "grad_norm": 0.4249247908592224, "learning_rate": 4.650851316239867e-05, "loss": 2.0696, "step": 1099 }, { "epoch": 0.25341242872775444, "grad_norm": 0.5120058655738831, "learning_rate": 4.649825702788643e-05, "loss": 1.9629, "step": 1100 }, { "epoch": 0.2536428036629615, "grad_norm": 0.4191873073577881, "learning_rate": 4.648798698621278e-05, "loss": 2.0072, "step": 1101 }, { "epoch": 0.2538731785981685, "grad_norm": 0.4350895583629608, "learning_rate": 4.647770304402139e-05, "loss": 2.005, "step": 1102 }, { "epoch": 0.25410355353337555, "grad_norm": 0.4393550753593445, "learning_rate": 4.6467405207964914e-05, "loss": 2.0247, "step": 1103 }, { "epoch": 0.2543339284685826, "grad_norm": 0.44355711340904236, "learning_rate": 4.645709348470499e-05, "loss": 2.0081, "step": 1104 }, { "epoch": 0.25456430340378966, "grad_norm": 0.5045775771141052, "learning_rate": 4.6446767880912246e-05, "loss": 2.0123, "step": 1105 }, { "epoch": 0.2547946783389967, "grad_norm": 0.5198317170143127, "learning_rate": 4.643642840326627e-05, "loss": 2.0316, "step": 1106 }, { "epoch": 0.2550250532742038, "grad_norm": 0.5485312938690186, "learning_rate": 4.642607505845567e-05, "loss": 2.0354, "step": 1107 }, { "epoch": 0.25525542820941083, "grad_norm": 0.5868772864341736, "learning_rate": 4.641570785317797e-05, "loss": 2.0385, "step": 1108 }, { "epoch": 0.2554858031446179, "grad_norm": 0.5563178062438965, "learning_rate": 4.6405326794139696e-05, "loss": 2.0053, "step": 1109 }, { "epoch": 0.25571617807982494, "grad_norm": 0.4433397948741913, "learning_rate": 4.639493188805633e-05, "loss": 2.0008, "step": 1110 }, { "epoch": 0.25594655301503194, "grad_norm": 0.4703252613544464, "learning_rate": 4.6384523141652294e-05, "loss": 1.9649, "step": 1111 }, { "epoch": 0.256176927950239, "grad_norm": 0.4559331238269806, "learning_rate": 4.637410056166098e-05, "loss": 1.9873, "step": 1112 }, { "epoch": 0.25640730288544605, "grad_norm": 0.43682563304901123, "learning_rate": 4.636366415482474e-05, "loss": 2.0401, "step": 1113 }, { "epoch": 0.2566376778206531, "grad_norm": 0.5023305416107178, "learning_rate": 4.635321392789484e-05, "loss": 1.9717, "step": 1114 }, { "epoch": 0.25686805275586017, "grad_norm": 0.4302782416343689, "learning_rate": 4.63427498876315e-05, "loss": 2.0359, "step": 1115 }, { "epoch": 0.2570984276910672, "grad_norm": 0.40936514735221863, "learning_rate": 4.6332272040803895e-05, "loss": 2.0134, "step": 1116 }, { "epoch": 0.2573288026262743, "grad_norm": 0.9136181473731995, "learning_rate": 4.63217803941901e-05, "loss": 2.041, "step": 1117 }, { "epoch": 0.25755917756148133, "grad_norm": 0.5912984013557434, "learning_rate": 4.631127495457713e-05, "loss": 2.0251, "step": 1118 }, { "epoch": 0.25778955249668833, "grad_norm": 0.4754658341407776, "learning_rate": 4.630075572876094e-05, "loss": 2.0227, "step": 1119 }, { "epoch": 0.2580199274318954, "grad_norm": 0.4927350878715515, "learning_rate": 4.629022272354637e-05, "loss": 2.0279, "step": 1120 }, { "epoch": 0.25825030236710245, "grad_norm": 0.4874953627586365, "learning_rate": 4.6279675945747205e-05, "loss": 2.0493, "step": 1121 }, { "epoch": 0.2584806773023095, "grad_norm": 0.5171038508415222, "learning_rate": 4.626911540218611e-05, "loss": 2.0143, "step": 1122 }, { "epoch": 0.25871105223751656, "grad_norm": 0.47477221488952637, "learning_rate": 4.625854109969469e-05, "loss": 2.0106, "step": 1123 }, { "epoch": 0.2589414271727236, "grad_norm": 0.4957704544067383, "learning_rate": 4.6247953045113415e-05, "loss": 1.9792, "step": 1124 }, { "epoch": 0.25917180210793067, "grad_norm": 0.45437195897102356, "learning_rate": 4.623735124529168e-05, "loss": 1.9894, "step": 1125 }, { "epoch": 0.2594021770431377, "grad_norm": 0.41401028633117676, "learning_rate": 4.622673570708774e-05, "loss": 2.0012, "step": 1126 }, { "epoch": 0.2596325519783448, "grad_norm": 0.42897695302963257, "learning_rate": 4.621610643736878e-05, "loss": 1.9883, "step": 1127 }, { "epoch": 0.2598629269135518, "grad_norm": 0.40174806118011475, "learning_rate": 4.6205463443010824e-05, "loss": 2.0522, "step": 1128 }, { "epoch": 0.26009330184875884, "grad_norm": 0.4181232750415802, "learning_rate": 4.619480673089881e-05, "loss": 2.0041, "step": 1129 }, { "epoch": 0.2603236767839659, "grad_norm": 0.43979576230049133, "learning_rate": 4.618413630792653e-05, "loss": 2.0322, "step": 1130 }, { "epoch": 0.26055405171917295, "grad_norm": 0.45921531319618225, "learning_rate": 4.6173452180996646e-05, "loss": 2.0324, "step": 1131 }, { "epoch": 0.26078442665438, "grad_norm": 0.4865514039993286, "learning_rate": 4.6162754357020686e-05, "loss": 1.998, "step": 1132 }, { "epoch": 0.26101480158958706, "grad_norm": 0.4185102581977844, "learning_rate": 4.615204284291906e-05, "loss": 1.9672, "step": 1133 }, { "epoch": 0.2612451765247941, "grad_norm": 0.45340755581855774, "learning_rate": 4.6141317645621e-05, "loss": 2.0604, "step": 1134 }, { "epoch": 0.2614755514600012, "grad_norm": 0.4647906720638275, "learning_rate": 4.6130578772064604e-05, "loss": 2.0133, "step": 1135 }, { "epoch": 0.2617059263952082, "grad_norm": 0.4452959895133972, "learning_rate": 4.611982622919683e-05, "loss": 1.9691, "step": 1136 }, { "epoch": 0.26193630133041523, "grad_norm": 0.44108179211616516, "learning_rate": 4.6109060023973464e-05, "loss": 1.9782, "step": 1137 }, { "epoch": 0.2621666762656223, "grad_norm": 0.4275703728199005, "learning_rate": 4.609828016335913e-05, "loss": 2.0123, "step": 1138 }, { "epoch": 0.26239705120082935, "grad_norm": 0.4818561375141144, "learning_rate": 4.60874866543273e-05, "loss": 1.9439, "step": 1139 }, { "epoch": 0.2626274261360364, "grad_norm": 0.43627381324768066, "learning_rate": 4.607667950386024e-05, "loss": 2.022, "step": 1140 }, { "epoch": 0.26285780107124346, "grad_norm": 0.41125020384788513, "learning_rate": 4.60658587189491e-05, "loss": 1.9659, "step": 1141 }, { "epoch": 0.2630881760064505, "grad_norm": 0.4451124370098114, "learning_rate": 4.605502430659378e-05, "loss": 1.9957, "step": 1142 }, { "epoch": 0.26331855094165757, "grad_norm": 0.46051162481307983, "learning_rate": 4.6044176273803044e-05, "loss": 1.9923, "step": 1143 }, { "epoch": 0.26354892587686457, "grad_norm": 0.4516449272632599, "learning_rate": 4.603331462759446e-05, "loss": 1.883, "step": 1144 }, { "epoch": 0.2637793008120716, "grad_norm": 0.39930105209350586, "learning_rate": 4.6022439374994396e-05, "loss": 1.9987, "step": 1145 }, { "epoch": 0.2640096757472787, "grad_norm": 0.4588770866394043, "learning_rate": 4.6011550523038015e-05, "loss": 1.9533, "step": 1146 }, { "epoch": 0.26424005068248574, "grad_norm": 0.4188314378261566, "learning_rate": 4.600064807876929e-05, "loss": 2.0023, "step": 1147 }, { "epoch": 0.2644704256176928, "grad_norm": 0.4290444850921631, "learning_rate": 4.598973204924097e-05, "loss": 2.0072, "step": 1148 }, { "epoch": 0.26470080055289985, "grad_norm": 0.4421130120754242, "learning_rate": 4.597880244151462e-05, "loss": 1.9938, "step": 1149 }, { "epoch": 0.2649311754881069, "grad_norm": 0.41364961862564087, "learning_rate": 4.5967859262660565e-05, "loss": 2.0077, "step": 1150 }, { "epoch": 0.26516155042331396, "grad_norm": 0.4541112780570984, "learning_rate": 4.595690251975791e-05, "loss": 2.004, "step": 1151 }, { "epoch": 0.265391925358521, "grad_norm": 0.46321403980255127, "learning_rate": 4.594593221989455e-05, "loss": 2.0155, "step": 1152 }, { "epoch": 0.265622300293728, "grad_norm": 0.45901423692703247, "learning_rate": 4.593494837016714e-05, "loss": 1.9941, "step": 1153 }, { "epoch": 0.2658526752289351, "grad_norm": 0.47772926092147827, "learning_rate": 4.5923950977681084e-05, "loss": 1.9913, "step": 1154 }, { "epoch": 0.26608305016414213, "grad_norm": 0.46886539459228516, "learning_rate": 4.591294004955059e-05, "loss": 1.9754, "step": 1155 }, { "epoch": 0.2663134250993492, "grad_norm": 0.517394483089447, "learning_rate": 4.590191559289858e-05, "loss": 1.9657, "step": 1156 }, { "epoch": 0.26654380003455624, "grad_norm": 0.574374258518219, "learning_rate": 4.589087761485675e-05, "loss": 1.9425, "step": 1157 }, { "epoch": 0.2667741749697633, "grad_norm": 0.5233098864555359, "learning_rate": 4.587982612256554e-05, "loss": 2.01, "step": 1158 }, { "epoch": 0.26700454990497036, "grad_norm": 0.49842244386672974, "learning_rate": 4.586876112317411e-05, "loss": 1.9881, "step": 1159 }, { "epoch": 0.2672349248401774, "grad_norm": 0.4192771315574646, "learning_rate": 4.5857682623840407e-05, "loss": 1.9768, "step": 1160 }, { "epoch": 0.2674652997753844, "grad_norm": 0.48836037516593933, "learning_rate": 4.584659063173106e-05, "loss": 2.0049, "step": 1161 }, { "epoch": 0.26769567471059147, "grad_norm": 0.49941563606262207, "learning_rate": 4.583548515402144e-05, "loss": 2.0068, "step": 1162 }, { "epoch": 0.2679260496457985, "grad_norm": 0.4294678270816803, "learning_rate": 4.582436619789566e-05, "loss": 2.0067, "step": 1163 }, { "epoch": 0.2681564245810056, "grad_norm": 0.421304851770401, "learning_rate": 4.581323377054656e-05, "loss": 1.9671, "step": 1164 }, { "epoch": 0.26838679951621264, "grad_norm": 0.437620609998703, "learning_rate": 4.5802087879175636e-05, "loss": 1.9994, "step": 1165 }, { "epoch": 0.2686171744514197, "grad_norm": 0.48360562324523926, "learning_rate": 4.579092853099317e-05, "loss": 1.9707, "step": 1166 }, { "epoch": 0.26884754938662675, "grad_norm": 0.4446389675140381, "learning_rate": 4.577975573321809e-05, "loss": 1.9621, "step": 1167 }, { "epoch": 0.2690779243218338, "grad_norm": 0.4460415840148926, "learning_rate": 4.576856949307805e-05, "loss": 1.9827, "step": 1168 }, { "epoch": 0.2693082992570408, "grad_norm": 0.4295342266559601, "learning_rate": 4.5757369817809415e-05, "loss": 1.9863, "step": 1169 }, { "epoch": 0.26953867419224786, "grad_norm": 0.47251060605049133, "learning_rate": 4.5746156714657194e-05, "loss": 1.9905, "step": 1170 }, { "epoch": 0.2697690491274549, "grad_norm": 0.44118693470954895, "learning_rate": 4.573493019087514e-05, "loss": 1.9841, "step": 1171 }, { "epoch": 0.269999424062662, "grad_norm": 0.4521366059780121, "learning_rate": 4.572369025372564e-05, "loss": 1.9757, "step": 1172 }, { "epoch": 0.27022979899786903, "grad_norm": 0.4282824993133545, "learning_rate": 4.5712436910479786e-05, "loss": 1.9983, "step": 1173 }, { "epoch": 0.2704601739330761, "grad_norm": 0.443317711353302, "learning_rate": 4.570117016841732e-05, "loss": 1.9855, "step": 1174 }, { "epoch": 0.27069054886828314, "grad_norm": 0.41566309332847595, "learning_rate": 4.5689890034826685e-05, "loss": 2.006, "step": 1175 }, { "epoch": 0.2709209238034902, "grad_norm": 0.380582332611084, "learning_rate": 4.5678596517004966e-05, "loss": 2.012, "step": 1176 }, { "epoch": 0.27115129873869726, "grad_norm": 0.4477771520614624, "learning_rate": 4.566728962225789e-05, "loss": 1.9696, "step": 1177 }, { "epoch": 0.27138167367390426, "grad_norm": 0.464642196893692, "learning_rate": 4.5655969357899874e-05, "loss": 2.0072, "step": 1178 }, { "epoch": 0.2716120486091113, "grad_norm": 0.4519462287425995, "learning_rate": 4.5644635731253954e-05, "loss": 1.9619, "step": 1179 }, { "epoch": 0.27184242354431837, "grad_norm": 0.40411317348480225, "learning_rate": 4.563328874965183e-05, "loss": 1.9512, "step": 1180 }, { "epoch": 0.2720727984795254, "grad_norm": 0.3961813151836395, "learning_rate": 4.562192842043381e-05, "loss": 1.9402, "step": 1181 }, { "epoch": 0.2723031734147325, "grad_norm": 0.4482713043689728, "learning_rate": 4.5610554750948885e-05, "loss": 1.9765, "step": 1182 }, { "epoch": 0.27253354834993954, "grad_norm": 0.45736047625541687, "learning_rate": 4.559916774855464e-05, "loss": 1.9935, "step": 1183 }, { "epoch": 0.2727639232851466, "grad_norm": 0.5070540308952332, "learning_rate": 4.558776742061729e-05, "loss": 1.991, "step": 1184 }, { "epoch": 0.27299429822035365, "grad_norm": 0.447232723236084, "learning_rate": 4.5576353774511685e-05, "loss": 2.0016, "step": 1185 }, { "epoch": 0.27322467315556065, "grad_norm": 0.4516638219356537, "learning_rate": 4.5564926817621266e-05, "loss": 1.9814, "step": 1186 }, { "epoch": 0.2734550480907677, "grad_norm": 0.4511248767375946, "learning_rate": 4.555348655733811e-05, "loss": 1.9824, "step": 1187 }, { "epoch": 0.27368542302597476, "grad_norm": 0.4162772297859192, "learning_rate": 4.554203300106288e-05, "loss": 2.0271, "step": 1188 }, { "epoch": 0.2739157979611818, "grad_norm": 0.4219911992549896, "learning_rate": 4.553056615620486e-05, "loss": 1.958, "step": 1189 }, { "epoch": 0.2741461728963889, "grad_norm": 0.5325164794921875, "learning_rate": 4.551908603018191e-05, "loss": 1.958, "step": 1190 }, { "epoch": 0.27437654783159593, "grad_norm": 0.45626816153526306, "learning_rate": 4.5507592630420496e-05, "loss": 1.9864, "step": 1191 }, { "epoch": 0.274606922766803, "grad_norm": 0.5443227887153625, "learning_rate": 4.549608596435567e-05, "loss": 2.0008, "step": 1192 }, { "epoch": 0.27483729770201004, "grad_norm": 0.4431226849555969, "learning_rate": 4.5484566039431046e-05, "loss": 2.0284, "step": 1193 }, { "epoch": 0.2750676726372171, "grad_norm": 0.4685644507408142, "learning_rate": 4.547303286309885e-05, "loss": 1.9906, "step": 1194 }, { "epoch": 0.2752980475724241, "grad_norm": 0.45829036831855774, "learning_rate": 4.5461486442819846e-05, "loss": 2.0243, "step": 1195 }, { "epoch": 0.27552842250763115, "grad_norm": 0.4777814745903015, "learning_rate": 4.5449926786063385e-05, "loss": 1.9779, "step": 1196 }, { "epoch": 0.2757587974428382, "grad_norm": 0.5358420014381409, "learning_rate": 4.5438353900307376e-05, "loss": 1.9868, "step": 1197 }, { "epoch": 0.27598917237804527, "grad_norm": 0.517702579498291, "learning_rate": 4.5426767793038297e-05, "loss": 2.0126, "step": 1198 }, { "epoch": 0.2762195473132523, "grad_norm": 0.44542059302330017, "learning_rate": 4.541516847175115e-05, "loss": 1.9905, "step": 1199 }, { "epoch": 0.2764499222484594, "grad_norm": 0.7564400434494019, "learning_rate": 4.540355594394952e-05, "loss": 2.0035, "step": 1200 }, { "epoch": 0.27668029718366643, "grad_norm": 1.277319073677063, "learning_rate": 4.539193021714549e-05, "loss": 1.9903, "step": 1201 }, { "epoch": 0.2769106721188735, "grad_norm": 0.4937991499900818, "learning_rate": 4.5380291298859745e-05, "loss": 1.9686, "step": 1202 }, { "epoch": 0.2771410470540805, "grad_norm": 0.4801248610019684, "learning_rate": 4.536863919662145e-05, "loss": 2.0282, "step": 1203 }, { "epoch": 0.27737142198928755, "grad_norm": 0.5159435272216797, "learning_rate": 4.535697391796832e-05, "loss": 1.9425, "step": 1204 }, { "epoch": 0.2776017969244946, "grad_norm": 0.7543884515762329, "learning_rate": 4.534529547044658e-05, "loss": 2.0224, "step": 1205 }, { "epoch": 0.27783217185970166, "grad_norm": 0.5126155614852905, "learning_rate": 4.5333603861611005e-05, "loss": 1.943, "step": 1206 }, { "epoch": 0.2780625467949087, "grad_norm": 0.7033566236495972, "learning_rate": 4.532189909902485e-05, "loss": 1.9821, "step": 1207 }, { "epoch": 0.27829292173011577, "grad_norm": 0.6307174563407898, "learning_rate": 4.531018119025989e-05, "loss": 1.928, "step": 1208 }, { "epoch": 0.27852329666532283, "grad_norm": 0.6608896255493164, "learning_rate": 4.529845014289642e-05, "loss": 2.0082, "step": 1209 }, { "epoch": 0.2787536716005299, "grad_norm": 2.7194433212280273, "learning_rate": 4.52867059645232e-05, "loss": 2.0731, "step": 1210 }, { "epoch": 0.2789840465357369, "grad_norm": 0.6987528800964355, "learning_rate": 4.527494866273753e-05, "loss": 1.9623, "step": 1211 }, { "epoch": 0.27921442147094394, "grad_norm": 0.8021842241287231, "learning_rate": 4.526317824514517e-05, "loss": 1.975, "step": 1212 }, { "epoch": 0.279444796406151, "grad_norm": 0.9080991744995117, "learning_rate": 4.525139471936035e-05, "loss": 2.0011, "step": 1213 }, { "epoch": 0.27967517134135805, "grad_norm": 0.7359468936920166, "learning_rate": 4.523959809300582e-05, "loss": 2.0692, "step": 1214 }, { "epoch": 0.2799055462765651, "grad_norm": 0.6720209717750549, "learning_rate": 4.5227788373712774e-05, "loss": 2.0308, "step": 1215 }, { "epoch": 0.28013592121177217, "grad_norm": 0.6358092427253723, "learning_rate": 4.5215965569120895e-05, "loss": 2.0109, "step": 1216 }, { "epoch": 0.2803662961469792, "grad_norm": 0.6011826992034912, "learning_rate": 4.520412968687832e-05, "loss": 1.9883, "step": 1217 }, { "epoch": 0.2805966710821863, "grad_norm": 0.5865610837936401, "learning_rate": 4.5192280734641626e-05, "loss": 1.9761, "step": 1218 }, { "epoch": 0.28082704601739333, "grad_norm": 0.4863257110118866, "learning_rate": 4.5180418720075896e-05, "loss": 2.037, "step": 1219 }, { "epoch": 0.28105742095260033, "grad_norm": 0.511873722076416, "learning_rate": 4.5168543650854625e-05, "loss": 1.9805, "step": 1220 }, { "epoch": 0.2812877958878074, "grad_norm": 0.7932138442993164, "learning_rate": 4.515665553465975e-05, "loss": 2.0578, "step": 1221 }, { "epoch": 0.28151817082301445, "grad_norm": 0.5467917323112488, "learning_rate": 4.514475437918167e-05, "loss": 1.993, "step": 1222 }, { "epoch": 0.2817485457582215, "grad_norm": 0.4667700529098511, "learning_rate": 4.513284019211921e-05, "loss": 2.0516, "step": 1223 }, { "epoch": 0.28197892069342856, "grad_norm": 0.5111719965934753, "learning_rate": 4.512091298117961e-05, "loss": 2.0527, "step": 1224 }, { "epoch": 0.2822092956286356, "grad_norm": 0.5882595181465149, "learning_rate": 4.5108972754078564e-05, "loss": 1.9834, "step": 1225 }, { "epoch": 0.28243967056384267, "grad_norm": 0.5105330348014832, "learning_rate": 4.509701951854017e-05, "loss": 1.9588, "step": 1226 }, { "epoch": 0.2826700454990497, "grad_norm": 0.4765574336051941, "learning_rate": 4.5085053282296936e-05, "loss": 1.974, "step": 1227 }, { "epoch": 0.2829004204342567, "grad_norm": 0.4568116366863251, "learning_rate": 4.5073074053089785e-05, "loss": 2.0351, "step": 1228 }, { "epoch": 0.2831307953694638, "grad_norm": 0.5132972002029419, "learning_rate": 4.506108183866805e-05, "loss": 2.0151, "step": 1229 }, { "epoch": 0.28336117030467084, "grad_norm": 0.4892562925815582, "learning_rate": 4.504907664678947e-05, "loss": 2.0218, "step": 1230 }, { "epoch": 0.2835915452398779, "grad_norm": 0.4133303165435791, "learning_rate": 4.503705848522015e-05, "loss": 1.9441, "step": 1231 }, { "epoch": 0.28382192017508495, "grad_norm": 0.39028528332710266, "learning_rate": 4.502502736173462e-05, "loss": 1.9639, "step": 1232 }, { "epoch": 0.284052295110292, "grad_norm": 0.4189188778400421, "learning_rate": 4.501298328411577e-05, "loss": 1.9994, "step": 1233 }, { "epoch": 0.28428267004549906, "grad_norm": 0.3742779791355133, "learning_rate": 4.500092626015488e-05, "loss": 1.9816, "step": 1234 }, { "epoch": 0.2845130449807061, "grad_norm": 0.3991760313510895, "learning_rate": 4.498885629765162e-05, "loss": 1.9595, "step": 1235 }, { "epoch": 0.2847434199159132, "grad_norm": 0.42409294843673706, "learning_rate": 4.497677340441399e-05, "loss": 1.9614, "step": 1236 }, { "epoch": 0.2849737948511202, "grad_norm": 0.9434719085693359, "learning_rate": 4.4964677588258395e-05, "loss": 1.9451, "step": 1237 }, { "epoch": 0.28520416978632723, "grad_norm": 35.5501594543457, "learning_rate": 4.495256885700958e-05, "loss": 2.4398, "step": 1238 }, { "epoch": 0.2854345447215343, "grad_norm": 5.579110145568848, "learning_rate": 4.4940447218500656e-05, "loss": 2.2089, "step": 1239 }, { "epoch": 0.28566491965674135, "grad_norm": 15.789441108703613, "learning_rate": 4.4928312680573064e-05, "loss": 2.7528, "step": 1240 }, { "epoch": 0.2858952945919484, "grad_norm": 2.2256863117218018, "learning_rate": 4.491616525107661e-05, "loss": 2.1847, "step": 1241 }, { "epoch": 0.28612566952715546, "grad_norm": 1.4774065017700195, "learning_rate": 4.490400493786943e-05, "loss": 2.0875, "step": 1242 }, { "epoch": 0.2863560444623625, "grad_norm": 4.073440074920654, "learning_rate": 4.4891831748818e-05, "loss": 2.2073, "step": 1243 }, { "epoch": 0.28658641939756957, "grad_norm": 2.535773277282715, "learning_rate": 4.487964569179711e-05, "loss": 2.1702, "step": 1244 }, { "epoch": 0.28681679433277657, "grad_norm": 15.29354476928711, "learning_rate": 4.48674467746899e-05, "loss": 2.2991, "step": 1245 }, { "epoch": 0.2870471692679836, "grad_norm": 2.492981433868408, "learning_rate": 4.48552350053878e-05, "loss": 2.1468, "step": 1246 }, { "epoch": 0.2872775442031907, "grad_norm": 1.5413533449172974, "learning_rate": 4.484301039179059e-05, "loss": 2.1203, "step": 1247 }, { "epoch": 0.28750791913839774, "grad_norm": 1.4065096378326416, "learning_rate": 4.4830772941806306e-05, "loss": 2.0485, "step": 1248 }, { "epoch": 0.2877382940736048, "grad_norm": 0.8545441627502441, "learning_rate": 4.481852266335135e-05, "loss": 2.0991, "step": 1249 }, { "epoch": 0.28796866900881185, "grad_norm": 0.5797882080078125, "learning_rate": 4.4806259564350385e-05, "loss": 2.0505, "step": 1250 }, { "epoch": 0.2881990439440189, "grad_norm": 0.7976332306861877, "learning_rate": 4.479398365273636e-05, "loss": 2.0373, "step": 1251 }, { "epoch": 0.28842941887922596, "grad_norm": 0.5346152186393738, "learning_rate": 4.4781694936450545e-05, "loss": 1.9974, "step": 1252 }, { "epoch": 0.28865979381443296, "grad_norm": 0.5867125988006592, "learning_rate": 4.476939342344246e-05, "loss": 2.0851, "step": 1253 }, { "epoch": 0.28889016874964, "grad_norm": 0.5291160941123962, "learning_rate": 4.475707912166994e-05, "loss": 2.0157, "step": 1254 }, { "epoch": 0.2891205436848471, "grad_norm": 0.5215042233467102, "learning_rate": 4.4744752039099055e-05, "loss": 1.9563, "step": 1255 }, { "epoch": 0.28935091862005413, "grad_norm": 0.5312508344650269, "learning_rate": 4.4732412183704165e-05, "loss": 2.0794, "step": 1256 }, { "epoch": 0.2895812935552612, "grad_norm": 0.479836642742157, "learning_rate": 4.472005956346788e-05, "loss": 1.9856, "step": 1257 }, { "epoch": 0.28981166849046824, "grad_norm": 0.4491432011127472, "learning_rate": 4.4707694186381085e-05, "loss": 1.997, "step": 1258 }, { "epoch": 0.2900420434256753, "grad_norm": 0.445182740688324, "learning_rate": 4.46953160604429e-05, "loss": 1.9705, "step": 1259 }, { "epoch": 0.29027241836088236, "grad_norm": 0.4425063729286194, "learning_rate": 4.468292519366071e-05, "loss": 1.9734, "step": 1260 }, { "epoch": 0.2905027932960894, "grad_norm": 0.48247766494750977, "learning_rate": 4.4670521594050114e-05, "loss": 1.9918, "step": 1261 }, { "epoch": 0.2907331682312964, "grad_norm": 0.4533904492855072, "learning_rate": 4.465810526963499e-05, "loss": 1.9459, "step": 1262 }, { "epoch": 0.29096354316650347, "grad_norm": 0.45437920093536377, "learning_rate": 4.4645676228447394e-05, "loss": 1.9682, "step": 1263 }, { "epoch": 0.2911939181017105, "grad_norm": 0.509579598903656, "learning_rate": 4.463323447852766e-05, "loss": 1.9407, "step": 1264 }, { "epoch": 0.2914242930369176, "grad_norm": 0.41746291518211365, "learning_rate": 4.4620780027924315e-05, "loss": 2.0062, "step": 1265 }, { "epoch": 0.29165466797212464, "grad_norm": 0.38011595606803894, "learning_rate": 4.46083128846941e-05, "loss": 1.9852, "step": 1266 }, { "epoch": 0.2918850429073317, "grad_norm": 0.37877753376960754, "learning_rate": 4.459583305690198e-05, "loss": 1.9591, "step": 1267 }, { "epoch": 0.29211541784253875, "grad_norm": 0.38566359877586365, "learning_rate": 4.458334055262113e-05, "loss": 2.0112, "step": 1268 }, { "epoch": 0.2923457927777458, "grad_norm": 0.3856004476547241, "learning_rate": 4.4570835379932894e-05, "loss": 1.981, "step": 1269 }, { "epoch": 0.2925761677129528, "grad_norm": 0.38726675510406494, "learning_rate": 4.455831754692685e-05, "loss": 1.9902, "step": 1270 }, { "epoch": 0.29280654264815986, "grad_norm": 0.44926685094833374, "learning_rate": 4.454578706170075e-05, "loss": 2.0286, "step": 1271 }, { "epoch": 0.2930369175833669, "grad_norm": 0.4216504395008087, "learning_rate": 4.453324393236051e-05, "loss": 2.001, "step": 1272 }, { "epoch": 0.293267292518574, "grad_norm": 0.41741856932640076, "learning_rate": 4.452068816702027e-05, "loss": 1.9943, "step": 1273 }, { "epoch": 0.29349766745378103, "grad_norm": 0.42789503931999207, "learning_rate": 4.45081197738023e-05, "loss": 1.9423, "step": 1274 }, { "epoch": 0.2937280423889881, "grad_norm": 0.4192328155040741, "learning_rate": 4.4495538760837064e-05, "loss": 1.9961, "step": 1275 }, { "epoch": 0.29395841732419514, "grad_norm": 0.4168560802936554, "learning_rate": 4.448294513626318e-05, "loss": 1.9392, "step": 1276 }, { "epoch": 0.2941887922594022, "grad_norm": 0.38128766417503357, "learning_rate": 4.4470338908227436e-05, "loss": 1.9708, "step": 1277 }, { "epoch": 0.2944191671946092, "grad_norm": 0.40934619307518005, "learning_rate": 4.445772008488476e-05, "loss": 1.9847, "step": 1278 }, { "epoch": 0.29464954212981626, "grad_norm": 0.41707322001457214, "learning_rate": 4.444508867439824e-05, "loss": 1.9842, "step": 1279 }, { "epoch": 0.2948799170650233, "grad_norm": 0.36241573095321655, "learning_rate": 4.4432444684939077e-05, "loss": 1.9798, "step": 1280 }, { "epoch": 0.29511029200023037, "grad_norm": 0.4054236114025116, "learning_rate": 4.441978812468666e-05, "loss": 2.0223, "step": 1281 }, { "epoch": 0.2953406669354374, "grad_norm": 0.4257291257381439, "learning_rate": 4.440711900182847e-05, "loss": 1.99, "step": 1282 }, { "epoch": 0.2955710418706445, "grad_norm": 0.45871230959892273, "learning_rate": 4.4394437324560126e-05, "loss": 1.9592, "step": 1283 }, { "epoch": 0.29580141680585154, "grad_norm": 0.46741944551467896, "learning_rate": 4.4381743101085366e-05, "loss": 1.9628, "step": 1284 }, { "epoch": 0.2960317917410586, "grad_norm": 0.41539284586906433, "learning_rate": 4.436903633961606e-05, "loss": 1.9442, "step": 1285 }, { "epoch": 0.29626216667626565, "grad_norm": 0.4259786009788513, "learning_rate": 4.4356317048372155e-05, "loss": 1.9431, "step": 1286 }, { "epoch": 0.29649254161147265, "grad_norm": 0.4542396664619446, "learning_rate": 4.434358523558174e-05, "loss": 2.0065, "step": 1287 }, { "epoch": 0.2967229165466797, "grad_norm": 0.47848019003868103, "learning_rate": 4.433084090948099e-05, "loss": 1.9609, "step": 1288 }, { "epoch": 0.29695329148188676, "grad_norm": 0.409587562084198, "learning_rate": 4.431808407831416e-05, "loss": 1.9975, "step": 1289 }, { "epoch": 0.2971836664170938, "grad_norm": 0.44534924626350403, "learning_rate": 4.4305314750333615e-05, "loss": 2.0025, "step": 1290 }, { "epoch": 0.2974140413523009, "grad_norm": 0.5072950124740601, "learning_rate": 4.42925329337998e-05, "loss": 2.0018, "step": 1291 }, { "epoch": 0.29764441628750793, "grad_norm": 0.40814799070358276, "learning_rate": 4.427973863698124e-05, "loss": 1.9531, "step": 1292 }, { "epoch": 0.297874791222715, "grad_norm": 0.3979027569293976, "learning_rate": 4.426693186815451e-05, "loss": 1.9531, "step": 1293 }, { "epoch": 0.29810516615792204, "grad_norm": 0.3658965229988098, "learning_rate": 4.4254112635604294e-05, "loss": 1.9876, "step": 1294 }, { "epoch": 0.29833554109312904, "grad_norm": 0.4301181435585022, "learning_rate": 4.424128094762331e-05, "loss": 1.9897, "step": 1295 }, { "epoch": 0.2985659160283361, "grad_norm": 0.461128294467926, "learning_rate": 4.422843681251233e-05, "loss": 1.9895, "step": 1296 }, { "epoch": 0.29879629096354315, "grad_norm": 0.3780709207057953, "learning_rate": 4.4215580238580215e-05, "loss": 1.9578, "step": 1297 }, { "epoch": 0.2990266658987502, "grad_norm": 0.40954720973968506, "learning_rate": 4.420271123414381e-05, "loss": 1.864, "step": 1298 }, { "epoch": 0.29925704083395727, "grad_norm": 0.37914127111434937, "learning_rate": 4.4189829807528063e-05, "loss": 1.9599, "step": 1299 }, { "epoch": 0.2994874157691643, "grad_norm": 0.3957623243331909, "learning_rate": 4.417693596706592e-05, "loss": 1.9824, "step": 1300 }, { "epoch": 0.2997177907043714, "grad_norm": 0.40292397141456604, "learning_rate": 4.4164029721098384e-05, "loss": 1.9102, "step": 1301 }, { "epoch": 0.29994816563957843, "grad_norm": 0.38992854952812195, "learning_rate": 4.415111107797445e-05, "loss": 1.9255, "step": 1302 }, { "epoch": 0.3001785405747855, "grad_norm": 0.3994954526424408, "learning_rate": 4.413818004605117e-05, "loss": 2.0186, "step": 1303 }, { "epoch": 0.3004089155099925, "grad_norm": 0.41891562938690186, "learning_rate": 4.412523663369358e-05, "loss": 1.9707, "step": 1304 }, { "epoch": 0.30063929044519955, "grad_norm": 0.4329485595226288, "learning_rate": 4.411228084927473e-05, "loss": 1.9725, "step": 1305 }, { "epoch": 0.3008696653804066, "grad_norm": 0.41696685552597046, "learning_rate": 4.409931270117571e-05, "loss": 1.9238, "step": 1306 }, { "epoch": 0.30110004031561366, "grad_norm": 1.2155303955078125, "learning_rate": 4.408633219778555e-05, "loss": 1.9685, "step": 1307 }, { "epoch": 0.3013304152508207, "grad_norm": 0.39063194394111633, "learning_rate": 4.40733393475013e-05, "loss": 1.973, "step": 1308 }, { "epoch": 0.30156079018602777, "grad_norm": 0.44973525404930115, "learning_rate": 4.406033415872801e-05, "loss": 2.0077, "step": 1309 }, { "epoch": 0.30179116512123483, "grad_norm": 0.5031969547271729, "learning_rate": 4.40473166398787e-05, "loss": 1.9643, "step": 1310 }, { "epoch": 0.3020215400564419, "grad_norm": 0.4126640558242798, "learning_rate": 4.403428679937437e-05, "loss": 1.9442, "step": 1311 }, { "epoch": 0.3022519149916489, "grad_norm": 0.39144039154052734, "learning_rate": 4.4021244645643964e-05, "loss": 2.0066, "step": 1312 }, { "epoch": 0.30248228992685594, "grad_norm": 0.4299284815788269, "learning_rate": 4.400819018712444e-05, "loss": 2.0218, "step": 1313 }, { "epoch": 0.302712664862063, "grad_norm": 0.44448143243789673, "learning_rate": 4.399512343226068e-05, "loss": 1.9489, "step": 1314 }, { "epoch": 0.30294303979727005, "grad_norm": 0.3676280677318573, "learning_rate": 4.398204438950552e-05, "loss": 1.9816, "step": 1315 }, { "epoch": 0.3031734147324771, "grad_norm": 0.3802914023399353, "learning_rate": 4.3968953067319777e-05, "loss": 1.9473, "step": 1316 }, { "epoch": 0.30340378966768417, "grad_norm": 0.40930914878845215, "learning_rate": 4.395584947417217e-05, "loss": 1.9564, "step": 1317 }, { "epoch": 0.3036341646028912, "grad_norm": 0.4335686266422272, "learning_rate": 4.394273361853939e-05, "loss": 1.9631, "step": 1318 }, { "epoch": 0.3038645395380983, "grad_norm": 0.40449759364128113, "learning_rate": 4.392960550890604e-05, "loss": 1.9767, "step": 1319 }, { "epoch": 0.3040949144733053, "grad_norm": 0.4325851798057556, "learning_rate": 4.3916465153764665e-05, "loss": 2.0073, "step": 1320 }, { "epoch": 0.30432528940851233, "grad_norm": 0.398972749710083, "learning_rate": 4.390331256161571e-05, "loss": 1.9551, "step": 1321 }, { "epoch": 0.3045556643437194, "grad_norm": 0.41538795828819275, "learning_rate": 4.389014774096756e-05, "loss": 2.0091, "step": 1322 }, { "epoch": 0.30478603927892645, "grad_norm": 0.4294925630092621, "learning_rate": 4.3876970700336495e-05, "loss": 1.9736, "step": 1323 }, { "epoch": 0.3050164142141335, "grad_norm": 0.7676051259040833, "learning_rate": 4.386378144824671e-05, "loss": 2.025, "step": 1324 }, { "epoch": 0.30524678914934056, "grad_norm": 0.4097474217414856, "learning_rate": 4.3850579993230284e-05, "loss": 1.9144, "step": 1325 }, { "epoch": 0.3054771640845476, "grad_norm": 0.4256832003593445, "learning_rate": 4.3837366343827216e-05, "loss": 1.9741, "step": 1326 }, { "epoch": 0.30570753901975467, "grad_norm": 0.48232054710388184, "learning_rate": 4.3824140508585364e-05, "loss": 1.9825, "step": 1327 }, { "epoch": 0.3059379139549617, "grad_norm": 0.39831531047821045, "learning_rate": 4.38109024960605e-05, "loss": 1.9616, "step": 1328 }, { "epoch": 0.3061682888901687, "grad_norm": 0.38049742579460144, "learning_rate": 4.379765231481624e-05, "loss": 1.9646, "step": 1329 }, { "epoch": 0.3063986638253758, "grad_norm": 0.4633544087409973, "learning_rate": 4.378438997342409e-05, "loss": 1.9688, "step": 1330 }, { "epoch": 0.30662903876058284, "grad_norm": 0.4190623164176941, "learning_rate": 4.377111548046343e-05, "loss": 1.9887, "step": 1331 }, { "epoch": 0.3068594136957899, "grad_norm": 0.47097402811050415, "learning_rate": 4.3757828844521494e-05, "loss": 1.9429, "step": 1332 }, { "epoch": 0.30708978863099695, "grad_norm": 0.44552144408226013, "learning_rate": 4.374453007419336e-05, "loss": 1.9026, "step": 1333 }, { "epoch": 0.307320163566204, "grad_norm": 0.39273279905319214, "learning_rate": 4.373121917808196e-05, "loss": 1.9985, "step": 1334 }, { "epoch": 0.30755053850141106, "grad_norm": 0.4497288167476654, "learning_rate": 4.3717896164798086e-05, "loss": 1.9533, "step": 1335 }, { "epoch": 0.3077809134366181, "grad_norm": 0.47179916501045227, "learning_rate": 4.370456104296036e-05, "loss": 1.9487, "step": 1336 }, { "epoch": 0.3080112883718251, "grad_norm": 0.43219131231307983, "learning_rate": 4.369121382119523e-05, "loss": 1.9413, "step": 1337 }, { "epoch": 0.3082416633070322, "grad_norm": 0.3823307156562805, "learning_rate": 4.367785450813696e-05, "loss": 1.965, "step": 1338 }, { "epoch": 0.30847203824223923, "grad_norm": 0.3971143066883087, "learning_rate": 4.366448311242768e-05, "loss": 1.9777, "step": 1339 }, { "epoch": 0.3087024131774463, "grad_norm": 0.38856199383735657, "learning_rate": 4.3651099642717294e-05, "loss": 1.9886, "step": 1340 }, { "epoch": 0.30893278811265334, "grad_norm": 0.40740472078323364, "learning_rate": 4.363770410766353e-05, "loss": 1.97, "step": 1341 }, { "epoch": 0.3091631630478604, "grad_norm": 0.39918678998947144, "learning_rate": 4.3624296515931906e-05, "loss": 1.9758, "step": 1342 }, { "epoch": 0.30939353798306746, "grad_norm": 0.40038514137268066, "learning_rate": 4.361087687619579e-05, "loss": 1.9709, "step": 1343 }, { "epoch": 0.3096239129182745, "grad_norm": 0.381115585565567, "learning_rate": 4.359744519713628e-05, "loss": 1.9407, "step": 1344 }, { "epoch": 0.3098542878534815, "grad_norm": 0.39148303866386414, "learning_rate": 4.3584001487442305e-05, "loss": 1.9575, "step": 1345 }, { "epoch": 0.31008466278868857, "grad_norm": 0.4075348973274231, "learning_rate": 4.357054575581056e-05, "loss": 1.9155, "step": 1346 }, { "epoch": 0.3103150377238956, "grad_norm": 0.3681757152080536, "learning_rate": 4.3557078010945505e-05, "loss": 1.9583, "step": 1347 }, { "epoch": 0.3105454126591027, "grad_norm": 0.35530346632003784, "learning_rate": 4.354359826155941e-05, "loss": 1.9789, "step": 1348 }, { "epoch": 0.31077578759430974, "grad_norm": 0.40136104822158813, "learning_rate": 4.353010651637227e-05, "loss": 1.9528, "step": 1349 }, { "epoch": 0.3110061625295168, "grad_norm": 0.3641766905784607, "learning_rate": 4.351660278411187e-05, "loss": 1.9071, "step": 1350 }, { "epoch": 0.31123653746472385, "grad_norm": 0.3897402882575989, "learning_rate": 4.350308707351372e-05, "loss": 1.9968, "step": 1351 }, { "epoch": 0.3114669123999309, "grad_norm": 0.41000768542289734, "learning_rate": 4.348955939332111e-05, "loss": 1.953, "step": 1352 }, { "epoch": 0.31169728733513796, "grad_norm": 0.4121626317501068, "learning_rate": 4.3476019752285055e-05, "loss": 1.9652, "step": 1353 }, { "epoch": 0.31192766227034496, "grad_norm": 0.39340054988861084, "learning_rate": 4.346246815916429e-05, "loss": 1.9721, "step": 1354 }, { "epoch": 0.312158037205552, "grad_norm": 0.4831576645374298, "learning_rate": 4.3448904622725336e-05, "loss": 2.0114, "step": 1355 }, { "epoch": 0.3123884121407591, "grad_norm": 0.42604851722717285, "learning_rate": 4.3435329151742375e-05, "loss": 2.0156, "step": 1356 }, { "epoch": 0.31261878707596613, "grad_norm": 0.4031117260456085, "learning_rate": 4.3421741754997366e-05, "loss": 1.9717, "step": 1357 }, { "epoch": 0.3128491620111732, "grad_norm": 0.40696123242378235, "learning_rate": 4.340814244127993e-05, "loss": 1.9567, "step": 1358 }, { "epoch": 0.31307953694638024, "grad_norm": 0.3925931453704834, "learning_rate": 4.339453121938746e-05, "loss": 1.9775, "step": 1359 }, { "epoch": 0.3133099118815873, "grad_norm": 0.43161678314208984, "learning_rate": 4.338090809812498e-05, "loss": 2.0043, "step": 1360 }, { "epoch": 0.31354028681679436, "grad_norm": 0.3907691240310669, "learning_rate": 4.336727308630527e-05, "loss": 1.9607, "step": 1361 }, { "epoch": 0.31377066175200136, "grad_norm": 0.40125641226768494, "learning_rate": 4.335362619274877e-05, "loss": 1.9932, "step": 1362 }, { "epoch": 0.3140010366872084, "grad_norm": 0.42564111948013306, "learning_rate": 4.3339967426283634e-05, "loss": 1.9332, "step": 1363 }, { "epoch": 0.31423141162241547, "grad_norm": 0.3949706256389618, "learning_rate": 4.332629679574566e-05, "loss": 1.9579, "step": 1364 }, { "epoch": 0.3144617865576225, "grad_norm": 0.3712620735168457, "learning_rate": 4.331261430997835e-05, "loss": 2.0093, "step": 1365 }, { "epoch": 0.3146921614928296, "grad_norm": 0.4260379672050476, "learning_rate": 4.3298919977832864e-05, "loss": 1.9519, "step": 1366 }, { "epoch": 0.31492253642803664, "grad_norm": 0.4004679322242737, "learning_rate": 4.3285213808168025e-05, "loss": 1.9265, "step": 1367 }, { "epoch": 0.3151529113632437, "grad_norm": 0.46473443508148193, "learning_rate": 4.327149580985031e-05, "loss": 1.9716, "step": 1368 }, { "epoch": 0.31538328629845075, "grad_norm": 0.4140304625034332, "learning_rate": 4.325776599175386e-05, "loss": 1.9958, "step": 1369 }, { "epoch": 0.3156136612336578, "grad_norm": 0.3849419951438904, "learning_rate": 4.324402436276046e-05, "loss": 1.9613, "step": 1370 }, { "epoch": 0.3158440361688648, "grad_norm": 0.3877829313278198, "learning_rate": 4.323027093175952e-05, "loss": 1.9382, "step": 1371 }, { "epoch": 0.31607441110407186, "grad_norm": 0.49516478180885315, "learning_rate": 4.3216505707648095e-05, "loss": 1.9654, "step": 1372 }, { "epoch": 0.3163047860392789, "grad_norm": 0.424976646900177, "learning_rate": 4.320272869933088e-05, "loss": 1.9705, "step": 1373 }, { "epoch": 0.316535160974486, "grad_norm": 0.3968179225921631, "learning_rate": 4.318893991572018e-05, "loss": 1.979, "step": 1374 }, { "epoch": 0.31676553590969303, "grad_norm": 0.3566516041755676, "learning_rate": 4.317513936573591e-05, "loss": 1.9334, "step": 1375 }, { "epoch": 0.3169959108449001, "grad_norm": 0.39388224482536316, "learning_rate": 4.3161327058305625e-05, "loss": 1.9336, "step": 1376 }, { "epoch": 0.31722628578010714, "grad_norm": 0.39778998494148254, "learning_rate": 4.314750300236444e-05, "loss": 1.92, "step": 1377 }, { "epoch": 0.3174566607153142, "grad_norm": 0.36897575855255127, "learning_rate": 4.313366720685512e-05, "loss": 1.9798, "step": 1378 }, { "epoch": 0.3176870356505212, "grad_norm": 0.38385993242263794, "learning_rate": 4.3119819680728e-05, "loss": 1.9997, "step": 1379 }, { "epoch": 0.31791741058572826, "grad_norm": 0.4081646203994751, "learning_rate": 4.3105960432941e-05, "loss": 1.9897, "step": 1380 }, { "epoch": 0.3181477855209353, "grad_norm": 0.41725945472717285, "learning_rate": 4.3092089472459627e-05, "loss": 1.9871, "step": 1381 }, { "epoch": 0.31837816045614237, "grad_norm": 0.37174081802368164, "learning_rate": 4.3078206808256965e-05, "loss": 1.9387, "step": 1382 }, { "epoch": 0.3186085353913494, "grad_norm": 0.3995252251625061, "learning_rate": 4.306431244931367e-05, "loss": 1.9233, "step": 1383 }, { "epoch": 0.3188389103265565, "grad_norm": 0.3897140324115753, "learning_rate": 4.3050406404617976e-05, "loss": 1.9754, "step": 1384 }, { "epoch": 0.31906928526176354, "grad_norm": 0.4150521457195282, "learning_rate": 4.303648868316565e-05, "loss": 1.9205, "step": 1385 }, { "epoch": 0.3192996601969706, "grad_norm": 0.4289949834346771, "learning_rate": 4.302255929396003e-05, "loss": 1.9471, "step": 1386 }, { "epoch": 0.3195300351321776, "grad_norm": 0.46279653906822205, "learning_rate": 4.3008618246011994e-05, "loss": 1.9793, "step": 1387 }, { "epoch": 0.31976041006738465, "grad_norm": 0.40949854254722595, "learning_rate": 4.299466554833997e-05, "loss": 1.9407, "step": 1388 }, { "epoch": 0.3199907850025917, "grad_norm": 0.391126811504364, "learning_rate": 4.298070120996993e-05, "loss": 1.9409, "step": 1389 }, { "epoch": 0.32022115993779876, "grad_norm": 0.4208923876285553, "learning_rate": 4.296672523993535e-05, "loss": 1.9207, "step": 1390 }, { "epoch": 0.3204515348730058, "grad_norm": 0.41361409425735474, "learning_rate": 4.295273764727724e-05, "loss": 1.935, "step": 1391 }, { "epoch": 0.3206819098082129, "grad_norm": 0.41230857372283936, "learning_rate": 4.293873844104416e-05, "loss": 1.9537, "step": 1392 }, { "epoch": 0.32091228474341993, "grad_norm": 0.3788338005542755, "learning_rate": 4.292472763029213e-05, "loss": 1.9461, "step": 1393 }, { "epoch": 0.321142659678627, "grad_norm": 0.3740299642086029, "learning_rate": 4.291070522408471e-05, "loss": 1.9625, "step": 1394 }, { "epoch": 0.32137303461383404, "grad_norm": 0.3848860561847687, "learning_rate": 4.2896671231492966e-05, "loss": 1.9739, "step": 1395 }, { "epoch": 0.32160340954904104, "grad_norm": 0.37779903411865234, "learning_rate": 4.288262566159543e-05, "loss": 1.984, "step": 1396 }, { "epoch": 0.3218337844842481, "grad_norm": 0.3924288749694824, "learning_rate": 4.286856852347816e-05, "loss": 1.9606, "step": 1397 }, { "epoch": 0.32206415941945515, "grad_norm": 0.37626758217811584, "learning_rate": 4.2854499826234675e-05, "loss": 1.9396, "step": 1398 }, { "epoch": 0.3222945343546622, "grad_norm": 0.3774159848690033, "learning_rate": 4.284041957896597e-05, "loss": 1.9705, "step": 1399 }, { "epoch": 0.32252490928986927, "grad_norm": 0.37494441866874695, "learning_rate": 4.282632779078051e-05, "loss": 1.9799, "step": 1400 }, { "epoch": 0.3227552842250763, "grad_norm": 0.36470282077789307, "learning_rate": 4.281222447079425e-05, "loss": 1.9505, "step": 1401 }, { "epoch": 0.3229856591602834, "grad_norm": 0.38674598932266235, "learning_rate": 4.279810962813057e-05, "loss": 1.9031, "step": 1402 }, { "epoch": 0.32321603409549043, "grad_norm": 0.35078057646751404, "learning_rate": 4.278398327192033e-05, "loss": 1.9768, "step": 1403 }, { "epoch": 0.32344640903069743, "grad_norm": 0.3840884268283844, "learning_rate": 4.276984541130183e-05, "loss": 1.94, "step": 1404 }, { "epoch": 0.3236767839659045, "grad_norm": 0.4160512685775757, "learning_rate": 4.2755696055420814e-05, "loss": 1.9873, "step": 1405 }, { "epoch": 0.32390715890111155, "grad_norm": 0.3752957582473755, "learning_rate": 4.274153521343046e-05, "loss": 2.0136, "step": 1406 }, { "epoch": 0.3241375338363186, "grad_norm": 0.38170021772384644, "learning_rate": 4.272736289449137e-05, "loss": 1.9362, "step": 1407 }, { "epoch": 0.32436790877152566, "grad_norm": 0.36675554513931274, "learning_rate": 4.2713179107771585e-05, "loss": 1.941, "step": 1408 }, { "epoch": 0.3245982837067327, "grad_norm": 0.3555692732334137, "learning_rate": 4.269898386244655e-05, "loss": 1.9341, "step": 1409 }, { "epoch": 0.32482865864193977, "grad_norm": 0.36068880558013916, "learning_rate": 4.2684777167699126e-05, "loss": 1.9355, "step": 1410 }, { "epoch": 0.3250590335771468, "grad_norm": 0.3696048855781555, "learning_rate": 4.2670559032719594e-05, "loss": 1.9448, "step": 1411 }, { "epoch": 0.3252894085123539, "grad_norm": 0.37011706829071045, "learning_rate": 4.265632946670561e-05, "loss": 1.9561, "step": 1412 }, { "epoch": 0.3255197834475609, "grad_norm": 0.40141618251800537, "learning_rate": 4.264208847886226e-05, "loss": 1.9411, "step": 1413 }, { "epoch": 0.32575015838276794, "grad_norm": 0.39319750666618347, "learning_rate": 4.262783607840199e-05, "loss": 1.9612, "step": 1414 }, { "epoch": 0.325980533317975, "grad_norm": 0.36553192138671875, "learning_rate": 4.261357227454463e-05, "loss": 1.8583, "step": 1415 }, { "epoch": 0.32621090825318205, "grad_norm": 0.35435524582862854, "learning_rate": 4.2599297076517395e-05, "loss": 1.9677, "step": 1416 }, { "epoch": 0.3264412831883891, "grad_norm": 0.3706662058830261, "learning_rate": 4.2585010493554884e-05, "loss": 1.9356, "step": 1417 }, { "epoch": 0.32667165812359616, "grad_norm": 0.4043411314487457, "learning_rate": 4.257071253489904e-05, "loss": 1.9286, "step": 1418 }, { "epoch": 0.3269020330588032, "grad_norm": 0.3486737310886383, "learning_rate": 4.255640320979916e-05, "loss": 1.9368, "step": 1419 }, { "epoch": 0.3271324079940103, "grad_norm": 0.35730016231536865, "learning_rate": 4.254208252751192e-05, "loss": 1.939, "step": 1420 }, { "epoch": 0.3273627829292173, "grad_norm": 0.4232202172279358, "learning_rate": 4.2527750497301323e-05, "loss": 1.9885, "step": 1421 }, { "epoch": 0.32759315786442433, "grad_norm": 0.4691898226737976, "learning_rate": 4.2513407128438714e-05, "loss": 1.9441, "step": 1422 }, { "epoch": 0.3278235327996314, "grad_norm": 0.4625113606452942, "learning_rate": 4.249905243020279e-05, "loss": 1.9671, "step": 1423 }, { "epoch": 0.32805390773483845, "grad_norm": 0.39329707622528076, "learning_rate": 4.2484686411879554e-05, "loss": 1.969, "step": 1424 }, { "epoch": 0.3282842826700455, "grad_norm": 0.4042659401893616, "learning_rate": 4.247030908276235e-05, "loss": 1.9439, "step": 1425 }, { "epoch": 0.32851465760525256, "grad_norm": 0.3877203166484833, "learning_rate": 4.245592045215182e-05, "loss": 1.9729, "step": 1426 }, { "epoch": 0.3287450325404596, "grad_norm": 0.3769465684890747, "learning_rate": 4.244152052935594e-05, "loss": 1.933, "step": 1427 }, { "epoch": 0.32897540747566667, "grad_norm": 0.3585154116153717, "learning_rate": 4.242710932368998e-05, "loss": 1.9239, "step": 1428 }, { "epoch": 0.32920578241087367, "grad_norm": 0.4347648322582245, "learning_rate": 4.24126868444765e-05, "loss": 1.9291, "step": 1429 }, { "epoch": 0.3294361573460807, "grad_norm": 0.4156748354434967, "learning_rate": 4.239825310104536e-05, "loss": 1.9682, "step": 1430 }, { "epoch": 0.3296665322812878, "grad_norm": 0.42165103554725647, "learning_rate": 4.238380810273371e-05, "loss": 1.9264, "step": 1431 }, { "epoch": 0.32989690721649484, "grad_norm": 0.3995850384235382, "learning_rate": 4.236935185888599e-05, "loss": 1.9642, "step": 1432 }, { "epoch": 0.3301272821517019, "grad_norm": 0.3906514346599579, "learning_rate": 4.235488437885388e-05, "loss": 1.9748, "step": 1433 }, { "epoch": 0.33035765708690895, "grad_norm": 0.41584986448287964, "learning_rate": 4.234040567199637e-05, "loss": 1.9616, "step": 1434 }, { "epoch": 0.330588032022116, "grad_norm": 0.4118795692920685, "learning_rate": 4.23259157476797e-05, "loss": 1.9347, "step": 1435 }, { "epoch": 0.33081840695732306, "grad_norm": 0.3886025846004486, "learning_rate": 4.2311414615277334e-05, "loss": 1.9822, "step": 1436 }, { "epoch": 0.3310487818925301, "grad_norm": 0.37518784403800964, "learning_rate": 4.229690228417004e-05, "loss": 1.9932, "step": 1437 }, { "epoch": 0.3312791568277371, "grad_norm": 0.36315637826919556, "learning_rate": 4.2282378763745776e-05, "loss": 1.9413, "step": 1438 }, { "epoch": 0.3315095317629442, "grad_norm": 0.3806985318660736, "learning_rate": 4.2267844063399795e-05, "loss": 1.9609, "step": 1439 }, { "epoch": 0.33173990669815123, "grad_norm": 0.4256606996059418, "learning_rate": 4.225329819253454e-05, "loss": 1.9546, "step": 1440 }, { "epoch": 0.3319702816333583, "grad_norm": 0.39796510338783264, "learning_rate": 4.22387411605597e-05, "loss": 1.8865, "step": 1441 }, { "epoch": 0.33220065656856534, "grad_norm": 0.3639500141143799, "learning_rate": 4.222417297689217e-05, "loss": 1.9707, "step": 1442 }, { "epoch": 0.3324310315037724, "grad_norm": 0.37752386927604675, "learning_rate": 4.2209593650956066e-05, "loss": 1.9363, "step": 1443 }, { "epoch": 0.33266140643897946, "grad_norm": 0.3830733597278595, "learning_rate": 4.2195003192182716e-05, "loss": 1.9302, "step": 1444 }, { "epoch": 0.3328917813741865, "grad_norm": 0.372428297996521, "learning_rate": 4.218040161001065e-05, "loss": 1.9245, "step": 1445 }, { "epoch": 0.3331221563093935, "grad_norm": 0.3669029176235199, "learning_rate": 4.216578891388558e-05, "loss": 1.9324, "step": 1446 }, { "epoch": 0.33335253124460057, "grad_norm": 0.3642967641353607, "learning_rate": 4.215116511326043e-05, "loss": 1.9444, "step": 1447 }, { "epoch": 0.3335829061798076, "grad_norm": 0.40914565324783325, "learning_rate": 4.213653021759528e-05, "loss": 1.9866, "step": 1448 }, { "epoch": 0.3338132811150147, "grad_norm": 0.422703355550766, "learning_rate": 4.212188423635741e-05, "loss": 1.9406, "step": 1449 }, { "epoch": 0.33404365605022174, "grad_norm": 0.39528700709342957, "learning_rate": 4.210722717902128e-05, "loss": 1.8815, "step": 1450 }, { "epoch": 0.3342740309854288, "grad_norm": 0.38393792510032654, "learning_rate": 4.209255905506847e-05, "loss": 1.9102, "step": 1451 }, { "epoch": 0.33450440592063585, "grad_norm": 0.4099547266960144, "learning_rate": 4.207787987398777e-05, "loss": 1.9308, "step": 1452 }, { "epoch": 0.3347347808558429, "grad_norm": 0.3750695586204529, "learning_rate": 4.2063189645275084e-05, "loss": 1.9501, "step": 1453 }, { "epoch": 0.3349651557910499, "grad_norm": 0.35555824637413025, "learning_rate": 4.2048488378433493e-05, "loss": 1.9634, "step": 1454 }, { "epoch": 0.33519553072625696, "grad_norm": 0.38299718499183655, "learning_rate": 4.203377608297319e-05, "loss": 1.9288, "step": 1455 }, { "epoch": 0.335425905661464, "grad_norm": 0.36809664964675903, "learning_rate": 4.201905276841153e-05, "loss": 1.9593, "step": 1456 }, { "epoch": 0.3356562805966711, "grad_norm": 0.3629557192325592, "learning_rate": 4.2004318444272985e-05, "loss": 1.9725, "step": 1457 }, { "epoch": 0.33588665553187813, "grad_norm": 0.3620203137397766, "learning_rate": 4.198957312008914e-05, "loss": 1.9426, "step": 1458 }, { "epoch": 0.3361170304670852, "grad_norm": 0.36575835943222046, "learning_rate": 4.197481680539871e-05, "loss": 1.9179, "step": 1459 }, { "epoch": 0.33634740540229224, "grad_norm": 0.3952338397502899, "learning_rate": 4.19600495097475e-05, "loss": 1.9442, "step": 1460 }, { "epoch": 0.3365777803374993, "grad_norm": 0.3748922646045685, "learning_rate": 4.194527124268844e-05, "loss": 1.9427, "step": 1461 }, { "epoch": 0.33680815527270636, "grad_norm": 0.3708333671092987, "learning_rate": 4.193048201378155e-05, "loss": 1.9762, "step": 1462 }, { "epoch": 0.33703853020791336, "grad_norm": 0.3731519877910614, "learning_rate": 4.191568183259394e-05, "loss": 1.9741, "step": 1463 }, { "epoch": 0.3372689051431204, "grad_norm": 0.35474276542663574, "learning_rate": 4.1900870708699804e-05, "loss": 1.9412, "step": 1464 }, { "epoch": 0.33749928007832747, "grad_norm": 0.3560710549354553, "learning_rate": 4.188604865168041e-05, "loss": 1.9283, "step": 1465 }, { "epoch": 0.3377296550135345, "grad_norm": 0.37630435824394226, "learning_rate": 4.187121567112412e-05, "loss": 1.9231, "step": 1466 }, { "epoch": 0.3379600299487416, "grad_norm": 0.3470880389213562, "learning_rate": 4.185637177662633e-05, "loss": 1.9477, "step": 1467 }, { "epoch": 0.33819040488394864, "grad_norm": 0.3840074837207794, "learning_rate": 4.184151697778953e-05, "loss": 1.9141, "step": 1468 }, { "epoch": 0.3384207798191557, "grad_norm": 0.3576875925064087, "learning_rate": 4.182665128422323e-05, "loss": 1.9395, "step": 1469 }, { "epoch": 0.33865115475436275, "grad_norm": 0.3345407247543335, "learning_rate": 4.181177470554401e-05, "loss": 1.9307, "step": 1470 }, { "epoch": 0.33888152968956975, "grad_norm": 0.4247853457927704, "learning_rate": 4.1796887251375496e-05, "loss": 1.8874, "step": 1471 }, { "epoch": 0.3391119046247768, "grad_norm": 0.379141241312027, "learning_rate": 4.178198893134833e-05, "loss": 1.9457, "step": 1472 }, { "epoch": 0.33934227955998386, "grad_norm": 0.3600916564464569, "learning_rate": 4.17670797551002e-05, "loss": 1.9315, "step": 1473 }, { "epoch": 0.3395726544951909, "grad_norm": 0.3637843132019043, "learning_rate": 4.17521597322758e-05, "loss": 1.9686, "step": 1474 }, { "epoch": 0.339803029430398, "grad_norm": 0.37155434489250183, "learning_rate": 4.1737228872526864e-05, "loss": 1.9212, "step": 1475 }, { "epoch": 0.34003340436560503, "grad_norm": 0.4029313623905182, "learning_rate": 4.172228718551211e-05, "loss": 1.9496, "step": 1476 }, { "epoch": 0.3402637793008121, "grad_norm": 0.41199514269828796, "learning_rate": 4.170733468089728e-05, "loss": 1.9517, "step": 1477 }, { "epoch": 0.34049415423601914, "grad_norm": 0.3815010190010071, "learning_rate": 4.16923713683551e-05, "loss": 1.9396, "step": 1478 }, { "epoch": 0.3407245291712262, "grad_norm": 0.37317416071891785, "learning_rate": 4.1677397257565305e-05, "loss": 1.9512, "step": 1479 }, { "epoch": 0.3409549041064332, "grad_norm": 0.37405267357826233, "learning_rate": 4.16624123582146e-05, "loss": 1.9269, "step": 1480 }, { "epoch": 0.34118527904164025, "grad_norm": 0.39356833696365356, "learning_rate": 4.164741667999666e-05, "loss": 1.9229, "step": 1481 }, { "epoch": 0.3414156539768473, "grad_norm": 0.4954525828361511, "learning_rate": 4.163241023261217e-05, "loss": 1.9657, "step": 1482 }, { "epoch": 0.34164602891205437, "grad_norm": 0.36887621879577637, "learning_rate": 4.161739302576875e-05, "loss": 1.9445, "step": 1483 }, { "epoch": 0.3418764038472614, "grad_norm": 0.39489662647247314, "learning_rate": 4.160236506918098e-05, "loss": 1.9651, "step": 1484 }, { "epoch": 0.3421067787824685, "grad_norm": 0.41998618841171265, "learning_rate": 4.158732637257041e-05, "loss": 1.8975, "step": 1485 }, { "epoch": 0.34233715371767554, "grad_norm": 0.4081173539161682, "learning_rate": 4.1572276945665525e-05, "loss": 1.9623, "step": 1486 }, { "epoch": 0.3425675286528826, "grad_norm": 0.4193333685398102, "learning_rate": 4.155721679820176e-05, "loss": 1.9407, "step": 1487 }, { "epoch": 0.3427979035880896, "grad_norm": 0.38516321778297424, "learning_rate": 4.154214593992149e-05, "loss": 1.9089, "step": 1488 }, { "epoch": 0.34302827852329665, "grad_norm": 0.3748013377189636, "learning_rate": 4.1527064380573996e-05, "loss": 1.9391, "step": 1489 }, { "epoch": 0.3432586534585037, "grad_norm": 0.39825043082237244, "learning_rate": 4.15119721299155e-05, "loss": 1.9173, "step": 1490 }, { "epoch": 0.34348902839371076, "grad_norm": 0.36681148409843445, "learning_rate": 4.1496869197709146e-05, "loss": 1.9324, "step": 1491 }, { "epoch": 0.3437194033289178, "grad_norm": 0.43164342641830444, "learning_rate": 4.1481755593724956e-05, "loss": 1.9227, "step": 1492 }, { "epoch": 0.3439497782641249, "grad_norm": 0.3709244430065155, "learning_rate": 4.14666313277399e-05, "loss": 1.9637, "step": 1493 }, { "epoch": 0.34418015319933193, "grad_norm": 0.34462210536003113, "learning_rate": 4.145149640953782e-05, "loss": 1.9426, "step": 1494 }, { "epoch": 0.344410528134539, "grad_norm": 0.3492080569267273, "learning_rate": 4.1436350848909435e-05, "loss": 1.9336, "step": 1495 }, { "epoch": 0.344640903069746, "grad_norm": 0.3844585716724396, "learning_rate": 4.142119465565238e-05, "loss": 1.9343, "step": 1496 }, { "epoch": 0.34487127800495304, "grad_norm": 0.3697526156902313, "learning_rate": 4.1406027839571146e-05, "loss": 1.9456, "step": 1497 }, { "epoch": 0.3451016529401601, "grad_norm": 0.3694004416465759, "learning_rate": 4.139085041047711e-05, "loss": 1.9886, "step": 1498 }, { "epoch": 0.34533202787536715, "grad_norm": 0.3863580822944641, "learning_rate": 4.137566237818851e-05, "loss": 1.9512, "step": 1499 }, { "epoch": 0.3455624028105742, "grad_norm": 0.37870219349861145, "learning_rate": 4.136046375253042e-05, "loss": 1.9451, "step": 1500 }, { "epoch": 0.34579277774578127, "grad_norm": 0.3615133762359619, "learning_rate": 4.134525454333481e-05, "loss": 1.9057, "step": 1501 }, { "epoch": 0.3460231526809883, "grad_norm": 0.3739986717700958, "learning_rate": 4.133003476044047e-05, "loss": 1.9588, "step": 1502 }, { "epoch": 0.3462535276161954, "grad_norm": 0.4588441550731659, "learning_rate": 4.131480441369303e-05, "loss": 1.9799, "step": 1503 }, { "epoch": 0.34648390255140243, "grad_norm": 0.38694971799850464, "learning_rate": 4.1299563512944964e-05, "loss": 1.9262, "step": 1504 }, { "epoch": 0.34671427748660943, "grad_norm": 0.38883253931999207, "learning_rate": 4.128431206805557e-05, "loss": 1.9379, "step": 1505 }, { "epoch": 0.3469446524218165, "grad_norm": 0.43418657779693604, "learning_rate": 4.126905008889093e-05, "loss": 1.9353, "step": 1506 }, { "epoch": 0.34717502735702355, "grad_norm": 0.589042603969574, "learning_rate": 4.1253777585324024e-05, "loss": 1.998, "step": 1507 }, { "epoch": 0.3474054022922306, "grad_norm": 0.4346330463886261, "learning_rate": 4.1238494567234565e-05, "loss": 1.9268, "step": 1508 }, { "epoch": 0.34763577722743766, "grad_norm": 0.4667739272117615, "learning_rate": 4.12232010445091e-05, "loss": 1.9709, "step": 1509 }, { "epoch": 0.3478661521626447, "grad_norm": 0.4551084637641907, "learning_rate": 4.120789702704095e-05, "loss": 1.9171, "step": 1510 }, { "epoch": 0.34809652709785177, "grad_norm": 0.4133882224559784, "learning_rate": 4.119258252473026e-05, "loss": 1.9291, "step": 1511 }, { "epoch": 0.3483269020330588, "grad_norm": 0.4728242754936218, "learning_rate": 4.1177257547483945e-05, "loss": 1.9584, "step": 1512 }, { "epoch": 0.34855727696826583, "grad_norm": 0.40352413058280945, "learning_rate": 4.1161922105215665e-05, "loss": 1.9011, "step": 1513 }, { "epoch": 0.3487876519034729, "grad_norm": 0.3841739892959595, "learning_rate": 4.114657620784589e-05, "loss": 1.9561, "step": 1514 }, { "epoch": 0.34901802683867994, "grad_norm": 0.42041391134262085, "learning_rate": 4.113121986530183e-05, "loss": 1.9396, "step": 1515 }, { "epoch": 0.349248401773887, "grad_norm": 0.3948105275630951, "learning_rate": 4.1115853087517465e-05, "loss": 1.9614, "step": 1516 }, { "epoch": 0.34947877670909405, "grad_norm": 0.42983347177505493, "learning_rate": 4.110047588443352e-05, "loss": 1.8982, "step": 1517 }, { "epoch": 0.3497091516443011, "grad_norm": 0.3702937662601471, "learning_rate": 4.1085088265997454e-05, "loss": 1.9158, "step": 1518 }, { "epoch": 0.34993952657950816, "grad_norm": 0.4027533233165741, "learning_rate": 4.1069690242163484e-05, "loss": 1.9576, "step": 1519 }, { "epoch": 0.3501699015147152, "grad_norm": 0.3931228816509247, "learning_rate": 4.105428182289255e-05, "loss": 1.9349, "step": 1520 }, { "epoch": 0.3504002764499222, "grad_norm": 0.3869069516658783, "learning_rate": 4.10388630181523e-05, "loss": 1.9224, "step": 1521 }, { "epoch": 0.3506306513851293, "grad_norm": 0.3997800648212433, "learning_rate": 4.102343383791713e-05, "loss": 1.9604, "step": 1522 }, { "epoch": 0.35086102632033633, "grad_norm": 0.36577335000038147, "learning_rate": 4.1007994292168126e-05, "loss": 1.9504, "step": 1523 }, { "epoch": 0.3510914012555434, "grad_norm": 0.37628257274627686, "learning_rate": 4.099254439089309e-05, "loss": 1.9246, "step": 1524 }, { "epoch": 0.35132177619075045, "grad_norm": 0.37803083658218384, "learning_rate": 4.097708414408651e-05, "loss": 1.9334, "step": 1525 }, { "epoch": 0.3515521511259575, "grad_norm": 0.35693293809890747, "learning_rate": 4.096161356174959e-05, "loss": 1.9456, "step": 1526 }, { "epoch": 0.35178252606116456, "grad_norm": 0.3918249011039734, "learning_rate": 4.094613265389019e-05, "loss": 1.9452, "step": 1527 }, { "epoch": 0.3520129009963716, "grad_norm": 0.3750636577606201, "learning_rate": 4.093064143052288e-05, "loss": 1.9855, "step": 1528 }, { "epoch": 0.35224327593157867, "grad_norm": 0.376988023519516, "learning_rate": 4.091513990166889e-05, "loss": 1.9647, "step": 1529 }, { "epoch": 0.35247365086678567, "grad_norm": 0.5802303552627563, "learning_rate": 4.08996280773561e-05, "loss": 1.9323, "step": 1530 }, { "epoch": 0.3527040258019927, "grad_norm": 0.3800950348377228, "learning_rate": 4.088410596761906e-05, "loss": 1.8699, "step": 1531 }, { "epoch": 0.3529344007371998, "grad_norm": 0.35629037022590637, "learning_rate": 4.0868573582499004e-05, "loss": 1.9295, "step": 1532 }, { "epoch": 0.35316477567240684, "grad_norm": 0.3810647428035736, "learning_rate": 4.0853030932043775e-05, "loss": 1.8973, "step": 1533 }, { "epoch": 0.3533951506076139, "grad_norm": 0.3725988268852234, "learning_rate": 4.0837478026307864e-05, "loss": 1.9272, "step": 1534 }, { "epoch": 0.35362552554282095, "grad_norm": 0.3774009346961975, "learning_rate": 4.0821914875352404e-05, "loss": 1.9333, "step": 1535 }, { "epoch": 0.353855900478028, "grad_norm": 0.38072624802589417, "learning_rate": 4.080634148924516e-05, "loss": 1.9219, "step": 1536 }, { "epoch": 0.35408627541323506, "grad_norm": 0.3501121997833252, "learning_rate": 4.07907578780605e-05, "loss": 1.95, "step": 1537 }, { "epoch": 0.35431665034844206, "grad_norm": 0.3714202642440796, "learning_rate": 4.077516405187944e-05, "loss": 1.9313, "step": 1538 }, { "epoch": 0.3545470252836491, "grad_norm": 0.37214115262031555, "learning_rate": 4.0759560020789555e-05, "loss": 1.9514, "step": 1539 }, { "epoch": 0.3547774002188562, "grad_norm": 0.370298832654953, "learning_rate": 4.0743945794885063e-05, "loss": 1.9098, "step": 1540 }, { "epoch": 0.35500777515406323, "grad_norm": 0.34349122643470764, "learning_rate": 4.072832138426676e-05, "loss": 1.9875, "step": 1541 }, { "epoch": 0.3552381500892703, "grad_norm": 0.3742373585700989, "learning_rate": 4.0712686799042035e-05, "loss": 1.9755, "step": 1542 }, { "epoch": 0.35546852502447734, "grad_norm": 0.3748011291027069, "learning_rate": 4.069704204932484e-05, "loss": 1.9568, "step": 1543 }, { "epoch": 0.3556988999596844, "grad_norm": 0.34564530849456787, "learning_rate": 4.068138714523575e-05, "loss": 1.9401, "step": 1544 }, { "epoch": 0.35592927489489146, "grad_norm": 0.359476238489151, "learning_rate": 4.0665722096901856e-05, "loss": 1.8966, "step": 1545 }, { "epoch": 0.3561596498300985, "grad_norm": 0.35321754217147827, "learning_rate": 4.065004691445684e-05, "loss": 1.9723, "step": 1546 }, { "epoch": 0.3563900247653055, "grad_norm": 0.34854206442832947, "learning_rate": 4.063436160804092e-05, "loss": 1.8924, "step": 1547 }, { "epoch": 0.35662039970051257, "grad_norm": 0.3743334412574768, "learning_rate": 4.06186661878009e-05, "loss": 1.9701, "step": 1548 }, { "epoch": 0.3568507746357196, "grad_norm": 0.3434080481529236, "learning_rate": 4.060296066389009e-05, "loss": 1.9572, "step": 1549 }, { "epoch": 0.3570811495709267, "grad_norm": 0.36528316140174866, "learning_rate": 4.058724504646834e-05, "loss": 1.9193, "step": 1550 }, { "epoch": 0.35731152450613374, "grad_norm": 0.35197341442108154, "learning_rate": 4.0571519345702045e-05, "loss": 1.9112, "step": 1551 }, { "epoch": 0.3575418994413408, "grad_norm": 0.3360370695590973, "learning_rate": 4.0555783571764135e-05, "loss": 1.9754, "step": 1552 }, { "epoch": 0.35777227437654785, "grad_norm": 0.3877345025539398, "learning_rate": 4.054003773483401e-05, "loss": 1.9304, "step": 1553 }, { "epoch": 0.3580026493117549, "grad_norm": 0.362036794424057, "learning_rate": 4.052428184509762e-05, "loss": 1.9739, "step": 1554 }, { "epoch": 0.3582330242469619, "grad_norm": 0.3832316994667053, "learning_rate": 4.050851591274741e-05, "loss": 1.916, "step": 1555 }, { "epoch": 0.35846339918216896, "grad_norm": 0.3269989788532257, "learning_rate": 4.04927399479823e-05, "loss": 1.945, "step": 1556 }, { "epoch": 0.358693774117376, "grad_norm": 0.3928363621234894, "learning_rate": 4.047695396100773e-05, "loss": 1.9307, "step": 1557 }, { "epoch": 0.3589241490525831, "grad_norm": 0.36872532963752747, "learning_rate": 4.0461157962035614e-05, "loss": 1.96, "step": 1558 }, { "epoch": 0.35915452398779013, "grad_norm": 0.3670792281627655, "learning_rate": 4.0445351961284326e-05, "loss": 1.943, "step": 1559 }, { "epoch": 0.3593848989229972, "grad_norm": 0.37492701411247253, "learning_rate": 4.042953596897873e-05, "loss": 1.9402, "step": 1560 }, { "epoch": 0.35961527385820424, "grad_norm": 0.3917204439640045, "learning_rate": 4.0413709995350145e-05, "loss": 1.9024, "step": 1561 }, { "epoch": 0.3598456487934113, "grad_norm": 0.41000211238861084, "learning_rate": 4.0397874050636345e-05, "loss": 1.8763, "step": 1562 }, { "epoch": 0.3600760237286183, "grad_norm": 0.3671495020389557, "learning_rate": 4.038202814508157e-05, "loss": 1.9075, "step": 1563 }, { "epoch": 0.36030639866382536, "grad_norm": 0.368155300617218, "learning_rate": 4.0366172288936474e-05, "loss": 1.9192, "step": 1564 }, { "epoch": 0.3605367735990324, "grad_norm": 0.4022560715675354, "learning_rate": 4.035030649245818e-05, "loss": 1.9751, "step": 1565 }, { "epoch": 0.36076714853423947, "grad_norm": 0.3688800036907196, "learning_rate": 4.0334430765910223e-05, "loss": 1.9335, "step": 1566 }, { "epoch": 0.3609975234694465, "grad_norm": 0.3616844117641449, "learning_rate": 4.0318545119562556e-05, "loss": 1.9162, "step": 1567 }, { "epoch": 0.3612278984046536, "grad_norm": 0.3811258375644684, "learning_rate": 4.030264956369157e-05, "loss": 1.9399, "step": 1568 }, { "epoch": 0.36145827333986064, "grad_norm": 0.3331355154514313, "learning_rate": 4.028674410858006e-05, "loss": 1.956, "step": 1569 }, { "epoch": 0.3616886482750677, "grad_norm": 0.36625009775161743, "learning_rate": 4.02708287645172e-05, "loss": 1.9378, "step": 1570 }, { "epoch": 0.36191902321027475, "grad_norm": 0.3907724916934967, "learning_rate": 4.0254903541798595e-05, "loss": 1.8813, "step": 1571 }, { "epoch": 0.36214939814548175, "grad_norm": 0.36563193798065186, "learning_rate": 4.023896845072621e-05, "loss": 1.9154, "step": 1572 }, { "epoch": 0.3623797730806888, "grad_norm": 0.345651239156723, "learning_rate": 4.022302350160844e-05, "loss": 1.8994, "step": 1573 }, { "epoch": 0.36261014801589586, "grad_norm": 0.38833436369895935, "learning_rate": 4.020706870476e-05, "loss": 1.9321, "step": 1574 }, { "epoch": 0.3628405229511029, "grad_norm": 0.4348444640636444, "learning_rate": 4.0191104070502015e-05, "loss": 1.9467, "step": 1575 }, { "epoch": 0.36307089788631, "grad_norm": 0.3536776006221771, "learning_rate": 4.017512960916196e-05, "loss": 1.9344, "step": 1576 }, { "epoch": 0.36330127282151703, "grad_norm": 0.38053643703460693, "learning_rate": 4.015914533107367e-05, "loss": 1.9119, "step": 1577 }, { "epoch": 0.3635316477567241, "grad_norm": 0.3606123924255371, "learning_rate": 4.014315124657733e-05, "loss": 1.9185, "step": 1578 }, { "epoch": 0.36376202269193114, "grad_norm": 0.34685027599334717, "learning_rate": 4.0127147366019456e-05, "loss": 1.9647, "step": 1579 }, { "epoch": 0.36399239762713814, "grad_norm": 0.4053340554237366, "learning_rate": 4.011113369975293e-05, "loss": 1.9724, "step": 1580 }, { "epoch": 0.3642227725623452, "grad_norm": 0.3490343689918518, "learning_rate": 4.009511025813694e-05, "loss": 1.9134, "step": 1581 }, { "epoch": 0.36445314749755225, "grad_norm": 0.35589826107025146, "learning_rate": 4.007907705153699e-05, "loss": 1.9411, "step": 1582 }, { "epoch": 0.3646835224327593, "grad_norm": 0.34367847442626953, "learning_rate": 4.006303409032495e-05, "loss": 1.8974, "step": 1583 }, { "epoch": 0.36491389736796637, "grad_norm": 0.36804187297821045, "learning_rate": 4.0046981384878936e-05, "loss": 1.9529, "step": 1584 }, { "epoch": 0.3651442723031734, "grad_norm": 0.3537735342979431, "learning_rate": 4.00309189455834e-05, "loss": 1.9573, "step": 1585 }, { "epoch": 0.3653746472383805, "grad_norm": 0.35024622082710266, "learning_rate": 4.0014846782829104e-05, "loss": 1.864, "step": 1586 }, { "epoch": 0.36560502217358753, "grad_norm": 0.35079026222229004, "learning_rate": 3.9998764907013074e-05, "loss": 1.9077, "step": 1587 }, { "epoch": 0.36583539710879454, "grad_norm": 0.3617739975452423, "learning_rate": 3.998267332853864e-05, "loss": 1.9649, "step": 1588 }, { "epoch": 0.3660657720440016, "grad_norm": 0.36223849654197693, "learning_rate": 3.9966572057815373e-05, "loss": 1.9065, "step": 1589 }, { "epoch": 0.36629614697920865, "grad_norm": 0.36305031180381775, "learning_rate": 3.9950461105259175e-05, "loss": 1.9585, "step": 1590 }, { "epoch": 0.3665265219144157, "grad_norm": 0.344015896320343, "learning_rate": 3.993434048129215e-05, "loss": 1.8981, "step": 1591 }, { "epoch": 0.36675689684962276, "grad_norm": 0.36218586564064026, "learning_rate": 3.9918210196342685e-05, "loss": 1.9237, "step": 1592 }, { "epoch": 0.3669872717848298, "grad_norm": 0.3735540509223938, "learning_rate": 3.9902070260845426e-05, "loss": 1.9258, "step": 1593 }, { "epoch": 0.36721764672003687, "grad_norm": 0.35511258244514465, "learning_rate": 3.988592068524125e-05, "loss": 1.9075, "step": 1594 }, { "epoch": 0.36744802165524393, "grad_norm": 0.33883020281791687, "learning_rate": 3.9869761479977266e-05, "loss": 1.9599, "step": 1595 }, { "epoch": 0.367678396590451, "grad_norm": 0.3741529583930969, "learning_rate": 3.985359265550682e-05, "loss": 1.8877, "step": 1596 }, { "epoch": 0.367908771525658, "grad_norm": 0.3367209732532501, "learning_rate": 3.9837414222289476e-05, "loss": 1.9414, "step": 1597 }, { "epoch": 0.36813914646086504, "grad_norm": 0.32879960536956787, "learning_rate": 3.982122619079102e-05, "loss": 1.9077, "step": 1598 }, { "epoch": 0.3683695213960721, "grad_norm": 0.3689107596874237, "learning_rate": 3.980502857148345e-05, "loss": 1.9302, "step": 1599 }, { "epoch": 0.36859989633127915, "grad_norm": 0.3504229485988617, "learning_rate": 3.9788821374844956e-05, "loss": 1.937, "step": 1600 }, { "epoch": 0.3688302712664862, "grad_norm": 0.33713072538375854, "learning_rate": 3.977260461135991e-05, "loss": 1.932, "step": 1601 }, { "epoch": 0.36906064620169327, "grad_norm": 0.4081937372684479, "learning_rate": 3.975637829151891e-05, "loss": 1.9286, "step": 1602 }, { "epoch": 0.3692910211369003, "grad_norm": 0.34088289737701416, "learning_rate": 3.974014242581871e-05, "loss": 1.8767, "step": 1603 }, { "epoch": 0.3695213960721074, "grad_norm": 0.3356504440307617, "learning_rate": 3.9723897024762255e-05, "loss": 1.9126, "step": 1604 }, { "epoch": 0.3697517710073144, "grad_norm": 0.3483469486236572, "learning_rate": 3.970764209885863e-05, "loss": 1.937, "step": 1605 }, { "epoch": 0.36998214594252143, "grad_norm": 2.2713401317596436, "learning_rate": 3.969137765862312e-05, "loss": 1.9248, "step": 1606 }, { "epoch": 0.3702125208777285, "grad_norm": 0.39293310046195984, "learning_rate": 3.967510371457713e-05, "loss": 1.9552, "step": 1607 }, { "epoch": 0.37044289581293555, "grad_norm": 0.3952845335006714, "learning_rate": 3.965882027724823e-05, "loss": 1.9101, "step": 1608 }, { "epoch": 0.3706732707481426, "grad_norm": 0.39205652475357056, "learning_rate": 3.964252735717014e-05, "loss": 1.9256, "step": 1609 }, { "epoch": 0.37090364568334966, "grad_norm": 0.39870941638946533, "learning_rate": 3.962622496488269e-05, "loss": 1.9164, "step": 1610 }, { "epoch": 0.3711340206185567, "grad_norm": 0.374203085899353, "learning_rate": 3.960991311093187e-05, "loss": 1.9472, "step": 1611 }, { "epoch": 0.37136439555376377, "grad_norm": 0.4283386766910553, "learning_rate": 3.959359180586975e-05, "loss": 1.9129, "step": 1612 }, { "epoch": 0.3715947704889708, "grad_norm": 0.36452382802963257, "learning_rate": 3.957726106025455e-05, "loss": 1.9018, "step": 1613 }, { "epoch": 0.3718251454241778, "grad_norm": 0.3665986955165863, "learning_rate": 3.956092088465058e-05, "loss": 1.9662, "step": 1614 }, { "epoch": 0.3720555203593849, "grad_norm": 0.5787962079048157, "learning_rate": 3.954457128962825e-05, "loss": 1.9627, "step": 1615 }, { "epoch": 0.37228589529459194, "grad_norm": 0.36544492840766907, "learning_rate": 3.9528212285764066e-05, "loss": 1.9236, "step": 1616 }, { "epoch": 0.372516270229799, "grad_norm": 0.39418160915374756, "learning_rate": 3.951184388364063e-05, "loss": 1.9573, "step": 1617 }, { "epoch": 0.37274664516500605, "grad_norm": 0.34204772114753723, "learning_rate": 3.94954660938466e-05, "loss": 1.9312, "step": 1618 }, { "epoch": 0.3729770201002131, "grad_norm": 0.36322134733200073, "learning_rate": 3.947907892697674e-05, "loss": 1.9066, "step": 1619 }, { "epoch": 0.37320739503542016, "grad_norm": 0.3690900504589081, "learning_rate": 3.946268239363185e-05, "loss": 1.8902, "step": 1620 }, { "epoch": 0.3734377699706272, "grad_norm": 0.34493502974510193, "learning_rate": 3.94462765044188e-05, "loss": 1.941, "step": 1621 }, { "epoch": 0.3736681449058342, "grad_norm": 0.4338090717792511, "learning_rate": 3.942986126995052e-05, "loss": 1.905, "step": 1622 }, { "epoch": 0.3738985198410413, "grad_norm": 0.37507712841033936, "learning_rate": 3.941343670084597e-05, "loss": 1.9268, "step": 1623 }, { "epoch": 0.37412889477624833, "grad_norm": 0.3609171211719513, "learning_rate": 3.9397002807730166e-05, "loss": 1.9002, "step": 1624 }, { "epoch": 0.3743592697114554, "grad_norm": 0.36175623536109924, "learning_rate": 3.9380559601234145e-05, "loss": 1.9413, "step": 1625 }, { "epoch": 0.37458964464666245, "grad_norm": 0.3407040536403656, "learning_rate": 3.9364107091994965e-05, "loss": 1.935, "step": 1626 }, { "epoch": 0.3748200195818695, "grad_norm": 0.40740305185317993, "learning_rate": 3.93476452906557e-05, "loss": 1.9456, "step": 1627 }, { "epoch": 0.37505039451707656, "grad_norm": 0.3318836987018585, "learning_rate": 3.9331174207865466e-05, "loss": 1.9209, "step": 1628 }, { "epoch": 0.3752807694522836, "grad_norm": 0.338209867477417, "learning_rate": 3.931469385427935e-05, "loss": 1.9072, "step": 1629 }, { "epoch": 0.3755111443874906, "grad_norm": 0.33552974462509155, "learning_rate": 3.929820424055843e-05, "loss": 1.941, "step": 1630 }, { "epoch": 0.37574151932269767, "grad_norm": 0.3562411367893219, "learning_rate": 3.928170537736981e-05, "loss": 1.9351, "step": 1631 }, { "epoch": 0.3759718942579047, "grad_norm": 0.35798653960227966, "learning_rate": 3.926519727538655e-05, "loss": 1.9587, "step": 1632 }, { "epoch": 0.3762022691931118, "grad_norm": 0.3655996024608612, "learning_rate": 3.92486799452877e-05, "loss": 1.9277, "step": 1633 }, { "epoch": 0.37643264412831884, "grad_norm": 0.36109066009521484, "learning_rate": 3.923215339775826e-05, "loss": 1.9184, "step": 1634 }, { "epoch": 0.3766630190635259, "grad_norm": 0.376675009727478, "learning_rate": 3.921561764348921e-05, "loss": 1.9359, "step": 1635 }, { "epoch": 0.37689339399873295, "grad_norm": 0.3392348289489746, "learning_rate": 3.919907269317751e-05, "loss": 1.9422, "step": 1636 }, { "epoch": 0.37712376893394, "grad_norm": 0.32072538137435913, "learning_rate": 3.9182518557526006e-05, "loss": 1.9317, "step": 1637 }, { "epoch": 0.37735414386914706, "grad_norm": 0.3707393705844879, "learning_rate": 3.916595524724353e-05, "loss": 1.9108, "step": 1638 }, { "epoch": 0.37758451880435406, "grad_norm": 0.35337114334106445, "learning_rate": 3.914938277304485e-05, "loss": 1.8794, "step": 1639 }, { "epoch": 0.3778148937395611, "grad_norm": 0.3372167646884918, "learning_rate": 3.913280114565066e-05, "loss": 1.9056, "step": 1640 }, { "epoch": 0.3780452686747682, "grad_norm": 0.3371385633945465, "learning_rate": 3.911621037578754e-05, "loss": 1.9428, "step": 1641 }, { "epoch": 0.37827564360997523, "grad_norm": 0.3663035035133362, "learning_rate": 3.9099610474188026e-05, "loss": 1.9048, "step": 1642 }, { "epoch": 0.3785060185451823, "grad_norm": 0.3384578227996826, "learning_rate": 3.908300145159055e-05, "loss": 1.9883, "step": 1643 }, { "epoch": 0.37873639348038934, "grad_norm": 0.36134329438209534, "learning_rate": 3.906638331873945e-05, "loss": 1.9309, "step": 1644 }, { "epoch": 0.3789667684155964, "grad_norm": 0.3585767447948456, "learning_rate": 3.9049756086384926e-05, "loss": 1.9004, "step": 1645 }, { "epoch": 0.37919714335080346, "grad_norm": 0.33487164974212646, "learning_rate": 3.90331197652831e-05, "loss": 1.9145, "step": 1646 }, { "epoch": 0.37942751828601046, "grad_norm": 0.3215219974517822, "learning_rate": 3.9016474366195976e-05, "loss": 1.9412, "step": 1647 }, { "epoch": 0.3796578932212175, "grad_norm": 0.3521794378757477, "learning_rate": 3.8999819899891385e-05, "loss": 1.8827, "step": 1648 }, { "epoch": 0.37988826815642457, "grad_norm": 0.3507380783557892, "learning_rate": 3.898315637714308e-05, "loss": 1.8878, "step": 1649 }, { "epoch": 0.3801186430916316, "grad_norm": 0.3519982397556305, "learning_rate": 3.896648380873063e-05, "loss": 1.9218, "step": 1650 }, { "epoch": 0.3803490180268387, "grad_norm": 0.3586079180240631, "learning_rate": 3.894980220543948e-05, "loss": 1.9148, "step": 1651 }, { "epoch": 0.38057939296204574, "grad_norm": 0.3516524136066437, "learning_rate": 3.893311157806091e-05, "loss": 1.9472, "step": 1652 }, { "epoch": 0.3808097678972528, "grad_norm": 0.3286772072315216, "learning_rate": 3.8916411937392046e-05, "loss": 1.9373, "step": 1653 }, { "epoch": 0.38104014283245985, "grad_norm": 0.3573770523071289, "learning_rate": 3.8899703294235825e-05, "loss": 1.9507, "step": 1654 }, { "epoch": 0.3812705177676669, "grad_norm": 0.35250598192214966, "learning_rate": 3.888298565940101e-05, "loss": 1.9322, "step": 1655 }, { "epoch": 0.3815008927028739, "grad_norm": 0.349297434091568, "learning_rate": 3.886625904370224e-05, "loss": 1.9144, "step": 1656 }, { "epoch": 0.38173126763808096, "grad_norm": 0.35248780250549316, "learning_rate": 3.8849523457959867e-05, "loss": 1.9203, "step": 1657 }, { "epoch": 0.381961642573288, "grad_norm": 0.370328426361084, "learning_rate": 3.883277891300011e-05, "loss": 1.908, "step": 1658 }, { "epoch": 0.3821920175084951, "grad_norm": 0.35625332593917847, "learning_rate": 3.881602541965497e-05, "loss": 1.935, "step": 1659 }, { "epoch": 0.38242239244370213, "grad_norm": 0.3555578291416168, "learning_rate": 3.8799262988762236e-05, "loss": 1.8716, "step": 1660 }, { "epoch": 0.3826527673789092, "grad_norm": 0.3558085858821869, "learning_rate": 3.8782491631165475e-05, "loss": 1.9226, "step": 1661 }, { "epoch": 0.38288314231411624, "grad_norm": 0.34992340207099915, "learning_rate": 3.876571135771402e-05, "loss": 1.9565, "step": 1662 }, { "epoch": 0.3831135172493233, "grad_norm": 0.35160091519355774, "learning_rate": 3.8748922179262995e-05, "loss": 1.9325, "step": 1663 }, { "epoch": 0.3833438921845303, "grad_norm": 0.369723379611969, "learning_rate": 3.8732124106673277e-05, "loss": 1.9233, "step": 1664 }, { "epoch": 0.38357426711973736, "grad_norm": 0.3171616196632385, "learning_rate": 3.871531715081147e-05, "loss": 1.9016, "step": 1665 }, { "epoch": 0.3838046420549444, "grad_norm": 0.3430429697036743, "learning_rate": 3.869850132254996e-05, "loss": 1.9491, "step": 1666 }, { "epoch": 0.38403501699015147, "grad_norm": 0.3790530562400818, "learning_rate": 3.868167663276686e-05, "loss": 1.9447, "step": 1667 }, { "epoch": 0.3842653919253585, "grad_norm": 0.36168840527534485, "learning_rate": 3.866484309234601e-05, "loss": 1.9059, "step": 1668 }, { "epoch": 0.3844957668605656, "grad_norm": 0.3549191951751709, "learning_rate": 3.864800071217699e-05, "loss": 1.8917, "step": 1669 }, { "epoch": 0.38472614179577264, "grad_norm": 0.3389270305633545, "learning_rate": 3.863114950315507e-05, "loss": 1.916, "step": 1670 }, { "epoch": 0.3849565167309797, "grad_norm": 0.3506709337234497, "learning_rate": 3.861428947618125e-05, "loss": 1.8774, "step": 1671 }, { "epoch": 0.3851868916661867, "grad_norm": 0.36808672547340393, "learning_rate": 3.8597420642162266e-05, "loss": 1.8936, "step": 1672 }, { "epoch": 0.38541726660139375, "grad_norm": 0.32629942893981934, "learning_rate": 3.858054301201047e-05, "loss": 1.9222, "step": 1673 }, { "epoch": 0.3856476415366008, "grad_norm": 0.3632679283618927, "learning_rate": 3.856365659664399e-05, "loss": 1.8962, "step": 1674 }, { "epoch": 0.38587801647180786, "grad_norm": 0.37167853116989136, "learning_rate": 3.854676140698657e-05, "loss": 1.9164, "step": 1675 }, { "epoch": 0.3861083914070149, "grad_norm": 0.3535881042480469, "learning_rate": 3.85298574539677e-05, "loss": 1.9174, "step": 1676 }, { "epoch": 0.386338766342222, "grad_norm": 0.347840279340744, "learning_rate": 3.851294474852247e-05, "loss": 1.9248, "step": 1677 }, { "epoch": 0.38656914127742903, "grad_norm": 0.33015891909599304, "learning_rate": 3.849602330159166e-05, "loss": 1.9183, "step": 1678 }, { "epoch": 0.3867995162126361, "grad_norm": 0.35760658979415894, "learning_rate": 3.8479093124121724e-05, "loss": 1.8769, "step": 1679 }, { "epoch": 0.38702989114784314, "grad_norm": 0.373217910528183, "learning_rate": 3.8462154227064725e-05, "loss": 1.9621, "step": 1680 }, { "epoch": 0.38726026608305014, "grad_norm": 0.3560134172439575, "learning_rate": 3.84452066213784e-05, "loss": 1.8822, "step": 1681 }, { "epoch": 0.3874906410182572, "grad_norm": 0.45551949739456177, "learning_rate": 3.842825031802612e-05, "loss": 1.9287, "step": 1682 }, { "epoch": 0.38772101595346425, "grad_norm": 0.35330089926719666, "learning_rate": 3.841128532797683e-05, "loss": 1.9213, "step": 1683 }, { "epoch": 0.3879513908886713, "grad_norm": 0.34239888191223145, "learning_rate": 3.839431166220517e-05, "loss": 1.9538, "step": 1684 }, { "epoch": 0.38818176582387837, "grad_norm": 0.34091871976852417, "learning_rate": 3.837732933169135e-05, "loss": 1.9035, "step": 1685 }, { "epoch": 0.3884121407590854, "grad_norm": 0.3558197319507599, "learning_rate": 3.836033834742118e-05, "loss": 1.9644, "step": 1686 }, { "epoch": 0.3886425156942925, "grad_norm": 0.3456445634365082, "learning_rate": 3.834333872038608e-05, "loss": 1.9075, "step": 1687 }, { "epoch": 0.38887289062949953, "grad_norm": 0.36084702610969543, "learning_rate": 3.832633046158307e-05, "loss": 1.9316, "step": 1688 }, { "epoch": 0.38910326556470654, "grad_norm": 0.33976197242736816, "learning_rate": 3.830931358201474e-05, "loss": 1.9229, "step": 1689 }, { "epoch": 0.3893336404999136, "grad_norm": 0.35322508215904236, "learning_rate": 3.8292288092689263e-05, "loss": 1.9309, "step": 1690 }, { "epoch": 0.38956401543512065, "grad_norm": 0.3712320625782013, "learning_rate": 3.827525400462038e-05, "loss": 1.9539, "step": 1691 }, { "epoch": 0.3897943903703277, "grad_norm": 0.3901910185813904, "learning_rate": 3.825821132882738e-05, "loss": 1.9087, "step": 1692 }, { "epoch": 0.39002476530553476, "grad_norm": 0.3463098406791687, "learning_rate": 3.8241160076335144e-05, "loss": 1.9202, "step": 1693 }, { "epoch": 0.3902551402407418, "grad_norm": 0.360189288854599, "learning_rate": 3.822410025817406e-05, "loss": 1.9265, "step": 1694 }, { "epoch": 0.39048551517594887, "grad_norm": 0.36670225858688354, "learning_rate": 3.8207031885380094e-05, "loss": 1.9054, "step": 1695 }, { "epoch": 0.39071589011115593, "grad_norm": 0.3397066295146942, "learning_rate": 3.8189954968994714e-05, "loss": 1.9142, "step": 1696 }, { "epoch": 0.39094626504636293, "grad_norm": 0.35477685928344727, "learning_rate": 3.817286952006494e-05, "loss": 1.8898, "step": 1697 }, { "epoch": 0.39117663998157, "grad_norm": 0.334024578332901, "learning_rate": 3.815577554964328e-05, "loss": 1.9463, "step": 1698 }, { "epoch": 0.39140701491677704, "grad_norm": 0.3477817177772522, "learning_rate": 3.813867306878779e-05, "loss": 1.9466, "step": 1699 }, { "epoch": 0.3916373898519841, "grad_norm": 0.3450094759464264, "learning_rate": 3.812156208856201e-05, "loss": 1.901, "step": 1700 }, { "epoch": 0.39186776478719115, "grad_norm": 0.33830526471138, "learning_rate": 3.8104442620035e-05, "loss": 1.845, "step": 1701 }, { "epoch": 0.3920981397223982, "grad_norm": 0.33891260623931885, "learning_rate": 3.8087314674281284e-05, "loss": 1.9365, "step": 1702 }, { "epoch": 0.39232851465760527, "grad_norm": 0.338767945766449, "learning_rate": 3.8070178262380876e-05, "loss": 1.9024, "step": 1703 }, { "epoch": 0.3925588895928123, "grad_norm": 0.314849317073822, "learning_rate": 3.805303339541927e-05, "loss": 1.9012, "step": 1704 }, { "epoch": 0.3927892645280194, "grad_norm": 0.33434951305389404, "learning_rate": 3.803588008448745e-05, "loss": 1.9509, "step": 1705 }, { "epoch": 0.3930196394632264, "grad_norm": 0.31531068682670593, "learning_rate": 3.8018718340681834e-05, "loss": 1.9107, "step": 1706 }, { "epoch": 0.39325001439843343, "grad_norm": 0.33060523867607117, "learning_rate": 3.8001548175104306e-05, "loss": 1.9295, "step": 1707 }, { "epoch": 0.3934803893336405, "grad_norm": 0.3422759473323822, "learning_rate": 3.798436959886219e-05, "loss": 1.9118, "step": 1708 }, { "epoch": 0.39371076426884755, "grad_norm": 0.3475729823112488, "learning_rate": 3.796718262306827e-05, "loss": 1.9112, "step": 1709 }, { "epoch": 0.3939411392040546, "grad_norm": 0.3494788408279419, "learning_rate": 3.7949987258840745e-05, "loss": 1.9169, "step": 1710 }, { "epoch": 0.39417151413926166, "grad_norm": 0.3506164252758026, "learning_rate": 3.793278351730325e-05, "loss": 1.9286, "step": 1711 }, { "epoch": 0.3944018890744687, "grad_norm": 0.34723806381225586, "learning_rate": 3.7915571409584836e-05, "loss": 1.8879, "step": 1712 }, { "epoch": 0.39463226400967577, "grad_norm": 0.3473455011844635, "learning_rate": 3.789835094681997e-05, "loss": 1.8786, "step": 1713 }, { "epoch": 0.39486263894488277, "grad_norm": 0.3923957645893097, "learning_rate": 3.7881122140148505e-05, "loss": 1.9194, "step": 1714 }, { "epoch": 0.3950930138800898, "grad_norm": 0.3376977741718292, "learning_rate": 3.786388500071572e-05, "loss": 1.8793, "step": 1715 }, { "epoch": 0.3953233888152969, "grad_norm": 0.3347991108894348, "learning_rate": 3.784663953967228e-05, "loss": 1.9162, "step": 1716 }, { "epoch": 0.39555376375050394, "grad_norm": 0.34383389353752136, "learning_rate": 3.78293857681742e-05, "loss": 1.8971, "step": 1717 }, { "epoch": 0.395784138685711, "grad_norm": 0.32244226336479187, "learning_rate": 3.781212369738292e-05, "loss": 1.8976, "step": 1718 }, { "epoch": 0.39601451362091805, "grad_norm": 0.3223436772823334, "learning_rate": 3.7794853338465215e-05, "loss": 1.9276, "step": 1719 }, { "epoch": 0.3962448885561251, "grad_norm": 0.34223121404647827, "learning_rate": 3.7777574702593234e-05, "loss": 1.9268, "step": 1720 }, { "epoch": 0.39647526349133216, "grad_norm": 0.3519672453403473, "learning_rate": 3.776028780094446e-05, "loss": 1.858, "step": 1721 }, { "epoch": 0.3967056384265392, "grad_norm": 0.3373509645462036, "learning_rate": 3.7742992644701766e-05, "loss": 1.9315, "step": 1722 }, { "epoch": 0.3969360133617462, "grad_norm": 0.3214115500450134, "learning_rate": 3.7725689245053334e-05, "loss": 1.923, "step": 1723 }, { "epoch": 0.3971663882969533, "grad_norm": 0.40592867136001587, "learning_rate": 3.770837761319267e-05, "loss": 1.8679, "step": 1724 }, { "epoch": 0.39739676323216033, "grad_norm": 0.3447992503643036, "learning_rate": 3.7691057760318625e-05, "loss": 1.9408, "step": 1725 }, { "epoch": 0.3976271381673674, "grad_norm": 0.4922841191291809, "learning_rate": 3.7673729697635374e-05, "loss": 1.8719, "step": 1726 }, { "epoch": 0.39785751310257444, "grad_norm": 0.3543698191642761, "learning_rate": 3.765639343635238e-05, "loss": 1.9423, "step": 1727 }, { "epoch": 0.3980878880377815, "grad_norm": 0.33376234769821167, "learning_rate": 3.763904898768442e-05, "loss": 1.9474, "step": 1728 }, { "epoch": 0.39831826297298856, "grad_norm": 0.4011029899120331, "learning_rate": 3.762169636285158e-05, "loss": 1.8988, "step": 1729 }, { "epoch": 0.3985486379081956, "grad_norm": 0.3603111505508423, "learning_rate": 3.760433557307922e-05, "loss": 1.9615, "step": 1730 }, { "epoch": 0.3987790128434026, "grad_norm": 0.37484145164489746, "learning_rate": 3.7586966629597975e-05, "loss": 1.9525, "step": 1731 }, { "epoch": 0.39900938777860967, "grad_norm": 0.3409552574157715, "learning_rate": 3.756958954364378e-05, "loss": 1.936, "step": 1732 }, { "epoch": 0.3992397627138167, "grad_norm": 0.35565927624702454, "learning_rate": 3.7552204326457794e-05, "loss": 1.904, "step": 1733 }, { "epoch": 0.3994701376490238, "grad_norm": 0.4190678000450134, "learning_rate": 3.7534810989286506e-05, "loss": 1.9057, "step": 1734 }, { "epoch": 0.39970051258423084, "grad_norm": 0.38595113158226013, "learning_rate": 3.7517409543381596e-05, "loss": 1.8905, "step": 1735 }, { "epoch": 0.3999308875194379, "grad_norm": 0.35349395871162415, "learning_rate": 3.7500000000000003e-05, "loss": 1.9443, "step": 1736 }, { "epoch": 0.40016126245464495, "grad_norm": 0.4268406927585602, "learning_rate": 3.748258237040392e-05, "loss": 1.8805, "step": 1737 }, { "epoch": 0.400391637389852, "grad_norm": 0.35610559582710266, "learning_rate": 3.746515666586076e-05, "loss": 1.9184, "step": 1738 }, { "epoch": 0.400622012325059, "grad_norm": 0.3361878991127014, "learning_rate": 3.744772289764316e-05, "loss": 1.929, "step": 1739 }, { "epoch": 0.40085238726026606, "grad_norm": 0.36311545968055725, "learning_rate": 3.743028107702898e-05, "loss": 1.9389, "step": 1740 }, { "epoch": 0.4010827621954731, "grad_norm": 0.3935944437980652, "learning_rate": 3.741283121530128e-05, "loss": 1.9555, "step": 1741 }, { "epoch": 0.4013131371306802, "grad_norm": 0.3562100827693939, "learning_rate": 3.739537332374833e-05, "loss": 1.9172, "step": 1742 }, { "epoch": 0.40154351206588723, "grad_norm": 0.3655354976654053, "learning_rate": 3.737790741366358e-05, "loss": 1.9553, "step": 1743 }, { "epoch": 0.4017738870010943, "grad_norm": 0.49065765738487244, "learning_rate": 3.73604334963457e-05, "loss": 1.8961, "step": 1744 }, { "epoch": 0.40200426193630134, "grad_norm": 0.3873971402645111, "learning_rate": 3.734295158309849e-05, "loss": 1.9351, "step": 1745 }, { "epoch": 0.4022346368715084, "grad_norm": 0.35471728444099426, "learning_rate": 3.7325461685230966e-05, "loss": 1.9125, "step": 1746 }, { "epoch": 0.40246501180671546, "grad_norm": 0.3924931585788727, "learning_rate": 3.730796381405729e-05, "loss": 1.9339, "step": 1747 }, { "epoch": 0.40269538674192246, "grad_norm": 0.3919540047645569, "learning_rate": 3.7290457980896795e-05, "loss": 1.9592, "step": 1748 }, { "epoch": 0.4029257616771295, "grad_norm": 0.33894652128219604, "learning_rate": 3.727294419707393e-05, "loss": 1.9401, "step": 1749 }, { "epoch": 0.40315613661233657, "grad_norm": 0.38251495361328125, "learning_rate": 3.725542247391834e-05, "loss": 1.8633, "step": 1750 }, { "epoch": 0.4033865115475436, "grad_norm": 0.3283897936344147, "learning_rate": 3.723789282276476e-05, "loss": 1.9091, "step": 1751 }, { "epoch": 0.4036168864827507, "grad_norm": 0.3758709132671356, "learning_rate": 3.722035525495307e-05, "loss": 1.9068, "step": 1752 }, { "epoch": 0.40384726141795774, "grad_norm": 0.37312817573547363, "learning_rate": 3.7202809781828283e-05, "loss": 1.9103, "step": 1753 }, { "epoch": 0.4040776363531648, "grad_norm": 0.3470996618270874, "learning_rate": 3.718525641474052e-05, "loss": 1.8903, "step": 1754 }, { "epoch": 0.40430801128837185, "grad_norm": 0.34395453333854675, "learning_rate": 3.7167695165044986e-05, "loss": 1.9289, "step": 1755 }, { "epoch": 0.40453838622357885, "grad_norm": 0.3156757652759552, "learning_rate": 3.715012604410202e-05, "loss": 1.8927, "step": 1756 }, { "epoch": 0.4047687611587859, "grad_norm": 0.35384422540664673, "learning_rate": 3.713254906327703e-05, "loss": 1.8916, "step": 1757 }, { "epoch": 0.40499913609399296, "grad_norm": 0.3348173499107361, "learning_rate": 3.7114964233940506e-05, "loss": 1.8565, "step": 1758 }, { "epoch": 0.4052295110292, "grad_norm": 0.34171950817108154, "learning_rate": 3.7097371567468045e-05, "loss": 1.9164, "step": 1759 }, { "epoch": 0.4054598859644071, "grad_norm": 0.3141665458679199, "learning_rate": 3.707977107524028e-05, "loss": 1.9875, "step": 1760 }, { "epoch": 0.40569026089961413, "grad_norm": 0.32555219531059265, "learning_rate": 3.706216276864292e-05, "loss": 1.8866, "step": 1761 }, { "epoch": 0.4059206358348212, "grad_norm": 0.32577210664749146, "learning_rate": 3.704454665906673e-05, "loss": 1.8882, "step": 1762 }, { "epoch": 0.40615101077002824, "grad_norm": 0.30474087595939636, "learning_rate": 3.702692275790752e-05, "loss": 1.8966, "step": 1763 }, { "epoch": 0.40638138570523524, "grad_norm": 0.3348235487937927, "learning_rate": 3.700929107656614e-05, "loss": 1.8904, "step": 1764 }, { "epoch": 0.4066117606404423, "grad_norm": 0.33510616421699524, "learning_rate": 3.699165162644848e-05, "loss": 1.9349, "step": 1765 }, { "epoch": 0.40684213557564936, "grad_norm": 0.294624924659729, "learning_rate": 3.697400441896543e-05, "loss": 1.9066, "step": 1766 }, { "epoch": 0.4070725105108564, "grad_norm": 0.3191630244255066, "learning_rate": 3.695634946553296e-05, "loss": 1.9634, "step": 1767 }, { "epoch": 0.40730288544606347, "grad_norm": 0.3557821810245514, "learning_rate": 3.693868677757197e-05, "loss": 1.8855, "step": 1768 }, { "epoch": 0.4075332603812705, "grad_norm": 0.33995944261550903, "learning_rate": 3.6921016366508424e-05, "loss": 1.9363, "step": 1769 }, { "epoch": 0.4077636353164776, "grad_norm": 0.3526095151901245, "learning_rate": 3.690333824377325e-05, "loss": 1.8741, "step": 1770 }, { "epoch": 0.40799401025168464, "grad_norm": 0.33563828468322754, "learning_rate": 3.688565242080238e-05, "loss": 1.8659, "step": 1771 }, { "epoch": 0.4082243851868917, "grad_norm": 0.3715701997280121, "learning_rate": 3.6867958909036726e-05, "loss": 1.9239, "step": 1772 }, { "epoch": 0.4084547601220987, "grad_norm": 0.3576992452144623, "learning_rate": 3.6850257719922166e-05, "loss": 1.8865, "step": 1773 }, { "epoch": 0.40868513505730575, "grad_norm": 0.33448001742362976, "learning_rate": 3.6832548864909545e-05, "loss": 1.8836, "step": 1774 }, { "epoch": 0.4089155099925128, "grad_norm": 0.3518502414226532, "learning_rate": 3.681483235545468e-05, "loss": 1.8971, "step": 1775 }, { "epoch": 0.40914588492771986, "grad_norm": 0.3195299208164215, "learning_rate": 3.679710820301832e-05, "loss": 1.8917, "step": 1776 }, { "epoch": 0.4093762598629269, "grad_norm": 0.3630242347717285, "learning_rate": 3.677937641906618e-05, "loss": 1.9011, "step": 1777 }, { "epoch": 0.409606634798134, "grad_norm": 0.33866915106773376, "learning_rate": 3.67616370150689e-05, "loss": 1.8835, "step": 1778 }, { "epoch": 0.40983700973334103, "grad_norm": 0.36727771162986755, "learning_rate": 3.674389000250203e-05, "loss": 1.8861, "step": 1779 }, { "epoch": 0.4100673846685481, "grad_norm": 0.35044899582862854, "learning_rate": 3.672613539284609e-05, "loss": 1.8958, "step": 1780 }, { "epoch": 0.4102977596037551, "grad_norm": 0.3093191981315613, "learning_rate": 3.670837319758647e-05, "loss": 1.8791, "step": 1781 }, { "epoch": 0.41052813453896214, "grad_norm": 0.3338821530342102, "learning_rate": 3.6690603428213494e-05, "loss": 1.9114, "step": 1782 }, { "epoch": 0.4107585094741692, "grad_norm": 0.3147628605365753, "learning_rate": 3.667282609622236e-05, "loss": 1.8954, "step": 1783 }, { "epoch": 0.41098888440937625, "grad_norm": 0.3108806610107422, "learning_rate": 3.6655041213113184e-05, "loss": 1.8864, "step": 1784 }, { "epoch": 0.4112192593445833, "grad_norm": 0.34355536103248596, "learning_rate": 3.6637248790390967e-05, "loss": 1.9036, "step": 1785 }, { "epoch": 0.41144963427979037, "grad_norm": 0.30758917331695557, "learning_rate": 3.6619448839565554e-05, "loss": 1.921, "step": 1786 }, { "epoch": 0.4116800092149974, "grad_norm": 0.3178164064884186, "learning_rate": 3.660164137215169e-05, "loss": 1.8968, "step": 1787 }, { "epoch": 0.4119103841502045, "grad_norm": 0.3086645007133484, "learning_rate": 3.658382639966899e-05, "loss": 1.8528, "step": 1788 }, { "epoch": 0.41214075908541153, "grad_norm": 0.3302566707134247, "learning_rate": 3.6566003933641893e-05, "loss": 1.9303, "step": 1789 }, { "epoch": 0.41237113402061853, "grad_norm": 0.3561762273311615, "learning_rate": 3.654817398559971e-05, "loss": 1.8985, "step": 1790 }, { "epoch": 0.4126015089558256, "grad_norm": 0.35047781467437744, "learning_rate": 3.6530336567076574e-05, "loss": 1.88, "step": 1791 }, { "epoch": 0.41283188389103265, "grad_norm": 0.6997146010398865, "learning_rate": 3.651249168961146e-05, "loss": 1.9415, "step": 1792 }, { "epoch": 0.4130622588262397, "grad_norm": 0.3749224543571472, "learning_rate": 3.6494639364748184e-05, "loss": 1.8431, "step": 1793 }, { "epoch": 0.41329263376144676, "grad_norm": 0.3487214148044586, "learning_rate": 3.647677960403536e-05, "loss": 1.929, "step": 1794 }, { "epoch": 0.4135230086966538, "grad_norm": 0.3527573347091675, "learning_rate": 3.64589124190264e-05, "loss": 1.948, "step": 1795 }, { "epoch": 0.41375338363186087, "grad_norm": 0.49152007699012756, "learning_rate": 3.6441037821279554e-05, "loss": 1.9357, "step": 1796 }, { "epoch": 0.4139837585670679, "grad_norm": 0.3533341884613037, "learning_rate": 3.6423155822357845e-05, "loss": 1.9124, "step": 1797 }, { "epoch": 0.41421413350227493, "grad_norm": 0.37906500697135925, "learning_rate": 3.6405266433829075e-05, "loss": 1.9521, "step": 1798 }, { "epoch": 0.414444508437482, "grad_norm": 0.448466420173645, "learning_rate": 3.638736966726585e-05, "loss": 1.877, "step": 1799 }, { "epoch": 0.41467488337268904, "grad_norm": 0.3826608955860138, "learning_rate": 3.636946553424554e-05, "loss": 1.9194, "step": 1800 }, { "epoch": 0.4149052583078961, "grad_norm": 0.38648998737335205, "learning_rate": 3.6351554046350267e-05, "loss": 1.8868, "step": 1801 }, { "epoch": 0.41513563324310315, "grad_norm": 0.3210059404373169, "learning_rate": 3.633363521516693e-05, "loss": 1.8939, "step": 1802 }, { "epoch": 0.4153660081783102, "grad_norm": 0.3969893157482147, "learning_rate": 3.631570905228717e-05, "loss": 1.9247, "step": 1803 }, { "epoch": 0.41559638311351726, "grad_norm": 0.3520594835281372, "learning_rate": 3.629777556930736e-05, "loss": 1.9245, "step": 1804 }, { "epoch": 0.4158267580487243, "grad_norm": 0.3508718013763428, "learning_rate": 3.627983477782863e-05, "loss": 1.917, "step": 1805 }, { "epoch": 0.4160571329839313, "grad_norm": 0.3406522572040558, "learning_rate": 3.626188668945683e-05, "loss": 1.8694, "step": 1806 }, { "epoch": 0.4162875079191384, "grad_norm": 0.3827613890171051, "learning_rate": 3.624393131580252e-05, "loss": 1.9202, "step": 1807 }, { "epoch": 0.41651788285434543, "grad_norm": 0.34900593757629395, "learning_rate": 3.622596866848098e-05, "loss": 1.873, "step": 1808 }, { "epoch": 0.4167482577895525, "grad_norm": 0.3415798544883728, "learning_rate": 3.6207998759112185e-05, "loss": 1.901, "step": 1809 }, { "epoch": 0.41697863272475955, "grad_norm": 0.3316023349761963, "learning_rate": 3.6190021599320854e-05, "loss": 1.9404, "step": 1810 }, { "epoch": 0.4172090076599666, "grad_norm": 0.3614693880081177, "learning_rate": 3.6172037200736325e-05, "loss": 1.9401, "step": 1811 }, { "epoch": 0.41743938259517366, "grad_norm": 0.3571414053440094, "learning_rate": 3.615404557499266e-05, "loss": 1.9387, "step": 1812 }, { "epoch": 0.4176697575303807, "grad_norm": 0.4302150011062622, "learning_rate": 3.613604673372861e-05, "loss": 1.8983, "step": 1813 }, { "epoch": 0.41790013246558777, "grad_norm": 0.5588016510009766, "learning_rate": 3.611804068858756e-05, "loss": 1.968, "step": 1814 }, { "epoch": 0.41813050740079477, "grad_norm": 0.6616767048835754, "learning_rate": 3.6100027451217565e-05, "loss": 1.9029, "step": 1815 }, { "epoch": 0.4183608823360018, "grad_norm": 0.3734997808933258, "learning_rate": 3.608200703327135e-05, "loss": 1.8796, "step": 1816 }, { "epoch": 0.4185912572712089, "grad_norm": 0.47646579146385193, "learning_rate": 3.606397944640625e-05, "loss": 1.9506, "step": 1817 }, { "epoch": 0.41882163220641594, "grad_norm": 0.3549821376800537, "learning_rate": 3.604594470228429e-05, "loss": 1.8712, "step": 1818 }, { "epoch": 0.419052007141623, "grad_norm": 0.3732372522354126, "learning_rate": 3.6027902812572076e-05, "loss": 1.9185, "step": 1819 }, { "epoch": 0.41928238207683005, "grad_norm": 0.37816208600997925, "learning_rate": 3.600985378894086e-05, "loss": 1.8804, "step": 1820 }, { "epoch": 0.4195127570120371, "grad_norm": 0.36429494619369507, "learning_rate": 3.59917976430665e-05, "loss": 1.9161, "step": 1821 }, { "epoch": 0.41974313194724416, "grad_norm": 0.3471272587776184, "learning_rate": 3.597373438662947e-05, "loss": 1.9314, "step": 1822 }, { "epoch": 0.41997350688245116, "grad_norm": 0.34562811255455017, "learning_rate": 3.595566403131484e-05, "loss": 1.9339, "step": 1823 }, { "epoch": 0.4202038818176582, "grad_norm": 0.5671430826187134, "learning_rate": 3.5937586588812264e-05, "loss": 1.9521, "step": 1824 }, { "epoch": 0.4204342567528653, "grad_norm": 0.3227192461490631, "learning_rate": 3.5919502070816e-05, "loss": 1.9325, "step": 1825 }, { "epoch": 0.42066463168807233, "grad_norm": 0.3964260518550873, "learning_rate": 3.5901410489024866e-05, "loss": 1.8382, "step": 1826 }, { "epoch": 0.4208950066232794, "grad_norm": 0.3035931885242462, "learning_rate": 3.588331185514225e-05, "loss": 1.921, "step": 1827 }, { "epoch": 0.42112538155848644, "grad_norm": 0.389500230550766, "learning_rate": 3.5865206180876107e-05, "loss": 1.8443, "step": 1828 }, { "epoch": 0.4213557564936935, "grad_norm": 0.31843605637550354, "learning_rate": 3.5847093477938956e-05, "loss": 1.9195, "step": 1829 }, { "epoch": 0.42158613142890056, "grad_norm": 0.3211725354194641, "learning_rate": 3.5828973758047845e-05, "loss": 1.9613, "step": 1830 }, { "epoch": 0.42181650636410756, "grad_norm": 0.3130192756652832, "learning_rate": 3.581084703292437e-05, "loss": 1.8855, "step": 1831 }, { "epoch": 0.4220468812993146, "grad_norm": 0.3482496440410614, "learning_rate": 3.5792713314294654e-05, "loss": 1.9036, "step": 1832 }, { "epoch": 0.42227725623452167, "grad_norm": 0.34084779024124146, "learning_rate": 3.577457261388936e-05, "loss": 1.8849, "step": 1833 }, { "epoch": 0.4225076311697287, "grad_norm": 0.3785271644592285, "learning_rate": 3.575642494344365e-05, "loss": 1.9024, "step": 1834 }, { "epoch": 0.4227380061049358, "grad_norm": 0.8354285955429077, "learning_rate": 3.57382703146972e-05, "loss": 1.9921, "step": 1835 }, { "epoch": 0.42296838104014284, "grad_norm": 0.35760992765426636, "learning_rate": 3.572010873939418e-05, "loss": 1.9004, "step": 1836 }, { "epoch": 0.4231987559753499, "grad_norm": 0.36247122287750244, "learning_rate": 3.5701940229283275e-05, "loss": 1.8929, "step": 1837 }, { "epoch": 0.42342913091055695, "grad_norm": 0.4599815607070923, "learning_rate": 3.5683764796117634e-05, "loss": 1.9222, "step": 1838 }, { "epoch": 0.423659505845764, "grad_norm": 0.3539561927318573, "learning_rate": 3.56655824516549e-05, "loss": 1.9563, "step": 1839 }, { "epoch": 0.423889880780971, "grad_norm": 0.35233789682388306, "learning_rate": 3.564739320765716e-05, "loss": 1.9115, "step": 1840 }, { "epoch": 0.42412025571617806, "grad_norm": 0.3309990167617798, "learning_rate": 3.562919707589102e-05, "loss": 1.8869, "step": 1841 }, { "epoch": 0.4243506306513851, "grad_norm": 0.3469908833503723, "learning_rate": 3.561099406812748e-05, "loss": 1.9181, "step": 1842 }, { "epoch": 0.4245810055865922, "grad_norm": 0.35340532660484314, "learning_rate": 3.559278419614201e-05, "loss": 1.9165, "step": 1843 }, { "epoch": 0.42481138052179923, "grad_norm": 0.35693997144699097, "learning_rate": 3.5574567471714545e-05, "loss": 1.8476, "step": 1844 }, { "epoch": 0.4250417554570063, "grad_norm": 0.375667005777359, "learning_rate": 3.555634390662941e-05, "loss": 1.8776, "step": 1845 }, { "epoch": 0.42527213039221334, "grad_norm": 0.3520883023738861, "learning_rate": 3.553811351267539e-05, "loss": 1.8885, "step": 1846 }, { "epoch": 0.4255025053274204, "grad_norm": 0.3483614921569824, "learning_rate": 3.5519876301645684e-05, "loss": 1.9131, "step": 1847 }, { "epoch": 0.4257328802626274, "grad_norm": 0.32923805713653564, "learning_rate": 3.5501632285337875e-05, "loss": 1.8939, "step": 1848 }, { "epoch": 0.42596325519783446, "grad_norm": 0.3369605541229248, "learning_rate": 3.5483381475553964e-05, "loss": 1.8428, "step": 1849 }, { "epoch": 0.4261936301330415, "grad_norm": 0.336505651473999, "learning_rate": 3.546512388410038e-05, "loss": 1.9332, "step": 1850 }, { "epoch": 0.42642400506824857, "grad_norm": 0.3541886806488037, "learning_rate": 3.544685952278786e-05, "loss": 1.9125, "step": 1851 }, { "epoch": 0.4266543800034556, "grad_norm": 0.3934641182422638, "learning_rate": 3.5428588403431603e-05, "loss": 1.8961, "step": 1852 }, { "epoch": 0.4268847549386627, "grad_norm": 0.34315598011016846, "learning_rate": 3.5410310537851125e-05, "loss": 1.9057, "step": 1853 }, { "epoch": 0.42711512987386974, "grad_norm": 0.3639412820339203, "learning_rate": 3.539202593787033e-05, "loss": 1.8931, "step": 1854 }, { "epoch": 0.4273455048090768, "grad_norm": 0.34883126616477966, "learning_rate": 3.5373734615317485e-05, "loss": 1.9045, "step": 1855 }, { "epoch": 0.42757587974428385, "grad_norm": 0.3764306604862213, "learning_rate": 3.535543658202518e-05, "loss": 1.8911, "step": 1856 }, { "epoch": 0.42780625467949085, "grad_norm": 0.3472694158554077, "learning_rate": 3.533713184983037e-05, "loss": 1.9102, "step": 1857 }, { "epoch": 0.4280366296146979, "grad_norm": 0.34661728143692017, "learning_rate": 3.5318820430574324e-05, "loss": 1.9389, "step": 1858 }, { "epoch": 0.42826700454990496, "grad_norm": 0.34166961908340454, "learning_rate": 3.530050233610266e-05, "loss": 1.9316, "step": 1859 }, { "epoch": 0.428497379485112, "grad_norm": 0.7076864242553711, "learning_rate": 3.5282177578265296e-05, "loss": 1.9432, "step": 1860 }, { "epoch": 0.4287277544203191, "grad_norm": 0.3584575951099396, "learning_rate": 3.5263846168916454e-05, "loss": 1.9316, "step": 1861 }, { "epoch": 0.42895812935552613, "grad_norm": 0.36771586537361145, "learning_rate": 3.5245508119914687e-05, "loss": 1.8722, "step": 1862 }, { "epoch": 0.4291885042907332, "grad_norm": 0.33764737844467163, "learning_rate": 3.522716344312283e-05, "loss": 1.9543, "step": 1863 }, { "epoch": 0.42941887922594024, "grad_norm": 0.3303917944431305, "learning_rate": 3.520881215040798e-05, "loss": 1.8429, "step": 1864 }, { "epoch": 0.42964925416114724, "grad_norm": 0.36633944511413574, "learning_rate": 3.519045425364156e-05, "loss": 1.8515, "step": 1865 }, { "epoch": 0.4298796290963543, "grad_norm": 0.38361722230911255, "learning_rate": 3.517208976469922e-05, "loss": 1.8954, "step": 1866 }, { "epoch": 0.43011000403156135, "grad_norm": 0.3515476584434509, "learning_rate": 3.515371869546092e-05, "loss": 1.9054, "step": 1867 }, { "epoch": 0.4303403789667684, "grad_norm": 0.3104664981365204, "learning_rate": 3.5135341057810826e-05, "loss": 1.8843, "step": 1868 }, { "epoch": 0.43057075390197547, "grad_norm": 0.31419575214385986, "learning_rate": 3.5116956863637404e-05, "loss": 1.9489, "step": 1869 }, { "epoch": 0.4308011288371825, "grad_norm": 0.33232221007347107, "learning_rate": 3.509856612483332e-05, "loss": 1.8469, "step": 1870 }, { "epoch": 0.4310315037723896, "grad_norm": 0.3144303858280182, "learning_rate": 3.508016885329549e-05, "loss": 1.9236, "step": 1871 }, { "epoch": 0.43126187870759664, "grad_norm": 0.32932886481285095, "learning_rate": 3.5061765060925075e-05, "loss": 1.9118, "step": 1872 }, { "epoch": 0.43149225364280364, "grad_norm": 0.3393830358982086, "learning_rate": 3.504335475962742e-05, "loss": 1.9183, "step": 1873 }, { "epoch": 0.4317226285780107, "grad_norm": 0.3168615996837616, "learning_rate": 3.50249379613121e-05, "loss": 1.8928, "step": 1874 }, { "epoch": 0.43195300351321775, "grad_norm": 0.31635892391204834, "learning_rate": 3.5006514677892884e-05, "loss": 1.9285, "step": 1875 }, { "epoch": 0.4321833784484248, "grad_norm": 0.3267417550086975, "learning_rate": 3.498808492128776e-05, "loss": 1.8921, "step": 1876 }, { "epoch": 0.43241375338363186, "grad_norm": 0.4156677722930908, "learning_rate": 3.4969648703418866e-05, "loss": 1.8675, "step": 1877 }, { "epoch": 0.4326441283188389, "grad_norm": 0.32984650135040283, "learning_rate": 3.4951206036212544e-05, "loss": 1.8771, "step": 1878 }, { "epoch": 0.432874503254046, "grad_norm": 0.32255011796951294, "learning_rate": 3.4932756931599306e-05, "loss": 1.9108, "step": 1879 }, { "epoch": 0.43310487818925303, "grad_norm": 0.31511345505714417, "learning_rate": 3.491430140151383e-05, "loss": 1.8584, "step": 1880 }, { "epoch": 0.4333352531244601, "grad_norm": 0.3372921347618103, "learning_rate": 3.489583945789494e-05, "loss": 1.8806, "step": 1881 }, { "epoch": 0.4335656280596671, "grad_norm": 0.34172970056533813, "learning_rate": 3.487737111268561e-05, "loss": 1.8835, "step": 1882 }, { "epoch": 0.43379600299487414, "grad_norm": 0.4228559732437134, "learning_rate": 3.4858896377832966e-05, "loss": 1.8973, "step": 1883 }, { "epoch": 0.4340263779300812, "grad_norm": 0.32729268074035645, "learning_rate": 3.484041526528826e-05, "loss": 1.8808, "step": 1884 }, { "epoch": 0.43425675286528825, "grad_norm": 0.34893614053726196, "learning_rate": 3.482192778700688e-05, "loss": 1.8505, "step": 1885 }, { "epoch": 0.4344871278004953, "grad_norm": 0.3419465124607086, "learning_rate": 3.480343395494831e-05, "loss": 1.9206, "step": 1886 }, { "epoch": 0.43471750273570237, "grad_norm": 0.3270462155342102, "learning_rate": 3.4784933781076155e-05, "loss": 1.8851, "step": 1887 }, { "epoch": 0.4349478776709094, "grad_norm": 0.3103282153606415, "learning_rate": 3.476642727735815e-05, "loss": 1.9206, "step": 1888 }, { "epoch": 0.4351782526061165, "grad_norm": 0.3090256452560425, "learning_rate": 3.4747914455766065e-05, "loss": 1.9041, "step": 1889 }, { "epoch": 0.4354086275413235, "grad_norm": 0.34056711196899414, "learning_rate": 3.4729395328275824e-05, "loss": 1.8727, "step": 1890 }, { "epoch": 0.43563900247653053, "grad_norm": 0.30761727690696716, "learning_rate": 3.471086990686737e-05, "loss": 1.897, "step": 1891 }, { "epoch": 0.4358693774117376, "grad_norm": 0.33224913477897644, "learning_rate": 3.469233820352477e-05, "loss": 1.9076, "step": 1892 }, { "epoch": 0.43609975234694465, "grad_norm": 0.3398686647415161, "learning_rate": 3.467380023023611e-05, "loss": 1.9163, "step": 1893 }, { "epoch": 0.4363301272821517, "grad_norm": 0.33029648661613464, "learning_rate": 3.4655255998993555e-05, "loss": 1.8813, "step": 1894 }, { "epoch": 0.43656050221735876, "grad_norm": 0.3253343403339386, "learning_rate": 3.4636705521793336e-05, "loss": 1.9091, "step": 1895 }, { "epoch": 0.4367908771525658, "grad_norm": 0.29667147994041443, "learning_rate": 3.461814881063568e-05, "loss": 1.9187, "step": 1896 }, { "epoch": 0.43702125208777287, "grad_norm": 0.33738964796066284, "learning_rate": 3.4599585877524885e-05, "loss": 1.8884, "step": 1897 }, { "epoch": 0.4372516270229799, "grad_norm": 0.32036077976226807, "learning_rate": 3.458101673446925e-05, "loss": 1.8633, "step": 1898 }, { "epoch": 0.43748200195818693, "grad_norm": 0.7028136253356934, "learning_rate": 3.456244139348111e-05, "loss": 1.8695, "step": 1899 }, { "epoch": 0.437712376893394, "grad_norm": 0.3180197775363922, "learning_rate": 3.45438598665768e-05, "loss": 1.7932, "step": 1900 }, { "epoch": 0.43794275182860104, "grad_norm": 0.44959557056427, "learning_rate": 3.452527216577665e-05, "loss": 1.8788, "step": 1901 }, { "epoch": 0.4381731267638081, "grad_norm": 0.33280083537101746, "learning_rate": 3.4506678303104986e-05, "loss": 1.8738, "step": 1902 }, { "epoch": 0.43840350169901515, "grad_norm": 0.4023962616920471, "learning_rate": 3.448807829059014e-05, "loss": 1.866, "step": 1903 }, { "epoch": 0.4386338766342222, "grad_norm": 0.3226771950721741, "learning_rate": 3.44694721402644e-05, "loss": 1.9103, "step": 1904 }, { "epoch": 0.43886425156942926, "grad_norm": 0.3584219217300415, "learning_rate": 3.445085986416404e-05, "loss": 1.88, "step": 1905 }, { "epoch": 0.4390946265046363, "grad_norm": 0.30332258343696594, "learning_rate": 3.4432241474329266e-05, "loss": 1.9405, "step": 1906 }, { "epoch": 0.4393250014398433, "grad_norm": 0.32686078548431396, "learning_rate": 3.441361698280428e-05, "loss": 1.899, "step": 1907 }, { "epoch": 0.4395553763750504, "grad_norm": 0.3420411944389343, "learning_rate": 3.439498640163721e-05, "loss": 1.8937, "step": 1908 }, { "epoch": 0.43978575131025743, "grad_norm": 0.3320785462856293, "learning_rate": 3.437634974288013e-05, "loss": 1.8631, "step": 1909 }, { "epoch": 0.4400161262454645, "grad_norm": 0.32109856605529785, "learning_rate": 3.4357707018589036e-05, "loss": 1.871, "step": 1910 }, { "epoch": 0.44024650118067155, "grad_norm": 0.3242798447608948, "learning_rate": 3.4339058240823843e-05, "loss": 1.8854, "step": 1911 }, { "epoch": 0.4404768761158786, "grad_norm": 0.34858912229537964, "learning_rate": 3.432040342164841e-05, "loss": 1.863, "step": 1912 }, { "epoch": 0.44070725105108566, "grad_norm": 0.3213619887828827, "learning_rate": 3.430174257313048e-05, "loss": 1.9199, "step": 1913 }, { "epoch": 0.4409376259862927, "grad_norm": 0.33631157875061035, "learning_rate": 3.42830757073417e-05, "loss": 1.9152, "step": 1914 }, { "epoch": 0.4411680009214997, "grad_norm": 0.3073486387729645, "learning_rate": 3.426440283635762e-05, "loss": 1.9194, "step": 1915 }, { "epoch": 0.44139837585670677, "grad_norm": 0.34694913029670715, "learning_rate": 3.4245723972257654e-05, "loss": 1.897, "step": 1916 }, { "epoch": 0.4416287507919138, "grad_norm": 0.40663987398147583, "learning_rate": 3.422703912712511e-05, "loss": 1.8855, "step": 1917 }, { "epoch": 0.4418591257271209, "grad_norm": 0.3135856091976166, "learning_rate": 3.420834831304718e-05, "loss": 1.8932, "step": 1918 }, { "epoch": 0.44208950066232794, "grad_norm": 0.3198431134223938, "learning_rate": 3.4189651542114884e-05, "loss": 1.9065, "step": 1919 }, { "epoch": 0.442319875597535, "grad_norm": 0.33612358570098877, "learning_rate": 3.41709488264231e-05, "loss": 1.9219, "step": 1920 }, { "epoch": 0.44255025053274205, "grad_norm": 0.31804755330085754, "learning_rate": 3.415224017807057e-05, "loss": 1.8811, "step": 1921 }, { "epoch": 0.4427806254679491, "grad_norm": 0.34482526779174805, "learning_rate": 3.413352560915988e-05, "loss": 1.9008, "step": 1922 }, { "epoch": 0.44301100040315616, "grad_norm": 0.3382539749145508, "learning_rate": 3.4114805131797406e-05, "loss": 1.9233, "step": 1923 }, { "epoch": 0.44324137533836316, "grad_norm": 0.3132038712501526, "learning_rate": 3.409607875809339e-05, "loss": 1.8926, "step": 1924 }, { "epoch": 0.4434717502735702, "grad_norm": 0.32909148931503296, "learning_rate": 3.407734650016187e-05, "loss": 1.9247, "step": 1925 }, { "epoch": 0.4437021252087773, "grad_norm": 0.3467409312725067, "learning_rate": 3.405860837012068e-05, "loss": 1.9456, "step": 1926 }, { "epoch": 0.44393250014398433, "grad_norm": 0.3427455723285675, "learning_rate": 3.403986438009146e-05, "loss": 1.8679, "step": 1927 }, { "epoch": 0.4441628750791914, "grad_norm": 0.33227530121803284, "learning_rate": 3.4021114542199664e-05, "loss": 1.8892, "step": 1928 }, { "epoch": 0.44439325001439844, "grad_norm": 0.33937007188796997, "learning_rate": 3.400235886857448e-05, "loss": 1.8934, "step": 1929 }, { "epoch": 0.4446236249496055, "grad_norm": 0.38456541299819946, "learning_rate": 3.398359737134893e-05, "loss": 1.8953, "step": 1930 }, { "epoch": 0.44485399988481256, "grad_norm": 0.33009544014930725, "learning_rate": 3.3964830062659756e-05, "loss": 1.9143, "step": 1931 }, { "epoch": 0.44508437482001956, "grad_norm": 0.30539748072624207, "learning_rate": 3.3946056954647483e-05, "loss": 1.9129, "step": 1932 }, { "epoch": 0.4453147497552266, "grad_norm": 0.3888290822505951, "learning_rate": 3.3927278059456376e-05, "loss": 1.9377, "step": 1933 }, { "epoch": 0.44554512469043367, "grad_norm": 0.3384309411048889, "learning_rate": 3.390849338923446e-05, "loss": 1.8493, "step": 1934 }, { "epoch": 0.4457754996256407, "grad_norm": 0.3347501754760742, "learning_rate": 3.388970295613346e-05, "loss": 1.8226, "step": 1935 }, { "epoch": 0.4460058745608478, "grad_norm": 0.7981239557266235, "learning_rate": 3.387090677230888e-05, "loss": 1.9264, "step": 1936 }, { "epoch": 0.44623624949605484, "grad_norm": 0.3091081976890564, "learning_rate": 3.3852104849919905e-05, "loss": 1.9223, "step": 1937 }, { "epoch": 0.4464666244312619, "grad_norm": 0.3544897437095642, "learning_rate": 3.3833297201129456e-05, "loss": 1.831, "step": 1938 }, { "epoch": 0.44669699936646895, "grad_norm": 0.3182098865509033, "learning_rate": 3.3814483838104136e-05, "loss": 1.8898, "step": 1939 }, { "epoch": 0.44692737430167595, "grad_norm": 0.40588170289993286, "learning_rate": 3.3795664773014255e-05, "loss": 1.8902, "step": 1940 }, { "epoch": 0.447157749236883, "grad_norm": 0.35834670066833496, "learning_rate": 3.377684001803382e-05, "loss": 1.8973, "step": 1941 }, { "epoch": 0.44738812417209006, "grad_norm": 0.33762332797050476, "learning_rate": 3.375800958534051e-05, "loss": 1.9225, "step": 1942 }, { "epoch": 0.4476184991072971, "grad_norm": 0.3737618327140808, "learning_rate": 3.3739173487115675e-05, "loss": 1.8439, "step": 1943 }, { "epoch": 0.4478488740425042, "grad_norm": 0.3307756185531616, "learning_rate": 3.3720331735544344e-05, "loss": 1.8696, "step": 1944 }, { "epoch": 0.44807924897771123, "grad_norm": 0.3502967953681946, "learning_rate": 3.370148434281518e-05, "loss": 1.8563, "step": 1945 }, { "epoch": 0.4483096239129183, "grad_norm": 0.32781028747558594, "learning_rate": 3.3682631321120504e-05, "loss": 1.9412, "step": 1946 }, { "epoch": 0.44853999884812534, "grad_norm": 0.3885347247123718, "learning_rate": 3.366377268265631e-05, "loss": 1.894, "step": 1947 }, { "epoch": 0.4487703737833324, "grad_norm": 0.3702787458896637, "learning_rate": 3.364490843962216e-05, "loss": 1.8813, "step": 1948 }, { "epoch": 0.4490007487185394, "grad_norm": 0.37756234407424927, "learning_rate": 3.362603860422131e-05, "loss": 1.8988, "step": 1949 }, { "epoch": 0.44923112365374646, "grad_norm": 0.3386792838573456, "learning_rate": 3.360716318866058e-05, "loss": 1.9168, "step": 1950 }, { "epoch": 0.4494614985889535, "grad_norm": 0.37002140283584595, "learning_rate": 3.358828220515045e-05, "loss": 1.8943, "step": 1951 }, { "epoch": 0.44969187352416057, "grad_norm": 0.35103291273117065, "learning_rate": 3.356939566590494e-05, "loss": 1.843, "step": 1952 }, { "epoch": 0.4499222484593676, "grad_norm": 0.2959364056587219, "learning_rate": 3.355050358314172e-05, "loss": 1.9, "step": 1953 }, { "epoch": 0.4501526233945747, "grad_norm": 0.5808835625648499, "learning_rate": 3.353160596908202e-05, "loss": 1.923, "step": 1954 }, { "epoch": 0.45038299832978174, "grad_norm": 0.371018648147583, "learning_rate": 3.351270283595066e-05, "loss": 1.8394, "step": 1955 }, { "epoch": 0.4506133732649888, "grad_norm": 0.40660080313682556, "learning_rate": 3.349379419597602e-05, "loss": 1.8812, "step": 1956 }, { "epoch": 0.4508437482001958, "grad_norm": 0.36857450008392334, "learning_rate": 3.347488006139003e-05, "loss": 1.9552, "step": 1957 }, { "epoch": 0.45107412313540285, "grad_norm": 0.3250639736652374, "learning_rate": 3.3455960444428216e-05, "loss": 1.9092, "step": 1958 }, { "epoch": 0.4513044980706099, "grad_norm": 0.3327080011367798, "learning_rate": 3.343703535732961e-05, "loss": 1.895, "step": 1959 }, { "epoch": 0.45153487300581696, "grad_norm": 0.303562730550766, "learning_rate": 3.3418104812336786e-05, "loss": 1.9086, "step": 1960 }, { "epoch": 0.451765247941024, "grad_norm": 0.35320591926574707, "learning_rate": 3.3399168821695884e-05, "loss": 1.8689, "step": 1961 }, { "epoch": 0.4519956228762311, "grad_norm": 0.610374391078949, "learning_rate": 3.338022739765653e-05, "loss": 1.8895, "step": 1962 }, { "epoch": 0.45222599781143813, "grad_norm": 0.3237060010433197, "learning_rate": 3.336128055247186e-05, "loss": 1.885, "step": 1963 }, { "epoch": 0.4524563727466452, "grad_norm": 0.33701080083847046, "learning_rate": 3.3342328298398565e-05, "loss": 1.8555, "step": 1964 }, { "epoch": 0.45268674768185224, "grad_norm": 0.350799560546875, "learning_rate": 3.332337064769679e-05, "loss": 1.8513, "step": 1965 }, { "epoch": 0.45291712261705924, "grad_norm": 0.33354607224464417, "learning_rate": 3.33044076126302e-05, "loss": 1.9335, "step": 1966 }, { "epoch": 0.4531474975522663, "grad_norm": 0.3419067859649658, "learning_rate": 3.32854392054659e-05, "loss": 1.9055, "step": 1967 }, { "epoch": 0.45337787248747335, "grad_norm": 0.3493388891220093, "learning_rate": 3.3266465438474515e-05, "loss": 1.8485, "step": 1968 }, { "epoch": 0.4536082474226804, "grad_norm": 0.3299165666103363, "learning_rate": 3.3247486323930124e-05, "loss": 1.8995, "step": 1969 }, { "epoch": 0.45383862235788747, "grad_norm": 0.34239688515663147, "learning_rate": 3.322850187411025e-05, "loss": 1.8737, "step": 1970 }, { "epoch": 0.4540689972930945, "grad_norm": 0.31786563992500305, "learning_rate": 3.32095121012959e-05, "loss": 1.9373, "step": 1971 }, { "epoch": 0.4542993722283016, "grad_norm": 0.3350009024143219, "learning_rate": 3.319051701777149e-05, "loss": 1.9032, "step": 1972 }, { "epoch": 0.45452974716350864, "grad_norm": 0.4052259922027588, "learning_rate": 3.317151663582488e-05, "loss": 1.8459, "step": 1973 }, { "epoch": 0.45476012209871564, "grad_norm": 0.32080602645874023, "learning_rate": 3.315251096774737e-05, "loss": 1.8763, "step": 1974 }, { "epoch": 0.4549904970339227, "grad_norm": 0.33638209104537964, "learning_rate": 3.313350002583369e-05, "loss": 1.8297, "step": 1975 }, { "epoch": 0.45522087196912975, "grad_norm": 0.3176821768283844, "learning_rate": 3.3114483822381933e-05, "loss": 1.9407, "step": 1976 }, { "epoch": 0.4554512469043368, "grad_norm": 0.653043270111084, "learning_rate": 3.309546236969364e-05, "loss": 1.8737, "step": 1977 }, { "epoch": 0.45568162183954386, "grad_norm": 0.34810903668403625, "learning_rate": 3.3076435680073744e-05, "loss": 1.921, "step": 1978 }, { "epoch": 0.4559119967747509, "grad_norm": 0.3067249655723572, "learning_rate": 3.305740376583055e-05, "loss": 1.9188, "step": 1979 }, { "epoch": 0.45614237170995797, "grad_norm": 0.33782994747161865, "learning_rate": 3.303836663927574e-05, "loss": 1.9122, "step": 1980 }, { "epoch": 0.45637274664516503, "grad_norm": 0.3653903305530548, "learning_rate": 3.301932431272439e-05, "loss": 1.8482, "step": 1981 }, { "epoch": 0.45660312158037203, "grad_norm": 0.3116041123867035, "learning_rate": 3.300027679849492e-05, "loss": 1.9519, "step": 1982 }, { "epoch": 0.4568334965155791, "grad_norm": 0.33211299777030945, "learning_rate": 3.298122410890912e-05, "loss": 1.9031, "step": 1983 }, { "epoch": 0.45706387145078614, "grad_norm": 0.35082072019577026, "learning_rate": 3.2962166256292113e-05, "loss": 1.8735, "step": 1984 }, { "epoch": 0.4572942463859932, "grad_norm": 0.32899075746536255, "learning_rate": 3.2943103252972374e-05, "loss": 1.8479, "step": 1985 }, { "epoch": 0.45752462132120025, "grad_norm": 0.32282668352127075, "learning_rate": 3.29240351112817e-05, "loss": 1.9104, "step": 1986 }, { "epoch": 0.4577549962564073, "grad_norm": 0.32760533690452576, "learning_rate": 3.2904961843555224e-05, "loss": 1.871, "step": 1987 }, { "epoch": 0.45798537119161437, "grad_norm": 0.328036367893219, "learning_rate": 3.2885883462131394e-05, "loss": 1.8722, "step": 1988 }, { "epoch": 0.4582157461268214, "grad_norm": 0.3312572240829468, "learning_rate": 3.2866799979351934e-05, "loss": 1.8794, "step": 1989 }, { "epoch": 0.4584461210620285, "grad_norm": 0.33938702940940857, "learning_rate": 3.2847711407561935e-05, "loss": 1.8964, "step": 1990 }, { "epoch": 0.4586764959972355, "grad_norm": 0.5473262071609497, "learning_rate": 3.2828617759109714e-05, "loss": 1.9332, "step": 1991 }, { "epoch": 0.45890687093244253, "grad_norm": 0.2948043644428253, "learning_rate": 3.2809519046346895e-05, "loss": 1.8792, "step": 1992 }, { "epoch": 0.4591372458676496, "grad_norm": 0.3655003607273102, "learning_rate": 3.27904152816284e-05, "loss": 1.8906, "step": 1993 }, { "epoch": 0.45936762080285665, "grad_norm": 0.3211043179035187, "learning_rate": 3.277130647731238e-05, "loss": 1.9169, "step": 1994 }, { "epoch": 0.4595979957380637, "grad_norm": 0.5989583730697632, "learning_rate": 3.275219264576028e-05, "loss": 1.8957, "step": 1995 }, { "epoch": 0.45982837067327076, "grad_norm": 0.34726402163505554, "learning_rate": 3.273307379933677e-05, "loss": 1.911, "step": 1996 }, { "epoch": 0.4600587456084778, "grad_norm": 0.3949827253818512, "learning_rate": 3.2713949950409794e-05, "loss": 1.8444, "step": 1997 }, { "epoch": 0.46028912054368487, "grad_norm": 0.35668033361434937, "learning_rate": 3.269482111135051e-05, "loss": 1.8858, "step": 1998 }, { "epoch": 0.46051949547889187, "grad_norm": 0.3882494270801544, "learning_rate": 3.2675687294533306e-05, "loss": 1.9077, "step": 1999 }, { "epoch": 0.4607498704140989, "grad_norm": 0.36644718050956726, "learning_rate": 3.265654851233579e-05, "loss": 1.8666, "step": 2000 }, { "epoch": 0.460980245349306, "grad_norm": 0.38334405422210693, "learning_rate": 3.2637404777138795e-05, "loss": 1.8845, "step": 2001 }, { "epoch": 0.46121062028451304, "grad_norm": 0.4319254159927368, "learning_rate": 3.261825610132634e-05, "loss": 1.8717, "step": 2002 }, { "epoch": 0.4614409952197201, "grad_norm": 0.3764795660972595, "learning_rate": 3.259910249728567e-05, "loss": 1.8698, "step": 2003 }, { "epoch": 0.46167137015492715, "grad_norm": 0.34260714054107666, "learning_rate": 3.257994397740717e-05, "loss": 1.9143, "step": 2004 }, { "epoch": 0.4619017450901342, "grad_norm": 0.39034324884414673, "learning_rate": 3.2560780554084434e-05, "loss": 1.8354, "step": 2005 }, { "epoch": 0.46213212002534126, "grad_norm": 0.3039012551307678, "learning_rate": 3.254161223971425e-05, "loss": 1.908, "step": 2006 }, { "epoch": 0.46236249496054826, "grad_norm": 0.37466394901275635, "learning_rate": 3.2522439046696525e-05, "loss": 1.8646, "step": 2007 }, { "epoch": 0.4625928698957553, "grad_norm": 0.31759098172187805, "learning_rate": 3.2503260987434345e-05, "loss": 1.8625, "step": 2008 }, { "epoch": 0.4628232448309624, "grad_norm": 0.3401944637298584, "learning_rate": 3.2484078074333954e-05, "loss": 1.8543, "step": 2009 }, { "epoch": 0.46305361976616943, "grad_norm": 0.3446788191795349, "learning_rate": 3.246489031980471e-05, "loss": 1.908, "step": 2010 }, { "epoch": 0.4632839947013765, "grad_norm": 0.3461773693561554, "learning_rate": 3.244569773625912e-05, "loss": 1.926, "step": 2011 }, { "epoch": 0.46351436963658355, "grad_norm": 0.3300885260105133, "learning_rate": 3.242650033611283e-05, "loss": 1.8794, "step": 2012 }, { "epoch": 0.4637447445717906, "grad_norm": 0.38737356662750244, "learning_rate": 3.2407298131784556e-05, "loss": 1.8943, "step": 2013 }, { "epoch": 0.46397511950699766, "grad_norm": 0.349330872297287, "learning_rate": 3.238809113569617e-05, "loss": 1.8593, "step": 2014 }, { "epoch": 0.4642054944422047, "grad_norm": 0.3097901940345764, "learning_rate": 3.2368879360272606e-05, "loss": 1.8848, "step": 2015 }, { "epoch": 0.4644358693774117, "grad_norm": 0.33482876420021057, "learning_rate": 3.234966281794193e-05, "loss": 1.8752, "step": 2016 }, { "epoch": 0.46466624431261877, "grad_norm": 0.2988511919975281, "learning_rate": 3.233044152113524e-05, "loss": 1.859, "step": 2017 }, { "epoch": 0.4648966192478258, "grad_norm": 0.33722975850105286, "learning_rate": 3.231121548228676e-05, "loss": 1.847, "step": 2018 }, { "epoch": 0.4651269941830329, "grad_norm": 0.36222541332244873, "learning_rate": 3.229198471383375e-05, "loss": 1.8725, "step": 2019 }, { "epoch": 0.46535736911823994, "grad_norm": 0.30873048305511475, "learning_rate": 3.227274922821655e-05, "loss": 1.8866, "step": 2020 }, { "epoch": 0.465587744053447, "grad_norm": 0.3408706486225128, "learning_rate": 3.2253509037878524e-05, "loss": 1.8869, "step": 2021 }, { "epoch": 0.46581811898865405, "grad_norm": 0.29485610127449036, "learning_rate": 3.223426415526611e-05, "loss": 1.9509, "step": 2022 }, { "epoch": 0.4660484939238611, "grad_norm": 0.31372830271720886, "learning_rate": 3.2215014592828765e-05, "loss": 1.875, "step": 2023 }, { "epoch": 0.4662788688590681, "grad_norm": 0.3206598162651062, "learning_rate": 3.219576036301898e-05, "loss": 1.9077, "step": 2024 }, { "epoch": 0.46650924379427516, "grad_norm": 0.3051839768886566, "learning_rate": 3.217650147829225e-05, "loss": 1.9177, "step": 2025 }, { "epoch": 0.4667396187294822, "grad_norm": 0.30558058619499207, "learning_rate": 3.21572379511071e-05, "loss": 1.8696, "step": 2026 }, { "epoch": 0.4669699936646893, "grad_norm": 0.518369197845459, "learning_rate": 3.213796979392505e-05, "loss": 1.8871, "step": 2027 }, { "epoch": 0.46720036859989633, "grad_norm": 0.3500243127346039, "learning_rate": 3.2118697019210626e-05, "loss": 1.8837, "step": 2028 }, { "epoch": 0.4674307435351034, "grad_norm": 0.3151788115501404, "learning_rate": 3.2099419639431316e-05, "loss": 1.9031, "step": 2029 }, { "epoch": 0.46766111847031044, "grad_norm": 0.32070133090019226, "learning_rate": 3.2080137667057595e-05, "loss": 1.8591, "step": 2030 }, { "epoch": 0.4678914934055175, "grad_norm": 0.3211394250392914, "learning_rate": 3.206085111456295e-05, "loss": 1.8866, "step": 2031 }, { "epoch": 0.46812186834072456, "grad_norm": 0.311123788356781, "learning_rate": 3.204155999442377e-05, "loss": 1.8785, "step": 2032 }, { "epoch": 0.46835224327593156, "grad_norm": 0.3386189043521881, "learning_rate": 3.202226431911943e-05, "loss": 1.8796, "step": 2033 }, { "epoch": 0.4685826182111386, "grad_norm": 0.30959299206733704, "learning_rate": 3.200296410113225e-05, "loss": 1.879, "step": 2034 }, { "epoch": 0.46881299314634567, "grad_norm": 0.33946526050567627, "learning_rate": 3.198365935294748e-05, "loss": 1.9439, "step": 2035 }, { "epoch": 0.4690433680815527, "grad_norm": 0.326264351606369, "learning_rate": 3.196435008705332e-05, "loss": 1.87, "step": 2036 }, { "epoch": 0.4692737430167598, "grad_norm": 0.38723495602607727, "learning_rate": 3.194503631594088e-05, "loss": 1.8903, "step": 2037 }, { "epoch": 0.46950411795196684, "grad_norm": 0.2968451976776123, "learning_rate": 3.192571805210416e-05, "loss": 1.8951, "step": 2038 }, { "epoch": 0.4697344928871739, "grad_norm": 0.3314412832260132, "learning_rate": 3.190639530804011e-05, "loss": 1.8761, "step": 2039 }, { "epoch": 0.46996486782238095, "grad_norm": 0.30625784397125244, "learning_rate": 3.1887068096248565e-05, "loss": 1.9109, "step": 2040 }, { "epoch": 0.47019524275758795, "grad_norm": 0.31215766072273254, "learning_rate": 3.186773642923222e-05, "loss": 1.8758, "step": 2041 }, { "epoch": 0.470425617692795, "grad_norm": 0.33626314997673035, "learning_rate": 3.184840031949669e-05, "loss": 1.8688, "step": 2042 }, { "epoch": 0.47065599262800206, "grad_norm": 0.32674282789230347, "learning_rate": 3.1829059779550453e-05, "loss": 1.8514, "step": 2043 }, { "epoch": 0.4708863675632091, "grad_norm": 0.3363704979419708, "learning_rate": 3.1809714821904834e-05, "loss": 1.8783, "step": 2044 }, { "epoch": 0.4711167424984162, "grad_norm": 0.3303244113922119, "learning_rate": 3.179036545907405e-05, "loss": 1.8703, "step": 2045 }, { "epoch": 0.47134711743362323, "grad_norm": 0.3404719829559326, "learning_rate": 3.177101170357513e-05, "loss": 1.8922, "step": 2046 }, { "epoch": 0.4715774923688303, "grad_norm": 0.3304763436317444, "learning_rate": 3.1751653567927976e-05, "loss": 1.8832, "step": 2047 }, { "epoch": 0.47180786730403734, "grad_norm": 0.31446951627731323, "learning_rate": 3.173229106465531e-05, "loss": 1.8312, "step": 2048 }, { "epoch": 0.47203824223924434, "grad_norm": 0.2936405539512634, "learning_rate": 3.171292420628268e-05, "loss": 1.9099, "step": 2049 }, { "epoch": 0.4722686171744514, "grad_norm": 0.33247148990631104, "learning_rate": 3.169355300533846e-05, "loss": 1.9118, "step": 2050 }, { "epoch": 0.47249899210965846, "grad_norm": 0.2881184220314026, "learning_rate": 3.167417747435379e-05, "loss": 1.903, "step": 2051 }, { "epoch": 0.4727293670448655, "grad_norm": 0.3142097592353821, "learning_rate": 3.165479762586269e-05, "loss": 1.8632, "step": 2052 }, { "epoch": 0.47295974198007257, "grad_norm": 0.30988872051239014, "learning_rate": 3.1635413472401904e-05, "loss": 1.8835, "step": 2053 }, { "epoch": 0.4731901169152796, "grad_norm": 0.2985193431377411, "learning_rate": 3.161602502651099e-05, "loss": 1.8587, "step": 2054 }, { "epoch": 0.4734204918504867, "grad_norm": 0.32065480947494507, "learning_rate": 3.159663230073229e-05, "loss": 1.8992, "step": 2055 }, { "epoch": 0.47365086678569374, "grad_norm": 0.29845157265663147, "learning_rate": 3.157723530761088e-05, "loss": 1.9008, "step": 2056 }, { "epoch": 0.4738812417209008, "grad_norm": 0.303821861743927, "learning_rate": 3.155783405969464e-05, "loss": 1.9054, "step": 2057 }, { "epoch": 0.4741116166561078, "grad_norm": 0.3186984658241272, "learning_rate": 3.153842856953417e-05, "loss": 1.8453, "step": 2058 }, { "epoch": 0.47434199159131485, "grad_norm": 0.299918532371521, "learning_rate": 3.1519018849682836e-05, "loss": 1.9149, "step": 2059 }, { "epoch": 0.4745723665265219, "grad_norm": 0.3353325426578522, "learning_rate": 3.149960491269672e-05, "loss": 1.9113, "step": 2060 }, { "epoch": 0.47480274146172896, "grad_norm": 0.3339362144470215, "learning_rate": 3.148018677113466e-05, "loss": 1.8473, "step": 2061 }, { "epoch": 0.475033116396936, "grad_norm": 0.5428033471107483, "learning_rate": 3.146076443755819e-05, "loss": 1.9406, "step": 2062 }, { "epoch": 0.4752634913321431, "grad_norm": 0.3170464336872101, "learning_rate": 3.144133792453154e-05, "loss": 1.8561, "step": 2063 }, { "epoch": 0.47549386626735013, "grad_norm": 0.3091451823711395, "learning_rate": 3.1421907244621696e-05, "loss": 1.8843, "step": 2064 }, { "epoch": 0.4757242412025572, "grad_norm": 0.3191569447517395, "learning_rate": 3.14024724103983e-05, "loss": 1.8903, "step": 2065 }, { "epoch": 0.4759546161377642, "grad_norm": 0.31974974274635315, "learning_rate": 3.1383033434433676e-05, "loss": 1.851, "step": 2066 }, { "epoch": 0.47618499107297124, "grad_norm": 0.31645023822784424, "learning_rate": 3.136359032930287e-05, "loss": 1.92, "step": 2067 }, { "epoch": 0.4764153660081783, "grad_norm": 0.3247966468334198, "learning_rate": 3.1344143107583546e-05, "loss": 1.8538, "step": 2068 }, { "epoch": 0.47664574094338535, "grad_norm": 0.3283118009567261, "learning_rate": 3.132469178185607e-05, "loss": 1.8351, "step": 2069 }, { "epoch": 0.4768761158785924, "grad_norm": 0.3154730796813965, "learning_rate": 3.130523636470345e-05, "loss": 1.8761, "step": 2070 }, { "epoch": 0.47710649081379947, "grad_norm": 0.3232240378856659, "learning_rate": 3.128577686871133e-05, "loss": 1.8826, "step": 2071 }, { "epoch": 0.4773368657490065, "grad_norm": 0.2966149151325226, "learning_rate": 3.126631330646802e-05, "loss": 1.8897, "step": 2072 }, { "epoch": 0.4775672406842136, "grad_norm": 0.3043692409992218, "learning_rate": 3.124684569056442e-05, "loss": 1.9188, "step": 2073 }, { "epoch": 0.47779761561942063, "grad_norm": 0.4114401042461395, "learning_rate": 3.122737403359409e-05, "loss": 1.8942, "step": 2074 }, { "epoch": 0.47802799055462764, "grad_norm": 0.49076491594314575, "learning_rate": 3.1207898348153206e-05, "loss": 1.8989, "step": 2075 }, { "epoch": 0.4782583654898347, "grad_norm": 0.3024754524230957, "learning_rate": 3.118841864684049e-05, "loss": 1.8814, "step": 2076 }, { "epoch": 0.47848874042504175, "grad_norm": 0.35982057452201843, "learning_rate": 3.116893494225734e-05, "loss": 1.9027, "step": 2077 }, { "epoch": 0.4787191153602488, "grad_norm": 0.32756319642066956, "learning_rate": 3.114944724700771e-05, "loss": 1.8602, "step": 2078 }, { "epoch": 0.47894949029545586, "grad_norm": 0.31014108657836914, "learning_rate": 3.112995557369811e-05, "loss": 1.9065, "step": 2079 }, { "epoch": 0.4791798652306629, "grad_norm": 0.2880355715751648, "learning_rate": 3.111045993493767e-05, "loss": 1.8797, "step": 2080 }, { "epoch": 0.47941024016586997, "grad_norm": 0.3302212059497833, "learning_rate": 3.109096034333805e-05, "loss": 1.8886, "step": 2081 }, { "epoch": 0.47964061510107703, "grad_norm": 0.2964526414871216, "learning_rate": 3.107145681151349e-05, "loss": 1.8563, "step": 2082 }, { "epoch": 0.47987099003628403, "grad_norm": 0.3092183470726013, "learning_rate": 3.1051949352080765e-05, "loss": 1.8398, "step": 2083 }, { "epoch": 0.4801013649714911, "grad_norm": 0.38002800941467285, "learning_rate": 3.1032437977659196e-05, "loss": 1.8581, "step": 2084 }, { "epoch": 0.48033173990669814, "grad_norm": 0.36596715450286865, "learning_rate": 3.101292270087063e-05, "loss": 1.8248, "step": 2085 }, { "epoch": 0.4805621148419052, "grad_norm": 0.3856566548347473, "learning_rate": 3.099340353433946e-05, "loss": 1.8473, "step": 2086 }, { "epoch": 0.48079248977711225, "grad_norm": 0.29041215777397156, "learning_rate": 3.0973880490692567e-05, "loss": 1.9247, "step": 2087 }, { "epoch": 0.4810228647123193, "grad_norm": 0.362801194190979, "learning_rate": 3.0954353582559345e-05, "loss": 1.8845, "step": 2088 }, { "epoch": 0.48125323964752637, "grad_norm": 0.3191280663013458, "learning_rate": 3.093482282257171e-05, "loss": 1.8889, "step": 2089 }, { "epoch": 0.4814836145827334, "grad_norm": 0.2970114052295685, "learning_rate": 3.091528822336405e-05, "loss": 1.8911, "step": 2090 }, { "epoch": 0.4817139895179404, "grad_norm": 0.2995226979255676, "learning_rate": 3.089574979757324e-05, "loss": 1.8633, "step": 2091 }, { "epoch": 0.4819443644531475, "grad_norm": 0.31009718775749207, "learning_rate": 3.087620755783863e-05, "loss": 1.8721, "step": 2092 }, { "epoch": 0.48217473938835453, "grad_norm": 0.32472431659698486, "learning_rate": 3.0856661516802054e-05, "loss": 1.8958, "step": 2093 }, { "epoch": 0.4824051143235616, "grad_norm": 0.30643922090530396, "learning_rate": 3.083711168710778e-05, "loss": 1.8539, "step": 2094 }, { "epoch": 0.48263548925876865, "grad_norm": 0.3164253234863281, "learning_rate": 3.081755808140253e-05, "loss": 1.898, "step": 2095 }, { "epoch": 0.4828658641939757, "grad_norm": 0.3314495086669922, "learning_rate": 3.0798000712335476e-05, "loss": 1.8924, "step": 2096 }, { "epoch": 0.48309623912918276, "grad_norm": 0.3195517063140869, "learning_rate": 3.0778439592558246e-05, "loss": 1.8608, "step": 2097 }, { "epoch": 0.4833266140643898, "grad_norm": 0.30692628026008606, "learning_rate": 3.0758874734724845e-05, "loss": 1.9251, "step": 2098 }, { "epoch": 0.48355698899959687, "grad_norm": 0.3184828460216522, "learning_rate": 3.073930615149174e-05, "loss": 1.8543, "step": 2099 }, { "epoch": 0.48378736393480387, "grad_norm": 0.29281699657440186, "learning_rate": 3.07197338555178e-05, "loss": 1.857, "step": 2100 }, { "epoch": 0.4840177388700109, "grad_norm": 0.3221278786659241, "learning_rate": 3.0700157859464264e-05, "loss": 1.8292, "step": 2101 }, { "epoch": 0.484248113805218, "grad_norm": 0.3135239779949188, "learning_rate": 3.068057817599481e-05, "loss": 1.8829, "step": 2102 }, { "epoch": 0.48447848874042504, "grad_norm": 0.3343508243560791, "learning_rate": 3.066099481777547e-05, "loss": 1.8238, "step": 2103 }, { "epoch": 0.4847088636756321, "grad_norm": 0.30787092447280884, "learning_rate": 3.0641407797474656e-05, "loss": 1.9185, "step": 2104 }, { "epoch": 0.48493923861083915, "grad_norm": 0.29395854473114014, "learning_rate": 3.062181712776316e-05, "loss": 1.8907, "step": 2105 }, { "epoch": 0.4851696135460462, "grad_norm": 0.3390984833240509, "learning_rate": 3.0602222821314144e-05, "loss": 1.8619, "step": 2106 }, { "epoch": 0.48539998848125326, "grad_norm": 0.3072253465652466, "learning_rate": 3.05826248908031e-05, "loss": 1.8881, "step": 2107 }, { "epoch": 0.48563036341646026, "grad_norm": 0.3100559413433075, "learning_rate": 3.056302334890786e-05, "loss": 1.8952, "step": 2108 }, { "epoch": 0.4858607383516673, "grad_norm": 0.344031423330307, "learning_rate": 3.054341820830863e-05, "loss": 1.8624, "step": 2109 }, { "epoch": 0.4860911132868744, "grad_norm": 0.3052961230278015, "learning_rate": 3.05238094816879e-05, "loss": 1.8931, "step": 2110 }, { "epoch": 0.48632148822208143, "grad_norm": 0.28336939215660095, "learning_rate": 3.050419718173051e-05, "loss": 1.907, "step": 2111 }, { "epoch": 0.4865518631572885, "grad_norm": 0.3034708499908447, "learning_rate": 3.0484581321123605e-05, "loss": 1.8368, "step": 2112 }, { "epoch": 0.48678223809249554, "grad_norm": 0.3208880126476288, "learning_rate": 3.046496191255661e-05, "loss": 1.8841, "step": 2113 }, { "epoch": 0.4870126130277026, "grad_norm": 0.3165823817253113, "learning_rate": 3.0445338968721287e-05, "loss": 1.9126, "step": 2114 }, { "epoch": 0.48724298796290966, "grad_norm": 0.36998581886291504, "learning_rate": 3.042571250231166e-05, "loss": 1.8363, "step": 2115 }, { "epoch": 0.48747336289811666, "grad_norm": 0.2902930974960327, "learning_rate": 3.0406082526024016e-05, "loss": 1.9046, "step": 2116 }, { "epoch": 0.4877037378333237, "grad_norm": 0.30640485882759094, "learning_rate": 3.0386449052556943e-05, "loss": 1.8723, "step": 2117 }, { "epoch": 0.48793411276853077, "grad_norm": 0.33163848519325256, "learning_rate": 3.0366812094611285e-05, "loss": 1.8564, "step": 2118 }, { "epoch": 0.4881644877037378, "grad_norm": 0.3103655278682709, "learning_rate": 3.0347171664890127e-05, "loss": 1.9194, "step": 2119 }, { "epoch": 0.4883948626389449, "grad_norm": 0.3323903977870941, "learning_rate": 3.0327527776098808e-05, "loss": 1.8744, "step": 2120 }, { "epoch": 0.48862523757415194, "grad_norm": 0.3113563358783722, "learning_rate": 3.0307880440944902e-05, "loss": 1.9182, "step": 2121 }, { "epoch": 0.488855612509359, "grad_norm": 0.3238922357559204, "learning_rate": 3.028822967213822e-05, "loss": 1.8897, "step": 2122 }, { "epoch": 0.48908598744456605, "grad_norm": 0.2938486635684967, "learning_rate": 3.026857548239078e-05, "loss": 1.871, "step": 2123 }, { "epoch": 0.4893163623797731, "grad_norm": 0.28404197096824646, "learning_rate": 3.024891788441684e-05, "loss": 1.8861, "step": 2124 }, { "epoch": 0.4895467373149801, "grad_norm": 0.28772634267807007, "learning_rate": 3.022925689093281e-05, "loss": 1.8977, "step": 2125 }, { "epoch": 0.48977711225018716, "grad_norm": 0.3119567930698395, "learning_rate": 3.0209592514657365e-05, "loss": 1.8472, "step": 2126 }, { "epoch": 0.4900074871853942, "grad_norm": 0.30357545614242554, "learning_rate": 3.0189924768311324e-05, "loss": 1.8812, "step": 2127 }, { "epoch": 0.4902378621206013, "grad_norm": 0.29980507493019104, "learning_rate": 3.0170253664617686e-05, "loss": 1.8669, "step": 2128 }, { "epoch": 0.49046823705580833, "grad_norm": 0.3000200390815735, "learning_rate": 3.015057921630163e-05, "loss": 1.9096, "step": 2129 }, { "epoch": 0.4906986119910154, "grad_norm": 0.30154135823249817, "learning_rate": 3.013090143609053e-05, "loss": 1.8696, "step": 2130 }, { "epoch": 0.49092898692622244, "grad_norm": 0.313831090927124, "learning_rate": 3.0111220336713857e-05, "loss": 1.8513, "step": 2131 }, { "epoch": 0.4911593618614295, "grad_norm": 0.33681485056877136, "learning_rate": 3.009153593090327e-05, "loss": 1.8658, "step": 2132 }, { "epoch": 0.4913897367966365, "grad_norm": 0.31239965558052063, "learning_rate": 3.0071848231392546e-05, "loss": 1.8405, "step": 2133 }, { "epoch": 0.49162011173184356, "grad_norm": 0.28777411580085754, "learning_rate": 3.0052157250917613e-05, "loss": 1.8661, "step": 2134 }, { "epoch": 0.4918504866670506, "grad_norm": 0.2971084713935852, "learning_rate": 3.0032463002216505e-05, "loss": 1.9043, "step": 2135 }, { "epoch": 0.49208086160225767, "grad_norm": 0.3147260844707489, "learning_rate": 3.001276549802938e-05, "loss": 1.8548, "step": 2136 }, { "epoch": 0.4923112365374647, "grad_norm": 0.33245643973350525, "learning_rate": 2.9993064751098488e-05, "loss": 1.8654, "step": 2137 }, { "epoch": 0.4925416114726718, "grad_norm": 0.30039799213409424, "learning_rate": 2.9973360774168192e-05, "loss": 1.9197, "step": 2138 }, { "epoch": 0.49277198640787884, "grad_norm": 0.31120574474334717, "learning_rate": 2.9953653579984942e-05, "loss": 1.8696, "step": 2139 }, { "epoch": 0.4930023613430859, "grad_norm": 0.31642067432403564, "learning_rate": 2.9933943181297264e-05, "loss": 1.9001, "step": 2140 }, { "epoch": 0.49323273627829295, "grad_norm": 0.3270643651485443, "learning_rate": 2.991422959085576e-05, "loss": 1.8973, "step": 2141 }, { "epoch": 0.49346311121349995, "grad_norm": 0.3216436803340912, "learning_rate": 2.989451282141308e-05, "loss": 1.9027, "step": 2142 }, { "epoch": 0.493693486148707, "grad_norm": 0.3037739098072052, "learning_rate": 2.9874792885723974e-05, "loss": 1.8772, "step": 2143 }, { "epoch": 0.49392386108391406, "grad_norm": 0.30626600980758667, "learning_rate": 2.9855069796545186e-05, "loss": 1.905, "step": 2144 }, { "epoch": 0.4941542360191211, "grad_norm": 0.32180970907211304, "learning_rate": 2.9835343566635548e-05, "loss": 1.8962, "step": 2145 }, { "epoch": 0.4943846109543282, "grad_norm": 0.3222624361515045, "learning_rate": 2.981561420875589e-05, "loss": 1.9309, "step": 2146 }, { "epoch": 0.49461498588953523, "grad_norm": 0.295502632856369, "learning_rate": 2.9795881735669072e-05, "loss": 1.9343, "step": 2147 }, { "epoch": 0.4948453608247423, "grad_norm": 0.28029632568359375, "learning_rate": 2.9776146160139995e-05, "loss": 1.897, "step": 2148 }, { "epoch": 0.49507573575994934, "grad_norm": 0.3024124205112457, "learning_rate": 2.9756407494935528e-05, "loss": 1.8379, "step": 2149 }, { "epoch": 0.49530611069515634, "grad_norm": 0.3135741949081421, "learning_rate": 2.973666575282456e-05, "loss": 1.8615, "step": 2150 }, { "epoch": 0.4955364856303634, "grad_norm": 0.30257418751716614, "learning_rate": 2.9716920946577975e-05, "loss": 1.9206, "step": 2151 }, { "epoch": 0.49576686056557046, "grad_norm": 0.3191167116165161, "learning_rate": 2.9697173088968638e-05, "loss": 1.8576, "step": 2152 }, { "epoch": 0.4959972355007775, "grad_norm": 0.3067823052406311, "learning_rate": 2.9677422192771365e-05, "loss": 1.8852, "step": 2153 }, { "epoch": 0.49622761043598457, "grad_norm": 0.30253925919532776, "learning_rate": 2.9657668270762957e-05, "loss": 1.877, "step": 2154 }, { "epoch": 0.4964579853711916, "grad_norm": 0.29943937063217163, "learning_rate": 2.963791133572218e-05, "loss": 1.8773, "step": 2155 }, { "epoch": 0.4966883603063987, "grad_norm": 0.2902437150478363, "learning_rate": 2.961815140042974e-05, "loss": 1.8584, "step": 2156 }, { "epoch": 0.49691873524160574, "grad_norm": 0.3071887493133545, "learning_rate": 2.959838847766827e-05, "loss": 1.8935, "step": 2157 }, { "epoch": 0.49714911017681274, "grad_norm": 0.3169489800930023, "learning_rate": 2.9578622580222358e-05, "loss": 1.8758, "step": 2158 }, { "epoch": 0.4973794851120198, "grad_norm": 0.30909428000450134, "learning_rate": 2.9558853720878503e-05, "loss": 1.8246, "step": 2159 }, { "epoch": 0.49760986004722685, "grad_norm": 0.29471370577812195, "learning_rate": 2.9539081912425132e-05, "loss": 1.8746, "step": 2160 }, { "epoch": 0.4978402349824339, "grad_norm": 0.3129802942276001, "learning_rate": 2.951930716765256e-05, "loss": 1.8758, "step": 2161 }, { "epoch": 0.49807060991764096, "grad_norm": 0.2992183566093445, "learning_rate": 2.9499529499353024e-05, "loss": 1.8641, "step": 2162 }, { "epoch": 0.498300984852848, "grad_norm": 0.3135461211204529, "learning_rate": 2.9479748920320633e-05, "loss": 1.9176, "step": 2163 }, { "epoch": 0.4985313597880551, "grad_norm": 0.3251444101333618, "learning_rate": 2.945996544335139e-05, "loss": 1.9176, "step": 2164 }, { "epoch": 0.49876173472326213, "grad_norm": 0.30595842003822327, "learning_rate": 2.944017908124318e-05, "loss": 1.9248, "step": 2165 }, { "epoch": 0.4989921096584692, "grad_norm": 0.3636179268360138, "learning_rate": 2.9420389846795728e-05, "loss": 1.8698, "step": 2166 }, { "epoch": 0.4992224845936762, "grad_norm": 0.31547868251800537, "learning_rate": 2.940059775281063e-05, "loss": 1.9224, "step": 2167 }, { "epoch": 0.49945285952888324, "grad_norm": 0.3138265311717987, "learning_rate": 2.9380802812091368e-05, "loss": 1.8719, "step": 2168 }, { "epoch": 0.4996832344640903, "grad_norm": 0.29369643330574036, "learning_rate": 2.9361005037443203e-05, "loss": 1.8649, "step": 2169 }, { "epoch": 0.49991360939929735, "grad_norm": 0.32118088006973267, "learning_rate": 2.9341204441673266e-05, "loss": 1.9079, "step": 2170 }, { "epoch": 0.5001439843345044, "grad_norm": 0.3456633687019348, "learning_rate": 2.9321401037590502e-05, "loss": 1.8599, "step": 2171 }, { "epoch": 0.5003743592697114, "grad_norm": 0.3271368443965912, "learning_rate": 2.9301594838005685e-05, "loss": 1.8302, "step": 2172 }, { "epoch": 0.5006047342049185, "grad_norm": 0.3136473298072815, "learning_rate": 2.9281785855731393e-05, "loss": 1.8395, "step": 2173 }, { "epoch": 0.5008351091401255, "grad_norm": 0.3038071393966675, "learning_rate": 2.926197410358199e-05, "loss": 1.8731, "step": 2174 }, { "epoch": 0.5010654840753326, "grad_norm": 0.3109053075313568, "learning_rate": 2.924215959437364e-05, "loss": 1.8345, "step": 2175 }, { "epoch": 0.5012958590105396, "grad_norm": 0.33830076456069946, "learning_rate": 2.9222342340924308e-05, "loss": 1.8565, "step": 2176 }, { "epoch": 0.5015262339457467, "grad_norm": 0.32348284125328064, "learning_rate": 2.920252235605371e-05, "loss": 1.8496, "step": 2177 }, { "epoch": 0.5017566088809537, "grad_norm": 0.3180393576622009, "learning_rate": 2.9182699652583336e-05, "loss": 1.8899, "step": 2178 }, { "epoch": 0.5019869838161608, "grad_norm": 0.28946420550346375, "learning_rate": 2.916287424333643e-05, "loss": 1.8676, "step": 2179 }, { "epoch": 0.5022173587513679, "grad_norm": 0.2953049838542938, "learning_rate": 2.9143046141138015e-05, "loss": 1.8308, "step": 2180 }, { "epoch": 0.5024477336865749, "grad_norm": 0.3149653375148773, "learning_rate": 2.912321535881481e-05, "loss": 1.8614, "step": 2181 }, { "epoch": 0.502678108621782, "grad_norm": 0.33069857954978943, "learning_rate": 2.9103381909195304e-05, "loss": 1.8369, "step": 2182 }, { "epoch": 0.502908483556989, "grad_norm": 0.3213171064853668, "learning_rate": 2.9083545805109702e-05, "loss": 1.8471, "step": 2183 }, { "epoch": 0.5031388584921961, "grad_norm": 0.33367717266082764, "learning_rate": 2.906370705938991e-05, "loss": 1.9112, "step": 2184 }, { "epoch": 0.5033692334274031, "grad_norm": 0.3262825310230255, "learning_rate": 2.904386568486957e-05, "loss": 1.9228, "step": 2185 }, { "epoch": 0.5035996083626102, "grad_norm": 0.3353404998779297, "learning_rate": 2.9024021694384006e-05, "loss": 1.8894, "step": 2186 }, { "epoch": 0.5038299832978173, "grad_norm": 0.35145601630210876, "learning_rate": 2.9004175100770237e-05, "loss": 1.8674, "step": 2187 }, { "epoch": 0.5040603582330242, "grad_norm": 0.35221150517463684, "learning_rate": 2.8984325916866973e-05, "loss": 1.8854, "step": 2188 }, { "epoch": 0.5042907331682313, "grad_norm": 0.30925095081329346, "learning_rate": 2.8964474155514588e-05, "loss": 1.9068, "step": 2189 }, { "epoch": 0.5045211081034383, "grad_norm": 0.31176239252090454, "learning_rate": 2.8944619829555143e-05, "loss": 1.8808, "step": 2190 }, { "epoch": 0.5047514830386454, "grad_norm": 0.3351825177669525, "learning_rate": 2.892476295183232e-05, "loss": 1.9323, "step": 2191 }, { "epoch": 0.5049818579738524, "grad_norm": 0.3073180913925171, "learning_rate": 2.8904903535191496e-05, "loss": 1.8585, "step": 2192 }, { "epoch": 0.5052122329090595, "grad_norm": 0.32542288303375244, "learning_rate": 2.888504159247968e-05, "loss": 1.8678, "step": 2193 }, { "epoch": 0.5054426078442665, "grad_norm": 0.3193667232990265, "learning_rate": 2.8865177136545485e-05, "loss": 1.8587, "step": 2194 }, { "epoch": 0.5056729827794736, "grad_norm": 0.31882020831108093, "learning_rate": 2.8845310180239182e-05, "loss": 1.8972, "step": 2195 }, { "epoch": 0.5059033577146806, "grad_norm": 0.304768830537796, "learning_rate": 2.8825440736412646e-05, "loss": 1.885, "step": 2196 }, { "epoch": 0.5061337326498877, "grad_norm": 0.33005285263061523, "learning_rate": 2.880556881791936e-05, "loss": 1.8949, "step": 2197 }, { "epoch": 0.5063641075850948, "grad_norm": 0.3122861981391907, "learning_rate": 2.878569443761442e-05, "loss": 1.8398, "step": 2198 }, { "epoch": 0.5065944825203018, "grad_norm": 0.32674992084503174, "learning_rate": 2.8765817608354507e-05, "loss": 1.8554, "step": 2199 }, { "epoch": 0.5068248574555089, "grad_norm": 0.31783726811408997, "learning_rate": 2.874593834299787e-05, "loss": 1.8701, "step": 2200 }, { "epoch": 0.5070552323907159, "grad_norm": 0.29075896739959717, "learning_rate": 2.872605665440436e-05, "loss": 1.9, "step": 2201 }, { "epoch": 0.507285607325923, "grad_norm": 0.3365229070186615, "learning_rate": 2.8706172555435397e-05, "loss": 1.8587, "step": 2202 }, { "epoch": 0.50751598226113, "grad_norm": 0.3655635118484497, "learning_rate": 2.8686286058953925e-05, "loss": 1.8439, "step": 2203 }, { "epoch": 0.507746357196337, "grad_norm": 0.3258606195449829, "learning_rate": 2.8666397177824473e-05, "loss": 1.8562, "step": 2204 }, { "epoch": 0.507976732131544, "grad_norm": 0.3241688013076782, "learning_rate": 2.8646505924913108e-05, "loss": 1.883, "step": 2205 }, { "epoch": 0.5082071070667511, "grad_norm": 0.33514541387557983, "learning_rate": 2.862661231308742e-05, "loss": 1.8407, "step": 2206 }, { "epoch": 0.5084374820019582, "grad_norm": 0.30386435985565186, "learning_rate": 2.8606716355216523e-05, "loss": 1.8959, "step": 2207 }, { "epoch": 0.5086678569371652, "grad_norm": 0.32976233959198, "learning_rate": 2.858681806417106e-05, "loss": 1.8869, "step": 2208 }, { "epoch": 0.5088982318723723, "grad_norm": 0.30872756242752075, "learning_rate": 2.8566917452823182e-05, "loss": 1.9076, "step": 2209 }, { "epoch": 0.5091286068075793, "grad_norm": 0.33939942717552185, "learning_rate": 2.8547014534046536e-05, "loss": 1.8374, "step": 2210 }, { "epoch": 0.5093589817427864, "grad_norm": 0.3222332298755646, "learning_rate": 2.8527109320716267e-05, "loss": 1.8271, "step": 2211 }, { "epoch": 0.5095893566779934, "grad_norm": 0.31308990716934204, "learning_rate": 2.8507201825708994e-05, "loss": 1.8624, "step": 2212 }, { "epoch": 0.5098197316132005, "grad_norm": 0.28621742129325867, "learning_rate": 2.848729206190282e-05, "loss": 1.8844, "step": 2213 }, { "epoch": 0.5100501065484075, "grad_norm": 0.2939852476119995, "learning_rate": 2.846738004217732e-05, "loss": 1.8683, "step": 2214 }, { "epoch": 0.5102804814836146, "grad_norm": 0.29595351219177246, "learning_rate": 2.844746577941353e-05, "loss": 1.9107, "step": 2215 }, { "epoch": 0.5105108564188217, "grad_norm": 0.2980828583240509, "learning_rate": 2.8427549286493904e-05, "loss": 1.8869, "step": 2216 }, { "epoch": 0.5107412313540287, "grad_norm": 0.3016016185283661, "learning_rate": 2.840763057630239e-05, "loss": 1.8843, "step": 2217 }, { "epoch": 0.5109716062892358, "grad_norm": 0.2940928041934967, "learning_rate": 2.8387709661724338e-05, "loss": 1.871, "step": 2218 }, { "epoch": 0.5112019812244428, "grad_norm": 0.3029842972755432, "learning_rate": 2.836778655564653e-05, "loss": 1.8559, "step": 2219 }, { "epoch": 0.5114323561596499, "grad_norm": 0.29426366090774536, "learning_rate": 2.8347861270957156e-05, "loss": 1.913, "step": 2220 }, { "epoch": 0.5116627310948568, "grad_norm": 0.2965708374977112, "learning_rate": 2.8327933820545848e-05, "loss": 1.8845, "step": 2221 }, { "epoch": 0.5118931060300639, "grad_norm": 0.3069917559623718, "learning_rate": 2.8308004217303597e-05, "loss": 1.8555, "step": 2222 }, { "epoch": 0.5121234809652709, "grad_norm": 0.3158882260322571, "learning_rate": 2.8288072474122817e-05, "loss": 1.8972, "step": 2223 }, { "epoch": 0.512353855900478, "grad_norm": 0.30076122283935547, "learning_rate": 2.826813860389729e-05, "loss": 1.8876, "step": 2224 }, { "epoch": 0.512584230835685, "grad_norm": 0.3009515106678009, "learning_rate": 2.8248202619522192e-05, "loss": 1.8761, "step": 2225 }, { "epoch": 0.5128146057708921, "grad_norm": 0.277882844209671, "learning_rate": 2.822826453389404e-05, "loss": 1.8779, "step": 2226 }, { "epoch": 0.5130449807060992, "grad_norm": 0.3309481739997864, "learning_rate": 2.8208324359910738e-05, "loss": 1.8219, "step": 2227 }, { "epoch": 0.5132753556413062, "grad_norm": 0.3031958043575287, "learning_rate": 2.818838211047151e-05, "loss": 1.8547, "step": 2228 }, { "epoch": 0.5135057305765133, "grad_norm": 0.2840794622898102, "learning_rate": 2.8168437798476954e-05, "loss": 1.8994, "step": 2229 }, { "epoch": 0.5137361055117203, "grad_norm": 0.29376256465911865, "learning_rate": 2.8148491436829e-05, "loss": 1.8501, "step": 2230 }, { "epoch": 0.5139664804469274, "grad_norm": 0.31452637910842896, "learning_rate": 2.8128543038430865e-05, "loss": 1.8814, "step": 2231 }, { "epoch": 0.5141968553821344, "grad_norm": 0.29778122901916504, "learning_rate": 2.8108592616187133e-05, "loss": 1.8629, "step": 2232 }, { "epoch": 0.5144272303173415, "grad_norm": 0.29411447048187256, "learning_rate": 2.8088640183003667e-05, "loss": 1.8747, "step": 2233 }, { "epoch": 0.5146576052525486, "grad_norm": 0.29384905099868774, "learning_rate": 2.8068685751787636e-05, "loss": 1.8636, "step": 2234 }, { "epoch": 0.5148879801877556, "grad_norm": 0.3119572103023529, "learning_rate": 2.8048729335447514e-05, "loss": 1.8345, "step": 2235 }, { "epoch": 0.5151183551229627, "grad_norm": 0.31455016136169434, "learning_rate": 2.802877094689304e-05, "loss": 1.8627, "step": 2236 }, { "epoch": 0.5153487300581697, "grad_norm": 0.3078712522983551, "learning_rate": 2.8008810599035252e-05, "loss": 1.8756, "step": 2237 }, { "epoch": 0.5155791049933767, "grad_norm": 0.2880895137786865, "learning_rate": 2.7988848304786426e-05, "loss": 1.8801, "step": 2238 }, { "epoch": 0.5158094799285837, "grad_norm": 0.29487648606300354, "learning_rate": 2.7968884077060126e-05, "loss": 1.8705, "step": 2239 }, { "epoch": 0.5160398548637908, "grad_norm": 0.2921651005744934, "learning_rate": 2.7948917928771158e-05, "loss": 1.8984, "step": 2240 }, { "epoch": 0.5162702297989978, "grad_norm": 0.2776263356208801, "learning_rate": 2.7928949872835543e-05, "loss": 1.859, "step": 2241 }, { "epoch": 0.5165006047342049, "grad_norm": 0.30326393246650696, "learning_rate": 2.790897992217058e-05, "loss": 1.8841, "step": 2242 }, { "epoch": 0.516730979669412, "grad_norm": 0.2899799942970276, "learning_rate": 2.788900808969478e-05, "loss": 1.8963, "step": 2243 }, { "epoch": 0.516961354604619, "grad_norm": 0.29559093713760376, "learning_rate": 2.786903438832785e-05, "loss": 1.8604, "step": 2244 }, { "epoch": 0.5171917295398261, "grad_norm": 0.30227231979370117, "learning_rate": 2.7849058830990708e-05, "loss": 1.7898, "step": 2245 }, { "epoch": 0.5174221044750331, "grad_norm": 0.2976643145084381, "learning_rate": 2.7829081430605518e-05, "loss": 1.8642, "step": 2246 }, { "epoch": 0.5176524794102402, "grad_norm": 0.31825315952301025, "learning_rate": 2.7809102200095582e-05, "loss": 1.8985, "step": 2247 }, { "epoch": 0.5178828543454472, "grad_norm": 0.2944362759590149, "learning_rate": 2.778912115238541e-05, "loss": 1.8506, "step": 2248 }, { "epoch": 0.5181132292806543, "grad_norm": 0.28875765204429626, "learning_rate": 2.7769138300400694e-05, "loss": 1.862, "step": 2249 }, { "epoch": 0.5183436042158613, "grad_norm": 0.305813193321228, "learning_rate": 2.7749153657068272e-05, "loss": 1.8586, "step": 2250 }, { "epoch": 0.5185739791510684, "grad_norm": 0.3035238981246948, "learning_rate": 2.7729167235316163e-05, "loss": 1.8541, "step": 2251 }, { "epoch": 0.5188043540862755, "grad_norm": 0.27156367897987366, "learning_rate": 2.770917904807352e-05, "loss": 1.8691, "step": 2252 }, { "epoch": 0.5190347290214825, "grad_norm": 0.30943745374679565, "learning_rate": 2.768918910827064e-05, "loss": 1.8597, "step": 2253 }, { "epoch": 0.5192651039566896, "grad_norm": 0.30761632323265076, "learning_rate": 2.7669197428838972e-05, "loss": 1.875, "step": 2254 }, { "epoch": 0.5194954788918965, "grad_norm": 0.3043053150177002, "learning_rate": 2.764920402271107e-05, "loss": 1.8523, "step": 2255 }, { "epoch": 0.5197258538271036, "grad_norm": 0.3040861487388611, "learning_rate": 2.7629208902820612e-05, "loss": 1.8904, "step": 2256 }, { "epoch": 0.5199562287623106, "grad_norm": 0.3061616122722626, "learning_rate": 2.7609212082102374e-05, "loss": 1.8672, "step": 2257 }, { "epoch": 0.5201866036975177, "grad_norm": 0.3135985732078552, "learning_rate": 2.7589213573492262e-05, "loss": 1.8348, "step": 2258 }, { "epoch": 0.5204169786327247, "grad_norm": 0.3107624650001526, "learning_rate": 2.7569213389927246e-05, "loss": 1.8385, "step": 2259 }, { "epoch": 0.5206473535679318, "grad_norm": 0.35278424620628357, "learning_rate": 2.754921154434538e-05, "loss": 1.8069, "step": 2260 }, { "epoch": 0.5208777285031388, "grad_norm": 0.3145417869091034, "learning_rate": 2.7529208049685807e-05, "loss": 1.8509, "step": 2261 }, { "epoch": 0.5211081034383459, "grad_norm": 0.30687442421913147, "learning_rate": 2.7509202918888734e-05, "loss": 1.9259, "step": 2262 }, { "epoch": 0.521338478373553, "grad_norm": 0.288267582654953, "learning_rate": 2.748919616489542e-05, "loss": 1.8463, "step": 2263 }, { "epoch": 0.52156885330876, "grad_norm": 0.30160635709762573, "learning_rate": 2.746918780064818e-05, "loss": 1.8522, "step": 2264 }, { "epoch": 0.5217992282439671, "grad_norm": 0.32076653838157654, "learning_rate": 2.7449177839090362e-05, "loss": 1.8721, "step": 2265 }, { "epoch": 0.5220296031791741, "grad_norm": 0.2913026809692383, "learning_rate": 2.742916629316635e-05, "loss": 1.8988, "step": 2266 }, { "epoch": 0.5222599781143812, "grad_norm": 0.3367964029312134, "learning_rate": 2.7409153175821576e-05, "loss": 1.9139, "step": 2267 }, { "epoch": 0.5224903530495882, "grad_norm": 0.3060467839241028, "learning_rate": 2.738913850000246e-05, "loss": 1.8539, "step": 2268 }, { "epoch": 0.5227207279847953, "grad_norm": 0.29739901423454285, "learning_rate": 2.7369122278656423e-05, "loss": 1.8894, "step": 2269 }, { "epoch": 0.5229511029200024, "grad_norm": 0.30566534399986267, "learning_rate": 2.7349104524731916e-05, "loss": 1.894, "step": 2270 }, { "epoch": 0.5231814778552093, "grad_norm": 0.302708238363266, "learning_rate": 2.7329085251178372e-05, "loss": 1.8987, "step": 2271 }, { "epoch": 0.5234118527904164, "grad_norm": 0.2941441833972931, "learning_rate": 2.730906447094619e-05, "loss": 1.8824, "step": 2272 }, { "epoch": 0.5236422277256234, "grad_norm": 0.31807321310043335, "learning_rate": 2.728904219698676e-05, "loss": 1.8487, "step": 2273 }, { "epoch": 0.5238726026608305, "grad_norm": 0.30361509323120117, "learning_rate": 2.726901844225243e-05, "loss": 1.8893, "step": 2274 }, { "epoch": 0.5241029775960375, "grad_norm": 0.3052193224430084, "learning_rate": 2.724899321969652e-05, "loss": 1.8757, "step": 2275 }, { "epoch": 0.5243333525312446, "grad_norm": 0.3049350082874298, "learning_rate": 2.7228966542273288e-05, "loss": 1.8537, "step": 2276 }, { "epoch": 0.5245637274664516, "grad_norm": 0.32148653268814087, "learning_rate": 2.7208938422937937e-05, "loss": 1.8276, "step": 2277 }, { "epoch": 0.5247941024016587, "grad_norm": 0.30666884779930115, "learning_rate": 2.718890887464658e-05, "loss": 1.8999, "step": 2278 }, { "epoch": 0.5250244773368657, "grad_norm": 0.2992156147956848, "learning_rate": 2.71688779103563e-05, "loss": 1.8495, "step": 2279 }, { "epoch": 0.5252548522720728, "grad_norm": 0.3105941414833069, "learning_rate": 2.7148845543025063e-05, "loss": 1.8994, "step": 2280 }, { "epoch": 0.5254852272072799, "grad_norm": 0.2878016531467438, "learning_rate": 2.712881178561174e-05, "loss": 1.8194, "step": 2281 }, { "epoch": 0.5257156021424869, "grad_norm": 0.29000788927078247, "learning_rate": 2.7108776651076118e-05, "loss": 1.8839, "step": 2282 }, { "epoch": 0.525945977077694, "grad_norm": 0.4347638785839081, "learning_rate": 2.7088740152378882e-05, "loss": 1.857, "step": 2283 }, { "epoch": 0.526176352012901, "grad_norm": 0.2982732653617859, "learning_rate": 2.706870230248157e-05, "loss": 1.8543, "step": 2284 }, { "epoch": 0.5264067269481081, "grad_norm": 0.3130025267601013, "learning_rate": 2.7048663114346618e-05, "loss": 1.871, "step": 2285 }, { "epoch": 0.5266371018833151, "grad_norm": 0.3164607584476471, "learning_rate": 2.702862260093731e-05, "loss": 1.8415, "step": 2286 }, { "epoch": 0.5268674768185222, "grad_norm": 0.28092697262763977, "learning_rate": 2.7008580775217807e-05, "loss": 1.8256, "step": 2287 }, { "epoch": 0.5270978517537291, "grad_norm": 0.3019684851169586, "learning_rate": 2.6988537650153107e-05, "loss": 1.8449, "step": 2288 }, { "epoch": 0.5273282266889362, "grad_norm": 0.30433914065361023, "learning_rate": 2.696849323870905e-05, "loss": 1.8351, "step": 2289 }, { "epoch": 0.5275586016241433, "grad_norm": 0.3302403688430786, "learning_rate": 2.6948447553852306e-05, "loss": 1.8404, "step": 2290 }, { "epoch": 0.5277889765593503, "grad_norm": 0.3016055226325989, "learning_rate": 2.6928400608550375e-05, "loss": 1.8292, "step": 2291 }, { "epoch": 0.5280193514945574, "grad_norm": 0.30487513542175293, "learning_rate": 2.6908352415771577e-05, "loss": 1.8655, "step": 2292 }, { "epoch": 0.5282497264297644, "grad_norm": 0.32057109475135803, "learning_rate": 2.6888302988485014e-05, "loss": 1.8639, "step": 2293 }, { "epoch": 0.5284801013649715, "grad_norm": 0.3224034011363983, "learning_rate": 2.686825233966061e-05, "loss": 1.8641, "step": 2294 }, { "epoch": 0.5287104763001785, "grad_norm": 0.29312458634376526, "learning_rate": 2.6848200482269076e-05, "loss": 1.8763, "step": 2295 }, { "epoch": 0.5289408512353856, "grad_norm": 0.3088854253292084, "learning_rate": 2.6828147429281902e-05, "loss": 1.8894, "step": 2296 }, { "epoch": 0.5291712261705926, "grad_norm": 0.3127730190753937, "learning_rate": 2.6808093193671345e-05, "loss": 1.8514, "step": 2297 }, { "epoch": 0.5294016011057997, "grad_norm": 0.33454975485801697, "learning_rate": 2.678803778841044e-05, "loss": 1.822, "step": 2298 }, { "epoch": 0.5296319760410068, "grad_norm": 0.30542293190956116, "learning_rate": 2.676798122647297e-05, "loss": 1.884, "step": 2299 }, { "epoch": 0.5298623509762138, "grad_norm": 0.2845883071422577, "learning_rate": 2.674792352083347e-05, "loss": 1.911, "step": 2300 }, { "epoch": 0.5300927259114209, "grad_norm": 0.30640894174575806, "learning_rate": 2.6727864684467203e-05, "loss": 1.9142, "step": 2301 }, { "epoch": 0.5303231008466279, "grad_norm": 0.3379109799861908, "learning_rate": 2.6707804730350187e-05, "loss": 1.8429, "step": 2302 }, { "epoch": 0.530553475781835, "grad_norm": 0.31980395317077637, "learning_rate": 2.668774367145913e-05, "loss": 1.8873, "step": 2303 }, { "epoch": 0.530783850717042, "grad_norm": 0.31545063853263855, "learning_rate": 2.66676815207715e-05, "loss": 1.8406, "step": 2304 }, { "epoch": 0.531014225652249, "grad_norm": 0.29561007022857666, "learning_rate": 2.664761829126543e-05, "loss": 1.873, "step": 2305 }, { "epoch": 0.531244600587456, "grad_norm": 0.35476598143577576, "learning_rate": 2.6627553995919764e-05, "loss": 1.8849, "step": 2306 }, { "epoch": 0.5314749755226631, "grad_norm": 0.31780362129211426, "learning_rate": 2.660748864771403e-05, "loss": 1.9097, "step": 2307 }, { "epoch": 0.5317053504578702, "grad_norm": 0.31455689668655396, "learning_rate": 2.6587422259628476e-05, "loss": 1.867, "step": 2308 }, { "epoch": 0.5319357253930772, "grad_norm": 0.3150586783885956, "learning_rate": 2.656735484464396e-05, "loss": 1.8431, "step": 2309 }, { "epoch": 0.5321661003282843, "grad_norm": 0.31087109446525574, "learning_rate": 2.654728641574205e-05, "loss": 1.836, "step": 2310 }, { "epoch": 0.5323964752634913, "grad_norm": 0.34484174847602844, "learning_rate": 2.652721698590495e-05, "loss": 1.8935, "step": 2311 }, { "epoch": 0.5326268501986984, "grad_norm": 0.3176835775375366, "learning_rate": 2.650714656811552e-05, "loss": 1.8368, "step": 2312 }, { "epoch": 0.5328572251339054, "grad_norm": 0.2881630063056946, "learning_rate": 2.648707517535726e-05, "loss": 1.8912, "step": 2313 }, { "epoch": 0.5330876000691125, "grad_norm": 0.3040626049041748, "learning_rate": 2.6467002820614296e-05, "loss": 1.8323, "step": 2314 }, { "epoch": 0.5333179750043195, "grad_norm": 0.30654045939445496, "learning_rate": 2.6446929516871365e-05, "loss": 1.8728, "step": 2315 }, { "epoch": 0.5335483499395266, "grad_norm": 0.3147071897983551, "learning_rate": 2.642685527711385e-05, "loss": 1.8429, "step": 2316 }, { "epoch": 0.5337787248747337, "grad_norm": 0.30204835534095764, "learning_rate": 2.640678011432771e-05, "loss": 1.8606, "step": 2317 }, { "epoch": 0.5340090998099407, "grad_norm": 0.29252752661705017, "learning_rate": 2.6386704041499505e-05, "loss": 1.8721, "step": 2318 }, { "epoch": 0.5342394747451478, "grad_norm": 0.3040657639503479, "learning_rate": 2.636662707161639e-05, "loss": 1.8924, "step": 2319 }, { "epoch": 0.5344698496803548, "grad_norm": 0.30583032965660095, "learning_rate": 2.6346549217666112e-05, "loss": 1.9153, "step": 2320 }, { "epoch": 0.5347002246155619, "grad_norm": 0.3119697868824005, "learning_rate": 2.632647049263697e-05, "loss": 1.8638, "step": 2321 }, { "epoch": 0.5349305995507688, "grad_norm": 0.3079223036766052, "learning_rate": 2.630639090951783e-05, "loss": 1.8621, "step": 2322 }, { "epoch": 0.5351609744859759, "grad_norm": 0.27673542499542236, "learning_rate": 2.628631048129812e-05, "loss": 1.8928, "step": 2323 }, { "epoch": 0.5353913494211829, "grad_norm": 0.3003257215023041, "learning_rate": 2.6266229220967818e-05, "loss": 1.8682, "step": 2324 }, { "epoch": 0.53562172435639, "grad_norm": 1.138968825340271, "learning_rate": 2.624614714151743e-05, "loss": 1.9306, "step": 2325 }, { "epoch": 0.535852099291597, "grad_norm": 0.3534319996833801, "learning_rate": 2.6226064255938e-05, "loss": 1.8605, "step": 2326 }, { "epoch": 0.5360824742268041, "grad_norm": 0.2781869173049927, "learning_rate": 2.6205980577221083e-05, "loss": 1.8794, "step": 2327 }, { "epoch": 0.5363128491620112, "grad_norm": 0.3118973970413208, "learning_rate": 2.6185896118358748e-05, "loss": 1.8608, "step": 2328 }, { "epoch": 0.5365432240972182, "grad_norm": 0.29643136262893677, "learning_rate": 2.616581089234359e-05, "loss": 1.8485, "step": 2329 }, { "epoch": 0.5367735990324253, "grad_norm": 0.314693421125412, "learning_rate": 2.6145724912168678e-05, "loss": 1.8408, "step": 2330 }, { "epoch": 0.5370039739676323, "grad_norm": 0.2998167872428894, "learning_rate": 2.612563819082757e-05, "loss": 1.8966, "step": 2331 }, { "epoch": 0.5372343489028394, "grad_norm": 0.28577810525894165, "learning_rate": 2.6105550741314317e-05, "loss": 1.9089, "step": 2332 }, { "epoch": 0.5374647238380464, "grad_norm": 0.32473286986351013, "learning_rate": 2.608546257662343e-05, "loss": 1.825, "step": 2333 }, { "epoch": 0.5376950987732535, "grad_norm": 0.2960779368877411, "learning_rate": 2.606537370974989e-05, "loss": 1.8985, "step": 2334 }, { "epoch": 0.5379254737084606, "grad_norm": 0.305687814950943, "learning_rate": 2.604528415368912e-05, "loss": 1.8702, "step": 2335 }, { "epoch": 0.5381558486436676, "grad_norm": 0.3110106289386749, "learning_rate": 2.6025193921436996e-05, "loss": 1.8723, "step": 2336 }, { "epoch": 0.5383862235788747, "grad_norm": 0.317926824092865, "learning_rate": 2.600510302598984e-05, "loss": 1.8582, "step": 2337 }, { "epoch": 0.5386165985140816, "grad_norm": 0.3719663619995117, "learning_rate": 2.598501148034439e-05, "loss": 1.8718, "step": 2338 }, { "epoch": 0.5388469734492887, "grad_norm": 0.3021608889102936, "learning_rate": 2.596491929749782e-05, "loss": 1.8526, "step": 2339 }, { "epoch": 0.5390773483844957, "grad_norm": 0.300848126411438, "learning_rate": 2.594482649044769e-05, "loss": 1.8809, "step": 2340 }, { "epoch": 0.5393077233197028, "grad_norm": 0.34853875637054443, "learning_rate": 2.592473307219198e-05, "loss": 1.8476, "step": 2341 }, { "epoch": 0.5395380982549098, "grad_norm": 0.32185789942741394, "learning_rate": 2.5904639055729092e-05, "loss": 1.8389, "step": 2342 }, { "epoch": 0.5397684731901169, "grad_norm": 0.3203170597553253, "learning_rate": 2.588454445405775e-05, "loss": 1.8969, "step": 2343 }, { "epoch": 0.539998848125324, "grad_norm": 0.314529150724411, "learning_rate": 2.5864449280177116e-05, "loss": 1.8682, "step": 2344 }, { "epoch": 0.540229223060531, "grad_norm": 0.3139440417289734, "learning_rate": 2.584435354708671e-05, "loss": 1.8299, "step": 2345 }, { "epoch": 0.5404595979957381, "grad_norm": 0.2972472310066223, "learning_rate": 2.5824257267786385e-05, "loss": 1.8665, "step": 2346 }, { "epoch": 0.5406899729309451, "grad_norm": 0.29281798005104065, "learning_rate": 2.580416045527637e-05, "loss": 1.8992, "step": 2347 }, { "epoch": 0.5409203478661522, "grad_norm": 0.3049889802932739, "learning_rate": 2.578406312255725e-05, "loss": 1.878, "step": 2348 }, { "epoch": 0.5411507228013592, "grad_norm": 0.3054758906364441, "learning_rate": 2.576396528262992e-05, "loss": 1.877, "step": 2349 }, { "epoch": 0.5413810977365663, "grad_norm": 0.2904840409755707, "learning_rate": 2.5743866948495615e-05, "loss": 1.8558, "step": 2350 }, { "epoch": 0.5416114726717733, "grad_norm": 0.30583885312080383, "learning_rate": 2.5723768133155895e-05, "loss": 1.8798, "step": 2351 }, { "epoch": 0.5418418476069804, "grad_norm": 0.3021509647369385, "learning_rate": 2.570366884961263e-05, "loss": 1.8472, "step": 2352 }, { "epoch": 0.5420722225421875, "grad_norm": 0.29434525966644287, "learning_rate": 2.5683569110867984e-05, "loss": 1.8712, "step": 2353 }, { "epoch": 0.5423025974773945, "grad_norm": 0.3107386529445648, "learning_rate": 2.5663468929924416e-05, "loss": 1.7912, "step": 2354 }, { "epoch": 0.5425329724126015, "grad_norm": 0.29797908663749695, "learning_rate": 2.5643368319784694e-05, "loss": 1.816, "step": 2355 }, { "epoch": 0.5427633473478085, "grad_norm": 0.30313217639923096, "learning_rate": 2.5623267293451826e-05, "loss": 1.9032, "step": 2356 }, { "epoch": 0.5429937222830156, "grad_norm": 0.26712119579315186, "learning_rate": 2.5603165863929113e-05, "loss": 1.8989, "step": 2357 }, { "epoch": 0.5432240972182226, "grad_norm": 0.30907556414604187, "learning_rate": 2.5583064044220125e-05, "loss": 1.8402, "step": 2358 }, { "epoch": 0.5434544721534297, "grad_norm": 0.36403700709342957, "learning_rate": 2.5562961847328652e-05, "loss": 1.8392, "step": 2359 }, { "epoch": 0.5436848470886367, "grad_norm": 0.34116050601005554, "learning_rate": 2.554285928625877e-05, "loss": 1.9193, "step": 2360 }, { "epoch": 0.5439152220238438, "grad_norm": 0.3226014971733093, "learning_rate": 2.552275637401475e-05, "loss": 1.8489, "step": 2361 }, { "epoch": 0.5441455969590508, "grad_norm": 0.3153980076313019, "learning_rate": 2.550265312360112e-05, "loss": 1.8512, "step": 2362 }, { "epoch": 0.5443759718942579, "grad_norm": 0.2996184527873993, "learning_rate": 2.548254954802261e-05, "loss": 1.8681, "step": 2363 }, { "epoch": 0.544606346829465, "grad_norm": 0.3077056407928467, "learning_rate": 2.5462445660284173e-05, "loss": 1.8862, "step": 2364 }, { "epoch": 0.544836721764672, "grad_norm": 0.29320037364959717, "learning_rate": 2.5442341473390947e-05, "loss": 1.85, "step": 2365 }, { "epoch": 0.5450670966998791, "grad_norm": 0.3631782829761505, "learning_rate": 2.5422237000348276e-05, "loss": 1.9046, "step": 2366 }, { "epoch": 0.5452974716350861, "grad_norm": 0.31003430485725403, "learning_rate": 2.54021322541617e-05, "loss": 1.8612, "step": 2367 }, { "epoch": 0.5455278465702932, "grad_norm": 0.31012970209121704, "learning_rate": 2.5382027247836903e-05, "loss": 1.8406, "step": 2368 }, { "epoch": 0.5457582215055002, "grad_norm": 0.3111557960510254, "learning_rate": 2.5361921994379762e-05, "loss": 1.8925, "step": 2369 }, { "epoch": 0.5459885964407073, "grad_norm": 0.2876964211463928, "learning_rate": 2.5341816506796318e-05, "loss": 1.9059, "step": 2370 }, { "epoch": 0.5462189713759144, "grad_norm": 0.2685539722442627, "learning_rate": 2.5321710798092745e-05, "loss": 1.8723, "step": 2371 }, { "epoch": 0.5464493463111213, "grad_norm": 0.2796351909637451, "learning_rate": 2.5301604881275365e-05, "loss": 1.8597, "step": 2372 }, { "epoch": 0.5466797212463284, "grad_norm": 0.28710630536079407, "learning_rate": 2.5281498769350647e-05, "loss": 1.8284, "step": 2373 }, { "epoch": 0.5469100961815354, "grad_norm": 0.301307737827301, "learning_rate": 2.526139247532518e-05, "loss": 1.8519, "step": 2374 }, { "epoch": 0.5471404711167425, "grad_norm": 0.2865220010280609, "learning_rate": 2.5241286012205657e-05, "loss": 1.8848, "step": 2375 }, { "epoch": 0.5473708460519495, "grad_norm": 0.2865228056907654, "learning_rate": 2.52211793929989e-05, "loss": 1.8858, "step": 2376 }, { "epoch": 0.5476012209871566, "grad_norm": 0.3416576683521271, "learning_rate": 2.520107263071182e-05, "loss": 1.7914, "step": 2377 }, { "epoch": 0.5478315959223636, "grad_norm": 0.2910763919353485, "learning_rate": 2.518096573835143e-05, "loss": 1.8791, "step": 2378 }, { "epoch": 0.5480619708575707, "grad_norm": 0.2822611927986145, "learning_rate": 2.5160858728924814e-05, "loss": 1.8255, "step": 2379 }, { "epoch": 0.5482923457927777, "grad_norm": 0.30189481377601624, "learning_rate": 2.514075161543915e-05, "loss": 1.8654, "step": 2380 }, { "epoch": 0.5485227207279848, "grad_norm": 0.2851737141609192, "learning_rate": 2.5120644410901654e-05, "loss": 1.8345, "step": 2381 }, { "epoch": 0.5487530956631919, "grad_norm": 0.2902285158634186, "learning_rate": 2.510053712831964e-05, "loss": 1.8952, "step": 2382 }, { "epoch": 0.5489834705983989, "grad_norm": 0.2823216915130615, "learning_rate": 2.508042978070045e-05, "loss": 1.8575, "step": 2383 }, { "epoch": 0.549213845533606, "grad_norm": 0.2900450825691223, "learning_rate": 2.5060322381051454e-05, "loss": 1.8409, "step": 2384 }, { "epoch": 0.549444220468813, "grad_norm": 0.3021093010902405, "learning_rate": 2.5040214942380074e-05, "loss": 1.8484, "step": 2385 }, { "epoch": 0.5496745954040201, "grad_norm": 0.2864096760749817, "learning_rate": 2.502010747769378e-05, "loss": 1.8404, "step": 2386 }, { "epoch": 0.5499049703392271, "grad_norm": 0.3130319118499756, "learning_rate": 2.5e-05, "loss": 1.8459, "step": 2387 }, { "epoch": 0.5501353452744342, "grad_norm": 0.28626975417137146, "learning_rate": 2.4979892522306224e-05, "loss": 1.8994, "step": 2388 }, { "epoch": 0.5503657202096411, "grad_norm": 0.31107616424560547, "learning_rate": 2.495978505761993e-05, "loss": 1.8678, "step": 2389 }, { "epoch": 0.5505960951448482, "grad_norm": 0.3096865713596344, "learning_rate": 2.4939677618948552e-05, "loss": 1.8457, "step": 2390 }, { "epoch": 0.5508264700800553, "grad_norm": 0.2927708625793457, "learning_rate": 2.4919570219299563e-05, "loss": 1.831, "step": 2391 }, { "epoch": 0.5510568450152623, "grad_norm": 0.2778482735157013, "learning_rate": 2.4899462871680366e-05, "loss": 1.8631, "step": 2392 }, { "epoch": 0.5512872199504694, "grad_norm": 0.3557048738002777, "learning_rate": 2.4879355589098345e-05, "loss": 1.8935, "step": 2393 }, { "epoch": 0.5515175948856764, "grad_norm": 0.29121631383895874, "learning_rate": 2.485924838456086e-05, "loss": 1.8306, "step": 2394 }, { "epoch": 0.5517479698208835, "grad_norm": 0.2837740182876587, "learning_rate": 2.4839141271075188e-05, "loss": 1.8463, "step": 2395 }, { "epoch": 0.5519783447560905, "grad_norm": 0.3888038396835327, "learning_rate": 2.4819034261648573e-05, "loss": 1.8916, "step": 2396 }, { "epoch": 0.5522087196912976, "grad_norm": 0.5282106399536133, "learning_rate": 2.4798927369288186e-05, "loss": 1.8832, "step": 2397 }, { "epoch": 0.5524390946265046, "grad_norm": 0.3125828504562378, "learning_rate": 2.477882060700111e-05, "loss": 1.865, "step": 2398 }, { "epoch": 0.5526694695617117, "grad_norm": 0.2924236059188843, "learning_rate": 2.4758713987794356e-05, "loss": 1.8738, "step": 2399 }, { "epoch": 0.5528998444969188, "grad_norm": 0.30476614832878113, "learning_rate": 2.473860752467483e-05, "loss": 1.9142, "step": 2400 }, { "epoch": 0.5531302194321258, "grad_norm": 0.2958449423313141, "learning_rate": 2.4718501230649355e-05, "loss": 1.8819, "step": 2401 }, { "epoch": 0.5533605943673329, "grad_norm": 0.29546090960502625, "learning_rate": 2.4698395118724644e-05, "loss": 1.8831, "step": 2402 }, { "epoch": 0.5535909693025399, "grad_norm": 0.315680593252182, "learning_rate": 2.467828920190726e-05, "loss": 1.8347, "step": 2403 }, { "epoch": 0.553821344237747, "grad_norm": 0.4005518853664398, "learning_rate": 2.4658183493203688e-05, "loss": 1.823, "step": 2404 }, { "epoch": 0.554051719172954, "grad_norm": 0.2974913716316223, "learning_rate": 2.4638078005620243e-05, "loss": 1.8791, "step": 2405 }, { "epoch": 0.554282094108161, "grad_norm": 0.31167659163475037, "learning_rate": 2.4617972752163103e-05, "loss": 1.8529, "step": 2406 }, { "epoch": 0.554512469043368, "grad_norm": 0.3116181194782257, "learning_rate": 2.459786774583831e-05, "loss": 1.863, "step": 2407 }, { "epoch": 0.5547428439785751, "grad_norm": 0.539882481098175, "learning_rate": 2.4577762999651726e-05, "loss": 1.8714, "step": 2408 }, { "epoch": 0.5549732189137822, "grad_norm": 0.28322672843933105, "learning_rate": 2.4557658526609052e-05, "loss": 1.8136, "step": 2409 }, { "epoch": 0.5552035938489892, "grad_norm": 0.30764859914779663, "learning_rate": 2.4537554339715836e-05, "loss": 1.8543, "step": 2410 }, { "epoch": 0.5554339687841963, "grad_norm": 0.2949383556842804, "learning_rate": 2.4517450451977394e-05, "loss": 1.8655, "step": 2411 }, { "epoch": 0.5556643437194033, "grad_norm": 0.6247971653938293, "learning_rate": 2.449734687639888e-05, "loss": 1.8387, "step": 2412 }, { "epoch": 0.5558947186546104, "grad_norm": 0.3265955448150635, "learning_rate": 2.4477243625985254e-05, "loss": 1.8032, "step": 2413 }, { "epoch": 0.5561250935898174, "grad_norm": 0.3168938159942627, "learning_rate": 2.4457140713741237e-05, "loss": 1.8796, "step": 2414 }, { "epoch": 0.5563554685250245, "grad_norm": 0.30079004168510437, "learning_rate": 2.4437038152671354e-05, "loss": 1.8952, "step": 2415 }, { "epoch": 0.5565858434602315, "grad_norm": 0.30125516653060913, "learning_rate": 2.441693595577988e-05, "loss": 1.8677, "step": 2416 }, { "epoch": 0.5568162183954386, "grad_norm": 0.3655836284160614, "learning_rate": 2.4396834136070886e-05, "loss": 1.9096, "step": 2417 }, { "epoch": 0.5570465933306457, "grad_norm": 0.2724655568599701, "learning_rate": 2.4376732706548183e-05, "loss": 1.9056, "step": 2418 }, { "epoch": 0.5572769682658527, "grad_norm": 0.3083321154117584, "learning_rate": 2.4356631680215312e-05, "loss": 1.8334, "step": 2419 }, { "epoch": 0.5575073432010598, "grad_norm": 0.3010023534297943, "learning_rate": 2.4336531070075586e-05, "loss": 1.8602, "step": 2420 }, { "epoch": 0.5577377181362668, "grad_norm": 0.3055213391780853, "learning_rate": 2.431643088913202e-05, "loss": 1.8709, "step": 2421 }, { "epoch": 0.5579680930714738, "grad_norm": 0.28051331639289856, "learning_rate": 2.429633115038737e-05, "loss": 1.8883, "step": 2422 }, { "epoch": 0.5581984680066808, "grad_norm": 0.29171067476272583, "learning_rate": 2.4276231866844107e-05, "loss": 1.8435, "step": 2423 }, { "epoch": 0.5584288429418879, "grad_norm": 0.2991873025894165, "learning_rate": 2.425613305150439e-05, "loss": 1.8784, "step": 2424 }, { "epoch": 0.5586592178770949, "grad_norm": 0.3011345863342285, "learning_rate": 2.423603471737008e-05, "loss": 1.873, "step": 2425 }, { "epoch": 0.558889592812302, "grad_norm": 0.3309011161327362, "learning_rate": 2.4215936877442756e-05, "loss": 1.8112, "step": 2426 }, { "epoch": 0.559119967747509, "grad_norm": 0.293987900018692, "learning_rate": 2.4195839544723632e-05, "loss": 1.8414, "step": 2427 }, { "epoch": 0.5593503426827161, "grad_norm": 0.29255664348602295, "learning_rate": 2.4175742732213628e-05, "loss": 1.8959, "step": 2428 }, { "epoch": 0.5595807176179232, "grad_norm": 0.2941856384277344, "learning_rate": 2.4155646452913296e-05, "loss": 1.8407, "step": 2429 }, { "epoch": 0.5598110925531302, "grad_norm": 0.2960599660873413, "learning_rate": 2.4135550719822883e-05, "loss": 1.8319, "step": 2430 }, { "epoch": 0.5600414674883373, "grad_norm": 0.3027991056442261, "learning_rate": 2.411545554594226e-05, "loss": 1.8891, "step": 2431 }, { "epoch": 0.5602718424235443, "grad_norm": 0.2846747934818268, "learning_rate": 2.4095360944270917e-05, "loss": 1.8469, "step": 2432 }, { "epoch": 0.5605022173587514, "grad_norm": 0.2950774133205414, "learning_rate": 2.4075266927808018e-05, "loss": 1.8606, "step": 2433 }, { "epoch": 0.5607325922939584, "grad_norm": 0.28470537066459656, "learning_rate": 2.405517350955232e-05, "loss": 1.818, "step": 2434 }, { "epoch": 0.5609629672291655, "grad_norm": 0.303853303194046, "learning_rate": 2.4035080702502186e-05, "loss": 1.8586, "step": 2435 }, { "epoch": 0.5611933421643726, "grad_norm": 0.3355226516723633, "learning_rate": 2.4014988519655618e-05, "loss": 1.8805, "step": 2436 }, { "epoch": 0.5614237170995796, "grad_norm": 0.3010392189025879, "learning_rate": 2.3994896974010164e-05, "loss": 1.8842, "step": 2437 }, { "epoch": 0.5616540920347867, "grad_norm": 0.2781691551208496, "learning_rate": 2.397480607856301e-05, "loss": 1.8505, "step": 2438 }, { "epoch": 0.5618844669699936, "grad_norm": 0.3437471091747284, "learning_rate": 2.3954715846310893e-05, "loss": 1.8603, "step": 2439 }, { "epoch": 0.5621148419052007, "grad_norm": 0.2741662859916687, "learning_rate": 2.3934626290250118e-05, "loss": 1.8362, "step": 2440 }, { "epoch": 0.5623452168404077, "grad_norm": 0.3647768199443817, "learning_rate": 2.391453742337657e-05, "loss": 1.8181, "step": 2441 }, { "epoch": 0.5625755917756148, "grad_norm": 0.2769005298614502, "learning_rate": 2.389444925868569e-05, "loss": 1.876, "step": 2442 }, { "epoch": 0.5628059667108218, "grad_norm": 0.2976566255092621, "learning_rate": 2.387436180917243e-05, "loss": 1.8672, "step": 2443 }, { "epoch": 0.5630363416460289, "grad_norm": 0.3298698365688324, "learning_rate": 2.385427508783133e-05, "loss": 1.8818, "step": 2444 }, { "epoch": 0.563266716581236, "grad_norm": 0.26637837290763855, "learning_rate": 2.3834189107656417e-05, "loss": 1.8521, "step": 2445 }, { "epoch": 0.563497091516443, "grad_norm": 0.29219019412994385, "learning_rate": 2.3814103881641254e-05, "loss": 1.8677, "step": 2446 }, { "epoch": 0.5637274664516501, "grad_norm": 0.3123008906841278, "learning_rate": 2.379401942277893e-05, "loss": 1.8276, "step": 2447 }, { "epoch": 0.5639578413868571, "grad_norm": 0.3081786334514618, "learning_rate": 2.377393574406201e-05, "loss": 1.8601, "step": 2448 }, { "epoch": 0.5641882163220642, "grad_norm": 0.30329057574272156, "learning_rate": 2.375385285848257e-05, "loss": 1.8407, "step": 2449 }, { "epoch": 0.5644185912572712, "grad_norm": 0.28583553433418274, "learning_rate": 2.3733770779032184e-05, "loss": 1.8497, "step": 2450 }, { "epoch": 0.5646489661924783, "grad_norm": 0.26884129643440247, "learning_rate": 2.3713689518701882e-05, "loss": 1.8753, "step": 2451 }, { "epoch": 0.5648793411276853, "grad_norm": 0.2819401025772095, "learning_rate": 2.369360909048218e-05, "loss": 1.8764, "step": 2452 }, { "epoch": 0.5651097160628924, "grad_norm": 0.3177030682563782, "learning_rate": 2.3673529507363037e-05, "loss": 1.8305, "step": 2453 }, { "epoch": 0.5653400909980995, "grad_norm": 0.31076669692993164, "learning_rate": 2.365345078233389e-05, "loss": 1.8071, "step": 2454 }, { "epoch": 0.5655704659333065, "grad_norm": 0.28817838430404663, "learning_rate": 2.3633372928383618e-05, "loss": 1.8221, "step": 2455 }, { "epoch": 0.5658008408685135, "grad_norm": 0.2931694984436035, "learning_rate": 2.36132959585005e-05, "loss": 1.8815, "step": 2456 }, { "epoch": 0.5660312158037205, "grad_norm": 0.28011491894721985, "learning_rate": 2.35932198856723e-05, "loss": 1.8367, "step": 2457 }, { "epoch": 0.5662615907389276, "grad_norm": 0.2766767144203186, "learning_rate": 2.3573144722886157e-05, "loss": 1.8405, "step": 2458 }, { "epoch": 0.5664919656741346, "grad_norm": 0.606307864189148, "learning_rate": 2.355307048312863e-05, "loss": 1.8776, "step": 2459 }, { "epoch": 0.5667223406093417, "grad_norm": 0.30590906739234924, "learning_rate": 2.3532997179385713e-05, "loss": 1.8428, "step": 2460 }, { "epoch": 0.5669527155445487, "grad_norm": 0.31158676743507385, "learning_rate": 2.351292482464274e-05, "loss": 1.8581, "step": 2461 }, { "epoch": 0.5671830904797558, "grad_norm": 0.2902747094631195, "learning_rate": 2.3492853431884476e-05, "loss": 1.8224, "step": 2462 }, { "epoch": 0.5674134654149628, "grad_norm": 0.3056465685367584, "learning_rate": 2.3472783014095055e-05, "loss": 1.8614, "step": 2463 }, { "epoch": 0.5676438403501699, "grad_norm": 0.32567542791366577, "learning_rate": 2.3452713584257955e-05, "loss": 1.8428, "step": 2464 }, { "epoch": 0.567874215285377, "grad_norm": 0.30747154355049133, "learning_rate": 2.343264515535605e-05, "loss": 1.8748, "step": 2465 }, { "epoch": 0.568104590220584, "grad_norm": 0.2881675362586975, "learning_rate": 2.3412577740371533e-05, "loss": 1.8951, "step": 2466 }, { "epoch": 0.5683349651557911, "grad_norm": 0.3648599684238434, "learning_rate": 2.3392511352285967e-05, "loss": 1.8763, "step": 2467 }, { "epoch": 0.5685653400909981, "grad_norm": 0.3094540536403656, "learning_rate": 2.3372446004080252e-05, "loss": 1.8747, "step": 2468 }, { "epoch": 0.5687957150262052, "grad_norm": 0.29514652490615845, "learning_rate": 2.3352381708734576e-05, "loss": 1.8677, "step": 2469 }, { "epoch": 0.5690260899614122, "grad_norm": 0.3031538128852844, "learning_rate": 2.3332318479228503e-05, "loss": 1.876, "step": 2470 }, { "epoch": 0.5692564648966193, "grad_norm": 0.29497894644737244, "learning_rate": 2.331225632854087e-05, "loss": 1.8749, "step": 2471 }, { "epoch": 0.5694868398318264, "grad_norm": 0.3408273756504059, "learning_rate": 2.3292195269649815e-05, "loss": 1.8729, "step": 2472 }, { "epoch": 0.5697172147670333, "grad_norm": 0.30803605914115906, "learning_rate": 2.32721353155328e-05, "loss": 1.867, "step": 2473 }, { "epoch": 0.5699475897022404, "grad_norm": 0.3258834183216095, "learning_rate": 2.3252076479166536e-05, "loss": 1.8984, "step": 2474 }, { "epoch": 0.5701779646374474, "grad_norm": 0.2957330346107483, "learning_rate": 2.3232018773527028e-05, "loss": 1.8944, "step": 2475 }, { "epoch": 0.5704083395726545, "grad_norm": 0.2733860909938812, "learning_rate": 2.3211962211589562e-05, "loss": 1.8562, "step": 2476 }, { "epoch": 0.5706387145078615, "grad_norm": 0.30323314666748047, "learning_rate": 2.3191906806328657e-05, "loss": 1.8151, "step": 2477 }, { "epoch": 0.5708690894430686, "grad_norm": 0.2861703038215637, "learning_rate": 2.3171852570718097e-05, "loss": 1.835, "step": 2478 }, { "epoch": 0.5710994643782756, "grad_norm": 0.3100745379924774, "learning_rate": 2.3151799517730927e-05, "loss": 1.8102, "step": 2479 }, { "epoch": 0.5713298393134827, "grad_norm": 0.30160632729530334, "learning_rate": 2.3131747660339394e-05, "loss": 1.8657, "step": 2480 }, { "epoch": 0.5715602142486897, "grad_norm": 0.2877978980541229, "learning_rate": 2.3111697011515e-05, "loss": 1.858, "step": 2481 }, { "epoch": 0.5717905891838968, "grad_norm": 0.3101891279220581, "learning_rate": 2.309164758422843e-05, "loss": 1.8475, "step": 2482 }, { "epoch": 0.5720209641191039, "grad_norm": 0.2647848427295685, "learning_rate": 2.3071599391449624e-05, "loss": 1.8644, "step": 2483 }, { "epoch": 0.5722513390543109, "grad_norm": 0.2915893793106079, "learning_rate": 2.30515524461477e-05, "loss": 1.8549, "step": 2484 }, { "epoch": 0.572481713989518, "grad_norm": 0.3442462086677551, "learning_rate": 2.3031506761290957e-05, "loss": 1.8138, "step": 2485 }, { "epoch": 0.572712088924725, "grad_norm": 0.2868284285068512, "learning_rate": 2.3011462349846905e-05, "loss": 1.8768, "step": 2486 }, { "epoch": 0.5729424638599321, "grad_norm": 0.2741530239582062, "learning_rate": 2.29914192247822e-05, "loss": 1.8652, "step": 2487 }, { "epoch": 0.5731728387951391, "grad_norm": 0.313733845949173, "learning_rate": 2.2971377399062696e-05, "loss": 1.842, "step": 2488 }, { "epoch": 0.5734032137303461, "grad_norm": 0.28700318932533264, "learning_rate": 2.2951336885653398e-05, "loss": 1.8227, "step": 2489 }, { "epoch": 0.5736335886655531, "grad_norm": 0.2802146375179291, "learning_rate": 2.2931297697518432e-05, "loss": 1.8649, "step": 2490 }, { "epoch": 0.5738639636007602, "grad_norm": 0.29331615567207336, "learning_rate": 2.2911259847621123e-05, "loss": 1.8315, "step": 2491 }, { "epoch": 0.5740943385359673, "grad_norm": 0.2908317446708679, "learning_rate": 2.2891223348923884e-05, "loss": 1.8188, "step": 2492 }, { "epoch": 0.5743247134711743, "grad_norm": 0.2693787217140198, "learning_rate": 2.2871188214388263e-05, "loss": 1.8144, "step": 2493 }, { "epoch": 0.5745550884063814, "grad_norm": 0.30492302775382996, "learning_rate": 2.285115445697495e-05, "loss": 1.8171, "step": 2494 }, { "epoch": 0.5747854633415884, "grad_norm": 0.3689476549625397, "learning_rate": 2.283112208964371e-05, "loss": 1.7939, "step": 2495 }, { "epoch": 0.5750158382767955, "grad_norm": 0.2953653633594513, "learning_rate": 2.2811091125353417e-05, "loss": 1.8685, "step": 2496 }, { "epoch": 0.5752462132120025, "grad_norm": 0.3068760931491852, "learning_rate": 2.2791061577062076e-05, "loss": 1.8315, "step": 2497 }, { "epoch": 0.5754765881472096, "grad_norm": 0.2867404520511627, "learning_rate": 2.2771033457726714e-05, "loss": 1.8128, "step": 2498 }, { "epoch": 0.5757069630824166, "grad_norm": 0.3173063099384308, "learning_rate": 2.2751006780303474e-05, "loss": 1.8771, "step": 2499 }, { "epoch": 0.5759373380176237, "grad_norm": 0.3253519535064697, "learning_rate": 2.273098155774757e-05, "loss": 1.8486, "step": 2500 }, { "epoch": 0.5761677129528308, "grad_norm": 0.2744447886943817, "learning_rate": 2.2710957803013247e-05, "loss": 1.8657, "step": 2501 }, { "epoch": 0.5763980878880378, "grad_norm": 0.36645641922950745, "learning_rate": 2.2690935529053823e-05, "loss": 1.7947, "step": 2502 }, { "epoch": 0.5766284628232449, "grad_norm": 0.29170697927474976, "learning_rate": 2.2670914748821637e-05, "loss": 1.849, "step": 2503 }, { "epoch": 0.5768588377584519, "grad_norm": 0.306156188249588, "learning_rate": 2.2650895475268086e-05, "loss": 1.8332, "step": 2504 }, { "epoch": 0.577089212693659, "grad_norm": 0.29651039838790894, "learning_rate": 2.263087772134359e-05, "loss": 1.8612, "step": 2505 }, { "epoch": 0.5773195876288659, "grad_norm": 0.2843754291534424, "learning_rate": 2.261086149999755e-05, "loss": 1.8625, "step": 2506 }, { "epoch": 0.577549962564073, "grad_norm": 0.2866470217704773, "learning_rate": 2.2590846824178426e-05, "loss": 1.83, "step": 2507 }, { "epoch": 0.57778033749928, "grad_norm": 0.29508906602859497, "learning_rate": 2.257083370683365e-05, "loss": 1.848, "step": 2508 }, { "epoch": 0.5780107124344871, "grad_norm": 0.3443294167518616, "learning_rate": 2.255082216090964e-05, "loss": 1.8394, "step": 2509 }, { "epoch": 0.5782410873696942, "grad_norm": 0.2783217430114746, "learning_rate": 2.253081219935183e-05, "loss": 1.8467, "step": 2510 }, { "epoch": 0.5784714623049012, "grad_norm": 0.2737693786621094, "learning_rate": 2.251080383510459e-05, "loss": 1.8516, "step": 2511 }, { "epoch": 0.5787018372401083, "grad_norm": 0.27773216366767883, "learning_rate": 2.249079708111127e-05, "loss": 1.851, "step": 2512 }, { "epoch": 0.5789322121753153, "grad_norm": 0.29402753710746765, "learning_rate": 2.24707919503142e-05, "loss": 1.8402, "step": 2513 }, { "epoch": 0.5791625871105224, "grad_norm": 0.2904743254184723, "learning_rate": 2.2450788455654627e-05, "loss": 1.8475, "step": 2514 }, { "epoch": 0.5793929620457294, "grad_norm": 0.3069475293159485, "learning_rate": 2.2430786610072757e-05, "loss": 1.7986, "step": 2515 }, { "epoch": 0.5796233369809365, "grad_norm": 0.30780383944511414, "learning_rate": 2.241078642650774e-05, "loss": 1.8338, "step": 2516 }, { "epoch": 0.5798537119161435, "grad_norm": 0.2938470244407654, "learning_rate": 2.2390787917897628e-05, "loss": 1.8579, "step": 2517 }, { "epoch": 0.5800840868513506, "grad_norm": 0.2789580225944519, "learning_rate": 2.23707910971794e-05, "loss": 1.8582, "step": 2518 }, { "epoch": 0.5803144617865577, "grad_norm": 0.28654512763023376, "learning_rate": 2.235079597728893e-05, "loss": 1.858, "step": 2519 }, { "epoch": 0.5805448367217647, "grad_norm": 0.3018505573272705, "learning_rate": 2.233080257116103e-05, "loss": 1.8404, "step": 2520 }, { "epoch": 0.5807752116569718, "grad_norm": 0.30153289437294006, "learning_rate": 2.2310810891729365e-05, "loss": 1.8502, "step": 2521 }, { "epoch": 0.5810055865921788, "grad_norm": 0.2914064824581146, "learning_rate": 2.2290820951926487e-05, "loss": 1.8251, "step": 2522 }, { "epoch": 0.5812359615273858, "grad_norm": 0.3089613616466522, "learning_rate": 2.2270832764683846e-05, "loss": 1.8141, "step": 2523 }, { "epoch": 0.5814663364625928, "grad_norm": 0.3037077486515045, "learning_rate": 2.2250846342931734e-05, "loss": 1.8533, "step": 2524 }, { "epoch": 0.5816967113977999, "grad_norm": 0.31734463572502136, "learning_rate": 2.2230861699599308e-05, "loss": 1.8584, "step": 2525 }, { "epoch": 0.5819270863330069, "grad_norm": 0.29001158475875854, "learning_rate": 2.2210878847614592e-05, "loss": 1.8648, "step": 2526 }, { "epoch": 0.582157461268214, "grad_norm": 0.2988487780094147, "learning_rate": 2.2190897799904423e-05, "loss": 1.8812, "step": 2527 }, { "epoch": 0.582387836203421, "grad_norm": 0.3096851706504822, "learning_rate": 2.217091856939448e-05, "loss": 1.836, "step": 2528 }, { "epoch": 0.5826182111386281, "grad_norm": 0.28203511238098145, "learning_rate": 2.2150941169009294e-05, "loss": 1.8594, "step": 2529 }, { "epoch": 0.5828485860738352, "grad_norm": 0.30351686477661133, "learning_rate": 2.213096561167216e-05, "loss": 1.8098, "step": 2530 }, { "epoch": 0.5830789610090422, "grad_norm": 0.2948378622531891, "learning_rate": 2.2110991910305232e-05, "loss": 1.8707, "step": 2531 }, { "epoch": 0.5833093359442493, "grad_norm": 0.29262858629226685, "learning_rate": 2.2091020077829423e-05, "loss": 1.8345, "step": 2532 }, { "epoch": 0.5835397108794563, "grad_norm": 0.2837717533111572, "learning_rate": 2.2071050127164456e-05, "loss": 1.809, "step": 2533 }, { "epoch": 0.5837700858146634, "grad_norm": 0.28772294521331787, "learning_rate": 2.2051082071228854e-05, "loss": 1.8396, "step": 2534 }, { "epoch": 0.5840004607498704, "grad_norm": 0.29440733790397644, "learning_rate": 2.203111592293988e-05, "loss": 1.8422, "step": 2535 }, { "epoch": 0.5842308356850775, "grad_norm": 0.2783462703227997, "learning_rate": 2.2011151695213573e-05, "loss": 1.8603, "step": 2536 }, { "epoch": 0.5844612106202846, "grad_norm": 0.30907559394836426, "learning_rate": 2.1991189400964757e-05, "loss": 1.8295, "step": 2537 }, { "epoch": 0.5846915855554916, "grad_norm": 0.2937646210193634, "learning_rate": 2.1971229053106963e-05, "loss": 1.807, "step": 2538 }, { "epoch": 0.5849219604906987, "grad_norm": 0.6642971634864807, "learning_rate": 2.1951270664552498e-05, "loss": 1.8219, "step": 2539 }, { "epoch": 0.5851523354259056, "grad_norm": 0.3090325593948364, "learning_rate": 2.1931314248212366e-05, "loss": 1.8211, "step": 2540 }, { "epoch": 0.5853827103611127, "grad_norm": 0.28034722805023193, "learning_rate": 2.1911359816996342e-05, "loss": 1.8511, "step": 2541 }, { "epoch": 0.5856130852963197, "grad_norm": 0.29988551139831543, "learning_rate": 2.189140738381288e-05, "loss": 1.812, "step": 2542 }, { "epoch": 0.5858434602315268, "grad_norm": 0.2717093825340271, "learning_rate": 2.1871456961569137e-05, "loss": 1.8998, "step": 2543 }, { "epoch": 0.5860738351667338, "grad_norm": 0.29669463634490967, "learning_rate": 2.1851508563171008e-05, "loss": 1.8451, "step": 2544 }, { "epoch": 0.5863042101019409, "grad_norm": 0.315329372882843, "learning_rate": 2.183156220152305e-05, "loss": 1.869, "step": 2545 }, { "epoch": 0.586534585037148, "grad_norm": 0.307468980550766, "learning_rate": 2.181161788952849e-05, "loss": 1.8527, "step": 2546 }, { "epoch": 0.586764959972355, "grad_norm": 0.2745719254016876, "learning_rate": 2.1791675640089278e-05, "loss": 1.8239, "step": 2547 }, { "epoch": 0.5869953349075621, "grad_norm": 0.2886756360530853, "learning_rate": 2.177173546610597e-05, "loss": 1.8474, "step": 2548 }, { "epoch": 0.5872257098427691, "grad_norm": 0.3234466314315796, "learning_rate": 2.175179738047781e-05, "loss": 1.8078, "step": 2549 }, { "epoch": 0.5874560847779762, "grad_norm": 0.2915794551372528, "learning_rate": 2.1731861396102714e-05, "loss": 1.8343, "step": 2550 }, { "epoch": 0.5876864597131832, "grad_norm": 0.28853878378868103, "learning_rate": 2.1711927525877192e-05, "loss": 1.8641, "step": 2551 }, { "epoch": 0.5879168346483903, "grad_norm": 0.28549548983573914, "learning_rate": 2.1691995782696415e-05, "loss": 1.8403, "step": 2552 }, { "epoch": 0.5881472095835973, "grad_norm": 0.2723354697227478, "learning_rate": 2.1672066179454158e-05, "loss": 1.8831, "step": 2553 }, { "epoch": 0.5883775845188044, "grad_norm": 0.2954583764076233, "learning_rate": 2.1652138729042846e-05, "loss": 1.7923, "step": 2554 }, { "epoch": 0.5886079594540115, "grad_norm": 0.2957284450531006, "learning_rate": 2.1632213444353482e-05, "loss": 1.856, "step": 2555 }, { "epoch": 0.5888383343892184, "grad_norm": 0.28720954060554504, "learning_rate": 2.1612290338275664e-05, "loss": 1.8639, "step": 2556 }, { "epoch": 0.5890687093244255, "grad_norm": 0.28308477997779846, "learning_rate": 2.159236942369761e-05, "loss": 1.8478, "step": 2557 }, { "epoch": 0.5892990842596325, "grad_norm": 0.29915404319763184, "learning_rate": 2.1572450713506098e-05, "loss": 1.8231, "step": 2558 }, { "epoch": 0.5895294591948396, "grad_norm": 0.3009408414363861, "learning_rate": 2.1552534220586475e-05, "loss": 1.7832, "step": 2559 }, { "epoch": 0.5897598341300466, "grad_norm": 0.2969898581504822, "learning_rate": 2.153261995782268e-05, "loss": 1.8756, "step": 2560 }, { "epoch": 0.5899902090652537, "grad_norm": 0.3001381754875183, "learning_rate": 2.1512707938097182e-05, "loss": 1.8321, "step": 2561 }, { "epoch": 0.5902205840004607, "grad_norm": 0.29610010981559753, "learning_rate": 2.1492798174291008e-05, "loss": 1.805, "step": 2562 }, { "epoch": 0.5904509589356678, "grad_norm": 0.28272441029548645, "learning_rate": 2.1472890679283742e-05, "loss": 1.8865, "step": 2563 }, { "epoch": 0.5906813338708748, "grad_norm": 0.3029026687145233, "learning_rate": 2.1452985465953466e-05, "loss": 1.8263, "step": 2564 }, { "epoch": 0.5909117088060819, "grad_norm": 0.31427106261253357, "learning_rate": 2.1433082547176817e-05, "loss": 1.7995, "step": 2565 }, { "epoch": 0.591142083741289, "grad_norm": 0.31010016798973083, "learning_rate": 2.1413181935828948e-05, "loss": 1.8498, "step": 2566 }, { "epoch": 0.591372458676496, "grad_norm": 0.2968805432319641, "learning_rate": 2.1393283644783486e-05, "loss": 1.845, "step": 2567 }, { "epoch": 0.5916028336117031, "grad_norm": 0.2810072600841522, "learning_rate": 2.1373387686912592e-05, "loss": 1.8328, "step": 2568 }, { "epoch": 0.5918332085469101, "grad_norm": 0.29528313875198364, "learning_rate": 2.1353494075086894e-05, "loss": 1.8673, "step": 2569 }, { "epoch": 0.5920635834821172, "grad_norm": 0.32670342922210693, "learning_rate": 2.1333602822175526e-05, "loss": 1.824, "step": 2570 }, { "epoch": 0.5922939584173242, "grad_norm": 0.33715903759002686, "learning_rate": 2.131371394104608e-05, "loss": 1.8375, "step": 2571 }, { "epoch": 0.5925243333525313, "grad_norm": 0.33614447712898254, "learning_rate": 2.129382744456461e-05, "loss": 1.8998, "step": 2572 }, { "epoch": 0.5927547082877382, "grad_norm": 0.2827538847923279, "learning_rate": 2.1273943345595637e-05, "loss": 1.8463, "step": 2573 }, { "epoch": 0.5929850832229453, "grad_norm": 0.2857297956943512, "learning_rate": 2.125406165700214e-05, "loss": 1.8341, "step": 2574 }, { "epoch": 0.5932154581581524, "grad_norm": 0.2878466546535492, "learning_rate": 2.12341823916455e-05, "loss": 1.8376, "step": 2575 }, { "epoch": 0.5934458330933594, "grad_norm": 0.2947518229484558, "learning_rate": 2.1214305562385592e-05, "loss": 1.8918, "step": 2576 }, { "epoch": 0.5936762080285665, "grad_norm": 0.2864413261413574, "learning_rate": 2.1194431182080645e-05, "loss": 1.8381, "step": 2577 }, { "epoch": 0.5939065829637735, "grad_norm": 0.3152362108230591, "learning_rate": 2.1174559263587356e-05, "loss": 1.8793, "step": 2578 }, { "epoch": 0.5941369578989806, "grad_norm": 0.28977373242378235, "learning_rate": 2.115468981976083e-05, "loss": 1.85, "step": 2579 }, { "epoch": 0.5943673328341876, "grad_norm": 0.3054090440273285, "learning_rate": 2.113482286345452e-05, "loss": 1.8531, "step": 2580 }, { "epoch": 0.5945977077693947, "grad_norm": 0.2902108132839203, "learning_rate": 2.111495840752033e-05, "loss": 1.8674, "step": 2581 }, { "epoch": 0.5948280827046017, "grad_norm": 0.2926919460296631, "learning_rate": 2.1095096464808506e-05, "loss": 1.8893, "step": 2582 }, { "epoch": 0.5950584576398088, "grad_norm": 0.29574039578437805, "learning_rate": 2.107523704816768e-05, "loss": 1.782, "step": 2583 }, { "epoch": 0.5952888325750159, "grad_norm": 0.2679656147956848, "learning_rate": 2.105538017044487e-05, "loss": 1.8509, "step": 2584 }, { "epoch": 0.5955192075102229, "grad_norm": 0.29585549235343933, "learning_rate": 2.1035525844485415e-05, "loss": 1.809, "step": 2585 }, { "epoch": 0.59574958244543, "grad_norm": 0.27863675355911255, "learning_rate": 2.1015674083133026e-05, "loss": 1.8667, "step": 2586 }, { "epoch": 0.595979957380637, "grad_norm": 0.29813405871391296, "learning_rate": 2.0995824899229766e-05, "loss": 1.8588, "step": 2587 }, { "epoch": 0.5962103323158441, "grad_norm": 0.38296520709991455, "learning_rate": 2.0975978305616e-05, "loss": 1.8088, "step": 2588 }, { "epoch": 0.5964407072510511, "grad_norm": 0.3027479648590088, "learning_rate": 2.0956134315130436e-05, "loss": 1.8581, "step": 2589 }, { "epoch": 0.5966710821862581, "grad_norm": 0.2989402413368225, "learning_rate": 2.0936292940610094e-05, "loss": 1.8361, "step": 2590 }, { "epoch": 0.5969014571214651, "grad_norm": 0.2920522689819336, "learning_rate": 2.0916454194890307e-05, "loss": 1.8596, "step": 2591 }, { "epoch": 0.5971318320566722, "grad_norm": 0.2853662669658661, "learning_rate": 2.0896618090804705e-05, "loss": 1.8458, "step": 2592 }, { "epoch": 0.5973622069918793, "grad_norm": 0.318293035030365, "learning_rate": 2.0876784641185197e-05, "loss": 1.82, "step": 2593 }, { "epoch": 0.5975925819270863, "grad_norm": 0.3034115433692932, "learning_rate": 2.0856953858861995e-05, "loss": 1.8313, "step": 2594 }, { "epoch": 0.5978229568622934, "grad_norm": 0.29821979999542236, "learning_rate": 2.0837125756663574e-05, "loss": 1.8243, "step": 2595 }, { "epoch": 0.5980533317975004, "grad_norm": 0.2782273292541504, "learning_rate": 2.081730034741667e-05, "loss": 1.8821, "step": 2596 }, { "epoch": 0.5982837067327075, "grad_norm": 0.3095095455646515, "learning_rate": 2.07974776439463e-05, "loss": 1.8786, "step": 2597 }, { "epoch": 0.5985140816679145, "grad_norm": 0.3202097713947296, "learning_rate": 2.07776576590757e-05, "loss": 1.8893, "step": 2598 }, { "epoch": 0.5987444566031216, "grad_norm": 0.3395479917526245, "learning_rate": 2.0757840405626358e-05, "loss": 1.8282, "step": 2599 }, { "epoch": 0.5989748315383286, "grad_norm": 0.3024066090583801, "learning_rate": 2.0738025896418016e-05, "loss": 1.8399, "step": 2600 }, { "epoch": 0.5992052064735357, "grad_norm": 0.26807546615600586, "learning_rate": 2.0718214144268616e-05, "loss": 1.8496, "step": 2601 }, { "epoch": 0.5994355814087428, "grad_norm": 0.30377814173698425, "learning_rate": 2.0698405161994314e-05, "loss": 1.8317, "step": 2602 }, { "epoch": 0.5996659563439498, "grad_norm": 0.28162291646003723, "learning_rate": 2.0678598962409504e-05, "loss": 1.8499, "step": 2603 }, { "epoch": 0.5998963312791569, "grad_norm": 0.2837395668029785, "learning_rate": 2.0658795558326743e-05, "loss": 1.7889, "step": 2604 }, { "epoch": 0.6001267062143639, "grad_norm": 0.2979898154735565, "learning_rate": 2.063899496255681e-05, "loss": 1.8261, "step": 2605 }, { "epoch": 0.600357081149571, "grad_norm": 0.3074885606765747, "learning_rate": 2.0619197187908638e-05, "loss": 1.8749, "step": 2606 }, { "epoch": 0.6005874560847779, "grad_norm": 0.30022934079170227, "learning_rate": 2.0599402247189364e-05, "loss": 1.8569, "step": 2607 }, { "epoch": 0.600817831019985, "grad_norm": 0.3287382423877716, "learning_rate": 2.057961015320428e-05, "loss": 1.8023, "step": 2608 }, { "epoch": 0.601048205955192, "grad_norm": 0.2562629282474518, "learning_rate": 2.0559820918756825e-05, "loss": 1.863, "step": 2609 }, { "epoch": 0.6012785808903991, "grad_norm": 0.31106942892074585, "learning_rate": 2.054003455664861e-05, "loss": 1.7658, "step": 2610 }, { "epoch": 0.6015089558256062, "grad_norm": 0.27053338289260864, "learning_rate": 2.0520251079679373e-05, "loss": 1.8368, "step": 2611 }, { "epoch": 0.6017393307608132, "grad_norm": 0.2798379957675934, "learning_rate": 2.0500470500646978e-05, "loss": 1.8198, "step": 2612 }, { "epoch": 0.6019697056960203, "grad_norm": 0.3092193901538849, "learning_rate": 2.0480692832347446e-05, "loss": 1.818, "step": 2613 }, { "epoch": 0.6022000806312273, "grad_norm": 0.30552536249160767, "learning_rate": 2.0460918087574877e-05, "loss": 1.814, "step": 2614 }, { "epoch": 0.6024304555664344, "grad_norm": 0.26858285069465637, "learning_rate": 2.0441146279121496e-05, "loss": 1.8387, "step": 2615 }, { "epoch": 0.6026608305016414, "grad_norm": 0.2703883945941925, "learning_rate": 2.0421377419777648e-05, "loss": 1.8368, "step": 2616 }, { "epoch": 0.6028912054368485, "grad_norm": 0.2770843207836151, "learning_rate": 2.040161152233174e-05, "loss": 1.8572, "step": 2617 }, { "epoch": 0.6031215803720555, "grad_norm": 0.3309488296508789, "learning_rate": 2.0381848599570276e-05, "loss": 1.8396, "step": 2618 }, { "epoch": 0.6033519553072626, "grad_norm": 0.30366387963294983, "learning_rate": 2.0362088664277823e-05, "loss": 1.861, "step": 2619 }, { "epoch": 0.6035823302424697, "grad_norm": 0.2881322205066681, "learning_rate": 2.0342331729237046e-05, "loss": 1.848, "step": 2620 }, { "epoch": 0.6038127051776767, "grad_norm": 0.27569669485092163, "learning_rate": 2.0322577807228648e-05, "loss": 1.8452, "step": 2621 }, { "epoch": 0.6040430801128838, "grad_norm": 0.2932263910770416, "learning_rate": 2.0302826911031368e-05, "loss": 1.7915, "step": 2622 }, { "epoch": 0.6042734550480907, "grad_norm": 0.28159990906715393, "learning_rate": 2.028307905342202e-05, "loss": 1.8245, "step": 2623 }, { "epoch": 0.6045038299832978, "grad_norm": 0.3174625337123871, "learning_rate": 2.0263334247175445e-05, "loss": 1.8319, "step": 2624 }, { "epoch": 0.6047342049185048, "grad_norm": 0.2774382531642914, "learning_rate": 2.0243592505064474e-05, "loss": 1.8838, "step": 2625 }, { "epoch": 0.6049645798537119, "grad_norm": 0.292835533618927, "learning_rate": 2.0223853839860018e-05, "loss": 1.8623, "step": 2626 }, { "epoch": 0.6051949547889189, "grad_norm": 0.2814779579639435, "learning_rate": 2.020411826433093e-05, "loss": 1.829, "step": 2627 }, { "epoch": 0.605425329724126, "grad_norm": 0.3027658462524414, "learning_rate": 2.0184385791244117e-05, "loss": 1.788, "step": 2628 }, { "epoch": 0.605655704659333, "grad_norm": 0.2796262204647064, "learning_rate": 2.016465643336446e-05, "loss": 1.8175, "step": 2629 }, { "epoch": 0.6058860795945401, "grad_norm": 0.2987147569656372, "learning_rate": 2.0144930203454816e-05, "loss": 1.8263, "step": 2630 }, { "epoch": 0.6061164545297472, "grad_norm": 0.3067782521247864, "learning_rate": 2.0125207114276035e-05, "loss": 1.8274, "step": 2631 }, { "epoch": 0.6063468294649542, "grad_norm": 0.3148927092552185, "learning_rate": 2.0105487178586926e-05, "loss": 1.8438, "step": 2632 }, { "epoch": 0.6065772044001613, "grad_norm": 0.2963244318962097, "learning_rate": 2.008577040914425e-05, "loss": 1.8022, "step": 2633 }, { "epoch": 0.6068075793353683, "grad_norm": 0.29066532850265503, "learning_rate": 2.0066056818702745e-05, "loss": 1.8314, "step": 2634 }, { "epoch": 0.6070379542705754, "grad_norm": 0.28807196021080017, "learning_rate": 2.0046346420015067e-05, "loss": 1.8241, "step": 2635 }, { "epoch": 0.6072683292057824, "grad_norm": 0.28036564588546753, "learning_rate": 2.002663922583181e-05, "loss": 1.8552, "step": 2636 }, { "epoch": 0.6074987041409895, "grad_norm": 0.2690677344799042, "learning_rate": 2.000693524890152e-05, "loss": 1.8269, "step": 2637 }, { "epoch": 0.6077290790761966, "grad_norm": 0.2991630434989929, "learning_rate": 1.998723450197063e-05, "loss": 1.8673, "step": 2638 }, { "epoch": 0.6079594540114036, "grad_norm": 0.2787671983242035, "learning_rate": 1.9967536997783494e-05, "loss": 1.8409, "step": 2639 }, { "epoch": 0.6081898289466106, "grad_norm": 0.2987036108970642, "learning_rate": 1.9947842749082393e-05, "loss": 1.8691, "step": 2640 }, { "epoch": 0.6084202038818176, "grad_norm": 0.2780972719192505, "learning_rate": 1.992815176860746e-05, "loss": 1.8748, "step": 2641 }, { "epoch": 0.6086505788170247, "grad_norm": 0.2830334007740021, "learning_rate": 1.9908464069096742e-05, "loss": 1.8685, "step": 2642 }, { "epoch": 0.6088809537522317, "grad_norm": 0.26656150817871094, "learning_rate": 1.988877966328615e-05, "loss": 1.8399, "step": 2643 }, { "epoch": 0.6091113286874388, "grad_norm": 0.2936076521873474, "learning_rate": 1.9869098563909475e-05, "loss": 1.8416, "step": 2644 }, { "epoch": 0.6093417036226458, "grad_norm": 0.277855783700943, "learning_rate": 1.984942078369837e-05, "loss": 1.841, "step": 2645 }, { "epoch": 0.6095720785578529, "grad_norm": 0.28765249252319336, "learning_rate": 1.982974633538232e-05, "loss": 1.7869, "step": 2646 }, { "epoch": 0.60980245349306, "grad_norm": 0.292584627866745, "learning_rate": 1.981007523168869e-05, "loss": 1.8339, "step": 2647 }, { "epoch": 0.610032828428267, "grad_norm": 0.3170861303806305, "learning_rate": 1.979040748534264e-05, "loss": 1.8119, "step": 2648 }, { "epoch": 0.6102632033634741, "grad_norm": 0.32868313789367676, "learning_rate": 1.977074310906719e-05, "loss": 1.8708, "step": 2649 }, { "epoch": 0.6104935782986811, "grad_norm": 0.2933259904384613, "learning_rate": 1.9751082115583174e-05, "loss": 1.7937, "step": 2650 }, { "epoch": 0.6107239532338882, "grad_norm": 0.2894025146961212, "learning_rate": 1.9731424517609225e-05, "loss": 1.8427, "step": 2651 }, { "epoch": 0.6109543281690952, "grad_norm": 0.29598268866539, "learning_rate": 1.9711770327861783e-05, "loss": 1.8315, "step": 2652 }, { "epoch": 0.6111847031043023, "grad_norm": 0.30586478114128113, "learning_rate": 1.9692119559055104e-05, "loss": 1.7912, "step": 2653 }, { "epoch": 0.6114150780395093, "grad_norm": 0.2936316430568695, "learning_rate": 1.9672472223901198e-05, "loss": 1.8127, "step": 2654 }, { "epoch": 0.6116454529747164, "grad_norm": 0.3116845190525055, "learning_rate": 1.9652828335109882e-05, "loss": 1.843, "step": 2655 }, { "epoch": 0.6118758279099235, "grad_norm": 0.29598772525787354, "learning_rate": 1.9633187905388718e-05, "loss": 1.7984, "step": 2656 }, { "epoch": 0.6121062028451304, "grad_norm": 0.30691006779670715, "learning_rate": 1.9613550947443056e-05, "loss": 1.8153, "step": 2657 }, { "epoch": 0.6123365777803375, "grad_norm": 0.29004013538360596, "learning_rate": 1.9593917473975994e-05, "loss": 1.8703, "step": 2658 }, { "epoch": 0.6125669527155445, "grad_norm": 0.29097646474838257, "learning_rate": 1.9574287497688345e-05, "loss": 1.8379, "step": 2659 }, { "epoch": 0.6127973276507516, "grad_norm": 0.29144570231437683, "learning_rate": 1.9554661031278712e-05, "loss": 1.8225, "step": 2660 }, { "epoch": 0.6130277025859586, "grad_norm": 0.2721238434314728, "learning_rate": 1.953503808744339e-05, "loss": 1.8252, "step": 2661 }, { "epoch": 0.6132580775211657, "grad_norm": 0.3094329833984375, "learning_rate": 1.9515418678876398e-05, "loss": 1.8193, "step": 2662 }, { "epoch": 0.6134884524563727, "grad_norm": 0.26936590671539307, "learning_rate": 1.9495802818269493e-05, "loss": 1.8434, "step": 2663 }, { "epoch": 0.6137188273915798, "grad_norm": 0.2923469841480255, "learning_rate": 1.9476190518312103e-05, "loss": 1.8232, "step": 2664 }, { "epoch": 0.6139492023267868, "grad_norm": 0.29124948382377625, "learning_rate": 1.9456581791691373e-05, "loss": 1.8584, "step": 2665 }, { "epoch": 0.6141795772619939, "grad_norm": 0.27914559841156006, "learning_rate": 1.9436976651092144e-05, "loss": 1.8735, "step": 2666 }, { "epoch": 0.614409952197201, "grad_norm": 0.2884381115436554, "learning_rate": 1.941737510919691e-05, "loss": 1.8197, "step": 2667 }, { "epoch": 0.614640327132408, "grad_norm": 0.29700732231140137, "learning_rate": 1.9397777178685855e-05, "loss": 1.841, "step": 2668 }, { "epoch": 0.6148707020676151, "grad_norm": 0.27893298864364624, "learning_rate": 1.9378182872236837e-05, "loss": 1.8639, "step": 2669 }, { "epoch": 0.6151010770028221, "grad_norm": 0.28308361768722534, "learning_rate": 1.935859220252535e-05, "loss": 1.8465, "step": 2670 }, { "epoch": 0.6153314519380292, "grad_norm": 0.2699137032032013, "learning_rate": 1.9339005182224545e-05, "loss": 1.8701, "step": 2671 }, { "epoch": 0.6155618268732362, "grad_norm": 0.27939727902412415, "learning_rate": 1.9319421824005192e-05, "loss": 1.8527, "step": 2672 }, { "epoch": 0.6157922018084433, "grad_norm": 0.26618412137031555, "learning_rate": 1.9299842140535735e-05, "loss": 1.8182, "step": 2673 }, { "epoch": 0.6160225767436502, "grad_norm": 0.28039252758026123, "learning_rate": 1.928026614448221e-05, "loss": 1.8465, "step": 2674 }, { "epoch": 0.6162529516788573, "grad_norm": 0.29064783453941345, "learning_rate": 1.926069384850826e-05, "loss": 1.829, "step": 2675 }, { "epoch": 0.6164833266140644, "grad_norm": 0.26629796624183655, "learning_rate": 1.924112526527516e-05, "loss": 1.8428, "step": 2676 }, { "epoch": 0.6167137015492714, "grad_norm": 0.2812316119670868, "learning_rate": 1.9221560407441763e-05, "loss": 1.8837, "step": 2677 }, { "epoch": 0.6169440764844785, "grad_norm": 0.28533315658569336, "learning_rate": 1.9201999287664523e-05, "loss": 1.828, "step": 2678 }, { "epoch": 0.6171744514196855, "grad_norm": 0.28231483697891235, "learning_rate": 1.9182441918597484e-05, "loss": 1.8131, "step": 2679 }, { "epoch": 0.6174048263548926, "grad_norm": 0.2767685055732727, "learning_rate": 1.916288831289223e-05, "loss": 1.8574, "step": 2680 }, { "epoch": 0.6176352012900996, "grad_norm": 0.28399473428726196, "learning_rate": 1.914333848319795e-05, "loss": 1.8283, "step": 2681 }, { "epoch": 0.6178655762253067, "grad_norm": 0.30244871973991394, "learning_rate": 1.912379244216137e-05, "loss": 1.8289, "step": 2682 }, { "epoch": 0.6180959511605137, "grad_norm": 0.3040837347507477, "learning_rate": 1.910425020242676e-05, "loss": 1.7988, "step": 2683 }, { "epoch": 0.6183263260957208, "grad_norm": 0.2875714898109436, "learning_rate": 1.9084711776635958e-05, "loss": 1.8783, "step": 2684 }, { "epoch": 0.6185567010309279, "grad_norm": 0.29130589962005615, "learning_rate": 1.90651771774283e-05, "loss": 1.8437, "step": 2685 }, { "epoch": 0.6187870759661349, "grad_norm": 0.273703008890152, "learning_rate": 1.9045646417440657e-05, "loss": 1.838, "step": 2686 }, { "epoch": 0.619017450901342, "grad_norm": 0.2994091510772705, "learning_rate": 1.9026119509307443e-05, "loss": 1.8099, "step": 2687 }, { "epoch": 0.619247825836549, "grad_norm": 0.2950076162815094, "learning_rate": 1.9006596465660548e-05, "loss": 1.7664, "step": 2688 }, { "epoch": 0.6194782007717561, "grad_norm": 0.28494006395339966, "learning_rate": 1.8987077299129367e-05, "loss": 1.8645, "step": 2689 }, { "epoch": 0.619708575706963, "grad_norm": 0.280617892742157, "learning_rate": 1.896756202234081e-05, "loss": 1.8486, "step": 2690 }, { "epoch": 0.6199389506421701, "grad_norm": 0.3080570697784424, "learning_rate": 1.894805064791924e-05, "loss": 1.7658, "step": 2691 }, { "epoch": 0.6201693255773771, "grad_norm": 0.2850179076194763, "learning_rate": 1.892854318848652e-05, "loss": 1.8272, "step": 2692 }, { "epoch": 0.6203997005125842, "grad_norm": 0.2938685417175293, "learning_rate": 1.890903965666195e-05, "loss": 1.8872, "step": 2693 }, { "epoch": 0.6206300754477913, "grad_norm": 0.32391637563705444, "learning_rate": 1.8889540065062338e-05, "loss": 1.8335, "step": 2694 }, { "epoch": 0.6208604503829983, "grad_norm": 0.29258719086647034, "learning_rate": 1.88700444263019e-05, "loss": 1.8129, "step": 2695 }, { "epoch": 0.6210908253182054, "grad_norm": 0.3039275109767914, "learning_rate": 1.8850552752992298e-05, "loss": 1.8156, "step": 2696 }, { "epoch": 0.6213212002534124, "grad_norm": 0.2799144387245178, "learning_rate": 1.8831065057742657e-05, "loss": 1.8867, "step": 2697 }, { "epoch": 0.6215515751886195, "grad_norm": 0.31009629368782043, "learning_rate": 1.8811581353159518e-05, "loss": 1.8497, "step": 2698 }, { "epoch": 0.6217819501238265, "grad_norm": 0.289315402507782, "learning_rate": 1.8792101651846804e-05, "loss": 1.8285, "step": 2699 }, { "epoch": 0.6220123250590336, "grad_norm": 0.29361626505851746, "learning_rate": 1.877262596640591e-05, "loss": 1.8474, "step": 2700 }, { "epoch": 0.6222426999942406, "grad_norm": 0.28731948137283325, "learning_rate": 1.8753154309435582e-05, "loss": 1.8455, "step": 2701 }, { "epoch": 0.6224730749294477, "grad_norm": 0.29488179087638855, "learning_rate": 1.8733686693531985e-05, "loss": 1.8017, "step": 2702 }, { "epoch": 0.6227034498646548, "grad_norm": 0.30670037865638733, "learning_rate": 1.871422313128867e-05, "loss": 1.8219, "step": 2703 }, { "epoch": 0.6229338247998618, "grad_norm": 0.28978481888771057, "learning_rate": 1.869476363529656e-05, "loss": 1.8152, "step": 2704 }, { "epoch": 0.6231641997350689, "grad_norm": 0.2739492952823639, "learning_rate": 1.867530821814393e-05, "loss": 1.866, "step": 2705 }, { "epoch": 0.6233945746702759, "grad_norm": 0.2669689357280731, "learning_rate": 1.865585689241646e-05, "loss": 1.8425, "step": 2706 }, { "epoch": 0.6236249496054829, "grad_norm": 0.29171085357666016, "learning_rate": 1.863640967069714e-05, "loss": 1.8068, "step": 2707 }, { "epoch": 0.6238553245406899, "grad_norm": 0.28707852959632874, "learning_rate": 1.861696656556633e-05, "loss": 1.8186, "step": 2708 }, { "epoch": 0.624085699475897, "grad_norm": 0.3037241995334625, "learning_rate": 1.859752758960171e-05, "loss": 1.8046, "step": 2709 }, { "epoch": 0.624316074411104, "grad_norm": 0.30157989263534546, "learning_rate": 1.8578092755378306e-05, "loss": 1.8057, "step": 2710 }, { "epoch": 0.6245464493463111, "grad_norm": 0.26054251194000244, "learning_rate": 1.8558662075468466e-05, "loss": 1.8348, "step": 2711 }, { "epoch": 0.6247768242815182, "grad_norm": 0.28202253580093384, "learning_rate": 1.853923556244182e-05, "loss": 1.8391, "step": 2712 }, { "epoch": 0.6250071992167252, "grad_norm": 0.2866058647632599, "learning_rate": 1.8519813228865345e-05, "loss": 1.7859, "step": 2713 }, { "epoch": 0.6252375741519323, "grad_norm": 0.3000645637512207, "learning_rate": 1.850039508730328e-05, "loss": 1.7729, "step": 2714 }, { "epoch": 0.6254679490871393, "grad_norm": 0.2843453884124756, "learning_rate": 1.8480981150317163e-05, "loss": 1.8591, "step": 2715 }, { "epoch": 0.6256983240223464, "grad_norm": 0.3123877942562103, "learning_rate": 1.8461571430465834e-05, "loss": 1.7716, "step": 2716 }, { "epoch": 0.6259286989575534, "grad_norm": 0.29820388555526733, "learning_rate": 1.8442165940305368e-05, "loss": 1.8065, "step": 2717 }, { "epoch": 0.6261590738927605, "grad_norm": 0.283799409866333, "learning_rate": 1.8422764692389123e-05, "loss": 1.8425, "step": 2718 }, { "epoch": 0.6263894488279675, "grad_norm": 0.3212757408618927, "learning_rate": 1.840336769926772e-05, "loss": 1.8086, "step": 2719 }, { "epoch": 0.6266198237631746, "grad_norm": 0.30716052651405334, "learning_rate": 1.838397497348901e-05, "loss": 1.8116, "step": 2720 }, { "epoch": 0.6268501986983817, "grad_norm": 0.28902870416641235, "learning_rate": 1.8364586527598105e-05, "loss": 1.8376, "step": 2721 }, { "epoch": 0.6270805736335887, "grad_norm": 0.29228028655052185, "learning_rate": 1.834520237413732e-05, "loss": 1.8414, "step": 2722 }, { "epoch": 0.6273109485687958, "grad_norm": 0.28650397062301636, "learning_rate": 1.8325822525646208e-05, "loss": 1.8724, "step": 2723 }, { "epoch": 0.6275413235040027, "grad_norm": 0.2873867452144623, "learning_rate": 1.8306446994661558e-05, "loss": 1.8578, "step": 2724 }, { "epoch": 0.6277716984392098, "grad_norm": 0.2857045829296112, "learning_rate": 1.8287075793717324e-05, "loss": 1.84, "step": 2725 }, { "epoch": 0.6280020733744168, "grad_norm": 0.27860191464424133, "learning_rate": 1.8267708935344687e-05, "loss": 1.8557, "step": 2726 }, { "epoch": 0.6282324483096239, "grad_norm": 0.2874991297721863, "learning_rate": 1.8248346432072027e-05, "loss": 1.8413, "step": 2727 }, { "epoch": 0.6284628232448309, "grad_norm": 0.2989272177219391, "learning_rate": 1.8228988296424877e-05, "loss": 1.8307, "step": 2728 }, { "epoch": 0.628693198180038, "grad_norm": 0.26092085242271423, "learning_rate": 1.8209634540925966e-05, "loss": 1.8298, "step": 2729 }, { "epoch": 0.628923573115245, "grad_norm": 0.28993719816207886, "learning_rate": 1.8190285178095172e-05, "loss": 1.8058, "step": 2730 }, { "epoch": 0.6291539480504521, "grad_norm": 0.27340343594551086, "learning_rate": 1.8170940220449556e-05, "loss": 1.8369, "step": 2731 }, { "epoch": 0.6293843229856592, "grad_norm": 0.3038185238838196, "learning_rate": 1.815159968050332e-05, "loss": 1.8098, "step": 2732 }, { "epoch": 0.6296146979208662, "grad_norm": 0.3179700970649719, "learning_rate": 1.8132263570767786e-05, "loss": 1.8366, "step": 2733 }, { "epoch": 0.6298450728560733, "grad_norm": 0.34343865513801575, "learning_rate": 1.811293190375144e-05, "loss": 1.8135, "step": 2734 }, { "epoch": 0.6300754477912803, "grad_norm": 0.3261578679084778, "learning_rate": 1.8093604691959893e-05, "loss": 1.8002, "step": 2735 }, { "epoch": 0.6303058227264874, "grad_norm": 0.28672653436660767, "learning_rate": 1.807428194789584e-05, "loss": 1.8669, "step": 2736 }, { "epoch": 0.6305361976616944, "grad_norm": 0.26695770025253296, "learning_rate": 1.8054963684059134e-05, "loss": 1.8467, "step": 2737 }, { "epoch": 0.6307665725969015, "grad_norm": 0.28915661573410034, "learning_rate": 1.8035649912946684e-05, "loss": 1.8063, "step": 2738 }, { "epoch": 0.6309969475321086, "grad_norm": 0.3003162741661072, "learning_rate": 1.8016340647052516e-05, "loss": 1.8823, "step": 2739 }, { "epoch": 0.6312273224673156, "grad_norm": 0.28742724657058716, "learning_rate": 1.7997035898867755e-05, "loss": 1.8634, "step": 2740 }, { "epoch": 0.6314576974025226, "grad_norm": 0.34247449040412903, "learning_rate": 1.7977735680880575e-05, "loss": 1.7713, "step": 2741 }, { "epoch": 0.6316880723377296, "grad_norm": 0.3174580931663513, "learning_rate": 1.7958440005576245e-05, "loss": 1.7988, "step": 2742 }, { "epoch": 0.6319184472729367, "grad_norm": 0.2869790196418762, "learning_rate": 1.7939148885437056e-05, "loss": 1.8216, "step": 2743 }, { "epoch": 0.6321488222081437, "grad_norm": 0.35271555185317993, "learning_rate": 1.79198623329424e-05, "loss": 1.8092, "step": 2744 }, { "epoch": 0.6323791971433508, "grad_norm": 0.28075021505355835, "learning_rate": 1.7900580360568696e-05, "loss": 1.8659, "step": 2745 }, { "epoch": 0.6326095720785578, "grad_norm": 0.3160676658153534, "learning_rate": 1.788130298078938e-05, "loss": 1.8386, "step": 2746 }, { "epoch": 0.6328399470137649, "grad_norm": 0.2825228273868561, "learning_rate": 1.786203020607495e-05, "loss": 1.8137, "step": 2747 }, { "epoch": 0.633070321948972, "grad_norm": 0.269745796918869, "learning_rate": 1.7842762048892904e-05, "loss": 1.8612, "step": 2748 }, { "epoch": 0.633300696884179, "grad_norm": 0.27678778767585754, "learning_rate": 1.7823498521707753e-05, "loss": 1.8412, "step": 2749 }, { "epoch": 0.6335310718193861, "grad_norm": 0.2861742079257965, "learning_rate": 1.780423963698103e-05, "loss": 1.7995, "step": 2750 }, { "epoch": 0.6337614467545931, "grad_norm": 0.297958105802536, "learning_rate": 1.778498540717124e-05, "loss": 1.8602, "step": 2751 }, { "epoch": 0.6339918216898002, "grad_norm": 0.2912128269672394, "learning_rate": 1.7765735844733887e-05, "loss": 1.8356, "step": 2752 }, { "epoch": 0.6342221966250072, "grad_norm": 0.3150177597999573, "learning_rate": 1.7746490962121482e-05, "loss": 1.8135, "step": 2753 }, { "epoch": 0.6344525715602143, "grad_norm": 0.42880234122276306, "learning_rate": 1.772725077178346e-05, "loss": 1.84, "step": 2754 }, { "epoch": 0.6346829464954213, "grad_norm": 0.33141738176345825, "learning_rate": 1.7708015286166247e-05, "loss": 1.7688, "step": 2755 }, { "epoch": 0.6349133214306284, "grad_norm": 0.2790096700191498, "learning_rate": 1.7688784517713248e-05, "loss": 1.8229, "step": 2756 }, { "epoch": 0.6351436963658353, "grad_norm": 0.2812858819961548, "learning_rate": 1.7669558478864764e-05, "loss": 1.8024, "step": 2757 }, { "epoch": 0.6353740713010424, "grad_norm": 0.28339675068855286, "learning_rate": 1.7650337182058084e-05, "loss": 1.7668, "step": 2758 }, { "epoch": 0.6356044462362495, "grad_norm": 0.27783873677253723, "learning_rate": 1.7631120639727393e-05, "loss": 1.8474, "step": 2759 }, { "epoch": 0.6358348211714565, "grad_norm": 0.289358526468277, "learning_rate": 1.7611908864303836e-05, "loss": 1.8256, "step": 2760 }, { "epoch": 0.6360651961066636, "grad_norm": 0.2800230383872986, "learning_rate": 1.759270186821545e-05, "loss": 1.8521, "step": 2761 }, { "epoch": 0.6362955710418706, "grad_norm": 0.3006149232387543, "learning_rate": 1.7573499663887176e-05, "loss": 1.807, "step": 2762 }, { "epoch": 0.6365259459770777, "grad_norm": 0.28088146448135376, "learning_rate": 1.7554302263740874e-05, "loss": 1.8586, "step": 2763 }, { "epoch": 0.6367563209122847, "grad_norm": 0.2774014472961426, "learning_rate": 1.7535109680195296e-05, "loss": 1.8314, "step": 2764 }, { "epoch": 0.6369866958474918, "grad_norm": 0.302323579788208, "learning_rate": 1.7515921925666052e-05, "loss": 1.8422, "step": 2765 }, { "epoch": 0.6372170707826988, "grad_norm": 0.28445854783058167, "learning_rate": 1.7496739012565667e-05, "loss": 1.8409, "step": 2766 }, { "epoch": 0.6374474457179059, "grad_norm": 0.3159946799278259, "learning_rate": 1.7477560953303484e-05, "loss": 1.828, "step": 2767 }, { "epoch": 0.637677820653113, "grad_norm": 0.3107292056083679, "learning_rate": 1.7458387760285752e-05, "loss": 1.8458, "step": 2768 }, { "epoch": 0.63790819558832, "grad_norm": 0.2941262125968933, "learning_rate": 1.743921944591557e-05, "loss": 1.8204, "step": 2769 }, { "epoch": 0.6381385705235271, "grad_norm": 0.30625632405281067, "learning_rate": 1.742005602259284e-05, "loss": 1.8407, "step": 2770 }, { "epoch": 0.6383689454587341, "grad_norm": 0.2838900089263916, "learning_rate": 1.7400897502714332e-05, "loss": 1.8515, "step": 2771 }, { "epoch": 0.6385993203939412, "grad_norm": 0.40026313066482544, "learning_rate": 1.738174389867366e-05, "loss": 1.8179, "step": 2772 }, { "epoch": 0.6388296953291482, "grad_norm": 0.2905896306037903, "learning_rate": 1.7362595222861204e-05, "loss": 1.8008, "step": 2773 }, { "epoch": 0.6390600702643552, "grad_norm": 0.28002220392227173, "learning_rate": 1.7343451487664214e-05, "loss": 1.8431, "step": 2774 }, { "epoch": 0.6392904451995622, "grad_norm": 0.2793145179748535, "learning_rate": 1.73243127054667e-05, "loss": 1.842, "step": 2775 }, { "epoch": 0.6395208201347693, "grad_norm": 0.28293710947036743, "learning_rate": 1.730517888864949e-05, "loss": 1.8207, "step": 2776 }, { "epoch": 0.6397511950699764, "grad_norm": 0.2765275537967682, "learning_rate": 1.7286050049590208e-05, "loss": 1.8343, "step": 2777 }, { "epoch": 0.6399815700051834, "grad_norm": 0.296703040599823, "learning_rate": 1.726692620066323e-05, "loss": 1.8357, "step": 2778 }, { "epoch": 0.6402119449403905, "grad_norm": 0.28233829140663147, "learning_rate": 1.7247807354239733e-05, "loss": 1.8195, "step": 2779 }, { "epoch": 0.6404423198755975, "grad_norm": 0.29848694801330566, "learning_rate": 1.7228693522687627e-05, "loss": 1.8111, "step": 2780 }, { "epoch": 0.6406726948108046, "grad_norm": 0.2996178865432739, "learning_rate": 1.720958471837161e-05, "loss": 1.8314, "step": 2781 }, { "epoch": 0.6409030697460116, "grad_norm": 0.27586162090301514, "learning_rate": 1.7190480953653114e-05, "loss": 1.7936, "step": 2782 }, { "epoch": 0.6411334446812187, "grad_norm": 0.29923510551452637, "learning_rate": 1.7171382240890292e-05, "loss": 1.7975, "step": 2783 }, { "epoch": 0.6413638196164257, "grad_norm": 0.29855969548225403, "learning_rate": 1.715228859243807e-05, "loss": 1.8408, "step": 2784 }, { "epoch": 0.6415941945516328, "grad_norm": 0.2834641933441162, "learning_rate": 1.7133200020648065e-05, "loss": 1.8322, "step": 2785 }, { "epoch": 0.6418245694868399, "grad_norm": 0.2905501425266266, "learning_rate": 1.711411653786861e-05, "loss": 1.8528, "step": 2786 }, { "epoch": 0.6420549444220469, "grad_norm": 0.32797691226005554, "learning_rate": 1.7095038156444782e-05, "loss": 1.8374, "step": 2787 }, { "epoch": 0.642285319357254, "grad_norm": 0.30104348063468933, "learning_rate": 1.7075964888718304e-05, "loss": 1.796, "step": 2788 }, { "epoch": 0.642515694292461, "grad_norm": 0.2701955735683441, "learning_rate": 1.7056896747027628e-05, "loss": 1.8897, "step": 2789 }, { "epoch": 0.6427460692276681, "grad_norm": 0.2925713360309601, "learning_rate": 1.7037833743707892e-05, "loss": 1.8786, "step": 2790 }, { "epoch": 0.642976444162875, "grad_norm": 0.29387930035591125, "learning_rate": 1.7018775891090884e-05, "loss": 1.8113, "step": 2791 }, { "epoch": 0.6432068190980821, "grad_norm": 0.3701002895832062, "learning_rate": 1.6999723201505078e-05, "loss": 1.8065, "step": 2792 }, { "epoch": 0.6434371940332891, "grad_norm": 0.28178834915161133, "learning_rate": 1.6980675687275615e-05, "loss": 1.8492, "step": 2793 }, { "epoch": 0.6436675689684962, "grad_norm": 0.2811805307865143, "learning_rate": 1.6961633360724262e-05, "loss": 1.8303, "step": 2794 }, { "epoch": 0.6438979439037033, "grad_norm": 0.2885025143623352, "learning_rate": 1.694259623416946e-05, "loss": 1.8673, "step": 2795 }, { "epoch": 0.6441283188389103, "grad_norm": 0.3063545823097229, "learning_rate": 1.6923564319926262e-05, "loss": 1.8196, "step": 2796 }, { "epoch": 0.6443586937741174, "grad_norm": 0.2978596091270447, "learning_rate": 1.690453763030636e-05, "loss": 1.8066, "step": 2797 }, { "epoch": 0.6445890687093244, "grad_norm": 0.2741273045539856, "learning_rate": 1.6885516177618076e-05, "loss": 1.8527, "step": 2798 }, { "epoch": 0.6448194436445315, "grad_norm": 0.281325101852417, "learning_rate": 1.6866499974166317e-05, "loss": 1.8308, "step": 2799 }, { "epoch": 0.6450498185797385, "grad_norm": 0.27894875407218933, "learning_rate": 1.6847489032252627e-05, "loss": 1.8115, "step": 2800 }, { "epoch": 0.6452801935149456, "grad_norm": 0.27713969349861145, "learning_rate": 1.6828483364175128e-05, "loss": 1.8251, "step": 2801 }, { "epoch": 0.6455105684501526, "grad_norm": 0.2817394733428955, "learning_rate": 1.6809482982228513e-05, "loss": 1.8528, "step": 2802 }, { "epoch": 0.6457409433853597, "grad_norm": 0.34143173694610596, "learning_rate": 1.6790487898704104e-05, "loss": 1.8119, "step": 2803 }, { "epoch": 0.6459713183205668, "grad_norm": 0.31097298860549927, "learning_rate": 1.677149812588975e-05, "loss": 1.7699, "step": 2804 }, { "epoch": 0.6462016932557738, "grad_norm": 0.28098368644714355, "learning_rate": 1.675251367606988e-05, "loss": 1.8172, "step": 2805 }, { "epoch": 0.6464320681909809, "grad_norm": 0.2841233015060425, "learning_rate": 1.673353456152549e-05, "loss": 1.8299, "step": 2806 }, { "epoch": 0.6466624431261879, "grad_norm": 0.28791579604148865, "learning_rate": 1.6714560794534108e-05, "loss": 1.7822, "step": 2807 }, { "epoch": 0.6468928180613949, "grad_norm": 0.30934950709342957, "learning_rate": 1.6695592387369815e-05, "loss": 1.8194, "step": 2808 }, { "epoch": 0.6471231929966019, "grad_norm": 0.3119139075279236, "learning_rate": 1.6676629352303207e-05, "loss": 1.7905, "step": 2809 }, { "epoch": 0.647353567931809, "grad_norm": 0.28271180391311646, "learning_rate": 1.6657671701601434e-05, "loss": 1.8478, "step": 2810 }, { "epoch": 0.647583942867016, "grad_norm": 0.27350932359695435, "learning_rate": 1.6638719447528146e-05, "loss": 1.8522, "step": 2811 }, { "epoch": 0.6478143178022231, "grad_norm": 0.28451916575431824, "learning_rate": 1.6619772602343482e-05, "loss": 1.8253, "step": 2812 }, { "epoch": 0.6480446927374302, "grad_norm": 0.3081676661968231, "learning_rate": 1.6600831178304122e-05, "loss": 1.8281, "step": 2813 }, { "epoch": 0.6482750676726372, "grad_norm": 0.2930755615234375, "learning_rate": 1.658189518766322e-05, "loss": 1.811, "step": 2814 }, { "epoch": 0.6485054426078443, "grad_norm": 0.2891353964805603, "learning_rate": 1.6562964642670398e-05, "loss": 1.8495, "step": 2815 }, { "epoch": 0.6487358175430513, "grad_norm": 0.2907170355319977, "learning_rate": 1.6544039555571793e-05, "loss": 1.8515, "step": 2816 }, { "epoch": 0.6489661924782584, "grad_norm": 0.3124254643917084, "learning_rate": 1.652511993860997e-05, "loss": 1.8148, "step": 2817 }, { "epoch": 0.6491965674134654, "grad_norm": 0.2937740385532379, "learning_rate": 1.6506205804023987e-05, "loss": 1.8887, "step": 2818 }, { "epoch": 0.6494269423486725, "grad_norm": 0.2885580360889435, "learning_rate": 1.648729716404935e-05, "loss": 1.7993, "step": 2819 }, { "epoch": 0.6496573172838795, "grad_norm": 0.3041837215423584, "learning_rate": 1.646839403091798e-05, "loss": 1.8356, "step": 2820 }, { "epoch": 0.6498876922190866, "grad_norm": 0.28253495693206787, "learning_rate": 1.6449496416858284e-05, "loss": 1.8575, "step": 2821 }, { "epoch": 0.6501180671542937, "grad_norm": 0.28217291831970215, "learning_rate": 1.643060433409507e-05, "loss": 1.8286, "step": 2822 }, { "epoch": 0.6503484420895007, "grad_norm": 0.2842041254043579, "learning_rate": 1.6411717794849562e-05, "loss": 1.7974, "step": 2823 }, { "epoch": 0.6505788170247078, "grad_norm": 0.27787041664123535, "learning_rate": 1.6392836811339425e-05, "loss": 1.8265, "step": 2824 }, { "epoch": 0.6508091919599147, "grad_norm": 0.3016466200351715, "learning_rate": 1.63739613957787e-05, "loss": 1.8368, "step": 2825 }, { "epoch": 0.6510395668951218, "grad_norm": 0.33788999915122986, "learning_rate": 1.635509156037784e-05, "loss": 1.747, "step": 2826 }, { "epoch": 0.6512699418303288, "grad_norm": 0.2973991930484772, "learning_rate": 1.63362273173437e-05, "loss": 1.8325, "step": 2827 }, { "epoch": 0.6515003167655359, "grad_norm": 0.26971203088760376, "learning_rate": 1.6317368678879495e-05, "loss": 1.8393, "step": 2828 }, { "epoch": 0.6517306917007429, "grad_norm": 0.29452863335609436, "learning_rate": 1.6298515657184825e-05, "loss": 1.8085, "step": 2829 }, { "epoch": 0.65196106663595, "grad_norm": 0.3006722629070282, "learning_rate": 1.6279668264455665e-05, "loss": 1.824, "step": 2830 }, { "epoch": 0.652191441571157, "grad_norm": 0.27584460377693176, "learning_rate": 1.6260826512884328e-05, "loss": 1.8385, "step": 2831 }, { "epoch": 0.6524218165063641, "grad_norm": 0.28232792019844055, "learning_rate": 1.62419904146595e-05, "loss": 1.8222, "step": 2832 }, { "epoch": 0.6526521914415712, "grad_norm": 0.25489944219589233, "learning_rate": 1.6223159981966185e-05, "loss": 1.8453, "step": 2833 }, { "epoch": 0.6528825663767782, "grad_norm": 0.28543561697006226, "learning_rate": 1.620433522698575e-05, "loss": 1.8357, "step": 2834 }, { "epoch": 0.6531129413119853, "grad_norm": 0.28466758131980896, "learning_rate": 1.6185516161895877e-05, "loss": 1.8059, "step": 2835 }, { "epoch": 0.6533433162471923, "grad_norm": 0.27619054913520813, "learning_rate": 1.616670279887055e-05, "loss": 1.8575, "step": 2836 }, { "epoch": 0.6535736911823994, "grad_norm": 0.3021552860736847, "learning_rate": 1.61478951500801e-05, "loss": 1.785, "step": 2837 }, { "epoch": 0.6538040661176064, "grad_norm": 0.28089281916618347, "learning_rate": 1.612909322769113e-05, "loss": 1.8718, "step": 2838 }, { "epoch": 0.6540344410528135, "grad_norm": 0.2917262613773346, "learning_rate": 1.611029704386654e-05, "loss": 1.846, "step": 2839 }, { "epoch": 0.6542648159880206, "grad_norm": 0.2609608471393585, "learning_rate": 1.6091506610765554e-05, "loss": 1.7939, "step": 2840 }, { "epoch": 0.6544951909232275, "grad_norm": 0.26673001050949097, "learning_rate": 1.6072721940543626e-05, "loss": 1.8493, "step": 2841 }, { "epoch": 0.6547255658584346, "grad_norm": 0.2754606008529663, "learning_rate": 1.605394304535252e-05, "loss": 1.861, "step": 2842 }, { "epoch": 0.6549559407936416, "grad_norm": 0.27952325344085693, "learning_rate": 1.6035169937340246e-05, "loss": 1.8545, "step": 2843 }, { "epoch": 0.6551863157288487, "grad_norm": 0.2914597988128662, "learning_rate": 1.6016402628651072e-05, "loss": 1.8463, "step": 2844 }, { "epoch": 0.6554166906640557, "grad_norm": 0.29022854566574097, "learning_rate": 1.5997641131425527e-05, "loss": 1.8394, "step": 2845 }, { "epoch": 0.6556470655992628, "grad_norm": 0.264548122882843, "learning_rate": 1.5978885457800345e-05, "loss": 1.848, "step": 2846 }, { "epoch": 0.6558774405344698, "grad_norm": 0.27614739537239075, "learning_rate": 1.5960135619908542e-05, "loss": 1.8372, "step": 2847 }, { "epoch": 0.6561078154696769, "grad_norm": 0.3059418499469757, "learning_rate": 1.5941391629879332e-05, "loss": 1.8492, "step": 2848 }, { "epoch": 0.656338190404884, "grad_norm": 0.30283188819885254, "learning_rate": 1.5922653499838137e-05, "loss": 1.7794, "step": 2849 }, { "epoch": 0.656568565340091, "grad_norm": 0.2927135229110718, "learning_rate": 1.5903921241906612e-05, "loss": 1.8144, "step": 2850 }, { "epoch": 0.6567989402752981, "grad_norm": 0.28198733925819397, "learning_rate": 1.5885194868202596e-05, "loss": 1.8385, "step": 2851 }, { "epoch": 0.6570293152105051, "grad_norm": 0.280225545167923, "learning_rate": 1.5866474390840125e-05, "loss": 1.7886, "step": 2852 }, { "epoch": 0.6572596901457122, "grad_norm": 0.3173483610153198, "learning_rate": 1.584775982192943e-05, "loss": 1.7963, "step": 2853 }, { "epoch": 0.6574900650809192, "grad_norm": 0.2984909415245056, "learning_rate": 1.5829051173576905e-05, "loss": 1.8454, "step": 2854 }, { "epoch": 0.6577204400161263, "grad_norm": 0.2595962584018707, "learning_rate": 1.581034845788512e-05, "loss": 1.8633, "step": 2855 }, { "epoch": 0.6579508149513333, "grad_norm": 0.29189616441726685, "learning_rate": 1.5791651686952823e-05, "loss": 1.818, "step": 2856 }, { "epoch": 0.6581811898865404, "grad_norm": 0.28957805037498474, "learning_rate": 1.5772960872874886e-05, "loss": 1.7914, "step": 2857 }, { "epoch": 0.6584115648217473, "grad_norm": 0.28320997953414917, "learning_rate": 1.5754276027742345e-05, "loss": 1.8586, "step": 2858 }, { "epoch": 0.6586419397569544, "grad_norm": 0.2988170087337494, "learning_rate": 1.5735597163642392e-05, "loss": 1.8033, "step": 2859 }, { "epoch": 0.6588723146921615, "grad_norm": 0.32306888699531555, "learning_rate": 1.5716924292658304e-05, "loss": 1.8091, "step": 2860 }, { "epoch": 0.6591026896273685, "grad_norm": 0.2737857699394226, "learning_rate": 1.5698257426869532e-05, "loss": 1.8209, "step": 2861 }, { "epoch": 0.6593330645625756, "grad_norm": 0.2693231999874115, "learning_rate": 1.5679596578351593e-05, "loss": 1.8235, "step": 2862 }, { "epoch": 0.6595634394977826, "grad_norm": 0.30649876594543457, "learning_rate": 1.566094175917616e-05, "loss": 1.7795, "step": 2863 }, { "epoch": 0.6597938144329897, "grad_norm": 0.26787808537483215, "learning_rate": 1.5642292981410976e-05, "loss": 1.87, "step": 2864 }, { "epoch": 0.6600241893681967, "grad_norm": 0.28190261125564575, "learning_rate": 1.5623650257119877e-05, "loss": 1.8095, "step": 2865 }, { "epoch": 0.6602545643034038, "grad_norm": 0.2754456102848053, "learning_rate": 1.560501359836279e-05, "loss": 1.7955, "step": 2866 }, { "epoch": 0.6604849392386108, "grad_norm": 0.2811458706855774, "learning_rate": 1.558638301719572e-05, "loss": 1.8387, "step": 2867 }, { "epoch": 0.6607153141738179, "grad_norm": 0.29319047927856445, "learning_rate": 1.556775852567074e-05, "loss": 1.8197, "step": 2868 }, { "epoch": 0.660945689109025, "grad_norm": 0.2976089417934418, "learning_rate": 1.5549140135835976e-05, "loss": 1.8085, "step": 2869 }, { "epoch": 0.661176064044232, "grad_norm": 0.29923444986343384, "learning_rate": 1.55305278597356e-05, "loss": 1.7929, "step": 2870 }, { "epoch": 0.6614064389794391, "grad_norm": 0.305320680141449, "learning_rate": 1.551192170940986e-05, "loss": 1.8141, "step": 2871 }, { "epoch": 0.6616368139146461, "grad_norm": 0.2688693404197693, "learning_rate": 1.549332169689502e-05, "loss": 1.8154, "step": 2872 }, { "epoch": 0.6618671888498532, "grad_norm": 0.30483150482177734, "learning_rate": 1.5474727834223356e-05, "loss": 1.8021, "step": 2873 }, { "epoch": 0.6620975637850602, "grad_norm": 0.28056633472442627, "learning_rate": 1.545614013342321e-05, "loss": 1.8366, "step": 2874 }, { "epoch": 0.6623279387202672, "grad_norm": 0.2721034586429596, "learning_rate": 1.5437558606518892e-05, "loss": 1.8336, "step": 2875 }, { "epoch": 0.6625583136554742, "grad_norm": 0.29388630390167236, "learning_rate": 1.5418983265530747e-05, "loss": 1.8111, "step": 2876 }, { "epoch": 0.6627886885906813, "grad_norm": 0.2941872477531433, "learning_rate": 1.540041412247512e-05, "loss": 1.7753, "step": 2877 }, { "epoch": 0.6630190635258884, "grad_norm": 0.272259920835495, "learning_rate": 1.5381851189364324e-05, "loss": 1.8066, "step": 2878 }, { "epoch": 0.6632494384610954, "grad_norm": 0.26403525471687317, "learning_rate": 1.5363294478206666e-05, "loss": 1.8069, "step": 2879 }, { "epoch": 0.6634798133963025, "grad_norm": 0.271199107170105, "learning_rate": 1.5344744001006444e-05, "loss": 1.8074, "step": 2880 }, { "epoch": 0.6637101883315095, "grad_norm": 0.2839808762073517, "learning_rate": 1.5326199769763898e-05, "loss": 1.8308, "step": 2881 }, { "epoch": 0.6639405632667166, "grad_norm": 0.28125348687171936, "learning_rate": 1.5307661796475247e-05, "loss": 1.8338, "step": 2882 }, { "epoch": 0.6641709382019236, "grad_norm": 0.30613189935684204, "learning_rate": 1.5289130093132632e-05, "loss": 1.8073, "step": 2883 }, { "epoch": 0.6644013131371307, "grad_norm": 0.3390466272830963, "learning_rate": 1.5270604671724188e-05, "loss": 1.7388, "step": 2884 }, { "epoch": 0.6646316880723377, "grad_norm": 0.2809712588787079, "learning_rate": 1.525208554423394e-05, "loss": 1.8299, "step": 2885 }, { "epoch": 0.6648620630075448, "grad_norm": 0.31606441736221313, "learning_rate": 1.5233572722641859e-05, "loss": 1.7705, "step": 2886 }, { "epoch": 0.6650924379427519, "grad_norm": 0.301192045211792, "learning_rate": 1.5215066218923842e-05, "loss": 1.7911, "step": 2887 }, { "epoch": 0.6653228128779589, "grad_norm": 0.26601141691207886, "learning_rate": 1.51965660450517e-05, "loss": 1.8331, "step": 2888 }, { "epoch": 0.665553187813166, "grad_norm": 0.27703893184661865, "learning_rate": 1.5178072212993124e-05, "loss": 1.8247, "step": 2889 }, { "epoch": 0.665783562748373, "grad_norm": 0.320966899394989, "learning_rate": 1.5159584734711743e-05, "loss": 1.8362, "step": 2890 }, { "epoch": 0.6660139376835801, "grad_norm": 0.26578909158706665, "learning_rate": 1.5141103622167041e-05, "loss": 1.8373, "step": 2891 }, { "epoch": 0.666244312618787, "grad_norm": 0.28774014115333557, "learning_rate": 1.5122628887314392e-05, "loss": 1.8062, "step": 2892 }, { "epoch": 0.6664746875539941, "grad_norm": 0.305311381816864, "learning_rate": 1.5104160542105066e-05, "loss": 1.8423, "step": 2893 }, { "epoch": 0.6667050624892011, "grad_norm": 0.334513396024704, "learning_rate": 1.5085698598486175e-05, "loss": 1.782, "step": 2894 }, { "epoch": 0.6669354374244082, "grad_norm": 0.2803722321987152, "learning_rate": 1.5067243068400688e-05, "loss": 1.8312, "step": 2895 }, { "epoch": 0.6671658123596153, "grad_norm": 0.28199848532676697, "learning_rate": 1.5048793963787459e-05, "loss": 1.8252, "step": 2896 }, { "epoch": 0.6673961872948223, "grad_norm": 0.27033546566963196, "learning_rate": 1.5030351296581138e-05, "loss": 1.8406, "step": 2897 }, { "epoch": 0.6676265622300294, "grad_norm": 0.270999550819397, "learning_rate": 1.5011915078712251e-05, "loss": 1.8215, "step": 2898 }, { "epoch": 0.6678569371652364, "grad_norm": 0.2643238306045532, "learning_rate": 1.4993485322107115e-05, "loss": 1.8449, "step": 2899 }, { "epoch": 0.6680873121004435, "grad_norm": 0.2708822190761566, "learning_rate": 1.4975062038687904e-05, "loss": 1.8678, "step": 2900 }, { "epoch": 0.6683176870356505, "grad_norm": 0.28325819969177246, "learning_rate": 1.4956645240372588e-05, "loss": 1.8255, "step": 2901 }, { "epoch": 0.6685480619708576, "grad_norm": 0.2944508492946625, "learning_rate": 1.4938234939074927e-05, "loss": 1.806, "step": 2902 }, { "epoch": 0.6687784369060646, "grad_norm": 0.28917184472084045, "learning_rate": 1.4919831146704511e-05, "loss": 1.7988, "step": 2903 }, { "epoch": 0.6690088118412717, "grad_norm": 0.28792211413383484, "learning_rate": 1.4901433875166687e-05, "loss": 1.8613, "step": 2904 }, { "epoch": 0.6692391867764788, "grad_norm": 0.2721806466579437, "learning_rate": 1.4883043136362598e-05, "loss": 1.8652, "step": 2905 }, { "epoch": 0.6694695617116858, "grad_norm": 0.27962788939476013, "learning_rate": 1.4864658942189174e-05, "loss": 1.8546, "step": 2906 }, { "epoch": 0.6696999366468929, "grad_norm": 0.2823469042778015, "learning_rate": 1.4846281304539089e-05, "loss": 1.8089, "step": 2907 }, { "epoch": 0.6699303115820998, "grad_norm": 0.28958043456077576, "learning_rate": 1.4827910235300774e-05, "loss": 1.7704, "step": 2908 }, { "epoch": 0.6701606865173069, "grad_norm": 0.3054419159889221, "learning_rate": 1.4809545746358447e-05, "loss": 1.8038, "step": 2909 }, { "epoch": 0.6703910614525139, "grad_norm": 0.2721618413925171, "learning_rate": 1.479118784959202e-05, "loss": 1.8154, "step": 2910 }, { "epoch": 0.670621436387721, "grad_norm": 0.28447091579437256, "learning_rate": 1.4772836556877185e-05, "loss": 1.8391, "step": 2911 }, { "epoch": 0.670851811322928, "grad_norm": 0.28988972306251526, "learning_rate": 1.475449188008532e-05, "loss": 1.8543, "step": 2912 }, { "epoch": 0.6710821862581351, "grad_norm": 0.29551056027412415, "learning_rate": 1.4736153831083547e-05, "loss": 1.8104, "step": 2913 }, { "epoch": 0.6713125611933421, "grad_norm": 0.31045958399772644, "learning_rate": 1.4717822421734718e-05, "loss": 1.7854, "step": 2914 }, { "epoch": 0.6715429361285492, "grad_norm": 0.2889399230480194, "learning_rate": 1.4699497663897349e-05, "loss": 1.8123, "step": 2915 }, { "epoch": 0.6717733110637563, "grad_norm": 0.2658938765525818, "learning_rate": 1.4681179569425676e-05, "loss": 1.8163, "step": 2916 }, { "epoch": 0.6720036859989633, "grad_norm": 0.2752992510795593, "learning_rate": 1.466286815016964e-05, "loss": 1.8459, "step": 2917 }, { "epoch": 0.6722340609341704, "grad_norm": 0.2982789874076843, "learning_rate": 1.4644563417974827e-05, "loss": 1.8591, "step": 2918 }, { "epoch": 0.6724644358693774, "grad_norm": 0.28021398186683655, "learning_rate": 1.4626265384682525e-05, "loss": 1.8144, "step": 2919 }, { "epoch": 0.6726948108045845, "grad_norm": 0.2925551235675812, "learning_rate": 1.460797406212967e-05, "loss": 1.7936, "step": 2920 }, { "epoch": 0.6729251857397915, "grad_norm": 0.28405582904815674, "learning_rate": 1.458968946214888e-05, "loss": 1.8481, "step": 2921 }, { "epoch": 0.6731555606749986, "grad_norm": 0.28417956829071045, "learning_rate": 1.4571411596568404e-05, "loss": 1.7957, "step": 2922 }, { "epoch": 0.6733859356102057, "grad_norm": 0.2926378548145294, "learning_rate": 1.4553140477212141e-05, "loss": 1.8343, "step": 2923 }, { "epoch": 0.6736163105454127, "grad_norm": 0.27255597710609436, "learning_rate": 1.4534876115899631e-05, "loss": 1.8249, "step": 2924 }, { "epoch": 0.6738466854806197, "grad_norm": 0.295119047164917, "learning_rate": 1.451661852444603e-05, "loss": 1.822, "step": 2925 }, { "epoch": 0.6740770604158267, "grad_norm": 0.2918982207775116, "learning_rate": 1.4498367714662128e-05, "loss": 1.7791, "step": 2926 }, { "epoch": 0.6743074353510338, "grad_norm": 0.2675226032733917, "learning_rate": 1.4480123698354332e-05, "loss": 1.8034, "step": 2927 }, { "epoch": 0.6745378102862408, "grad_norm": 0.2965121567249298, "learning_rate": 1.4461886487324605e-05, "loss": 1.8343, "step": 2928 }, { "epoch": 0.6747681852214479, "grad_norm": 0.2984461486339569, "learning_rate": 1.4443656093370588e-05, "loss": 1.7427, "step": 2929 }, { "epoch": 0.6749985601566549, "grad_norm": 0.28650057315826416, "learning_rate": 1.442543252828547e-05, "loss": 1.7922, "step": 2930 }, { "epoch": 0.675228935091862, "grad_norm": 0.31797507405281067, "learning_rate": 1.4407215803857988e-05, "loss": 1.7631, "step": 2931 }, { "epoch": 0.675459310027069, "grad_norm": 0.28190696239471436, "learning_rate": 1.4389005931872534e-05, "loss": 1.8517, "step": 2932 }, { "epoch": 0.6756896849622761, "grad_norm": 0.2889227271080017, "learning_rate": 1.437080292410899e-05, "loss": 1.843, "step": 2933 }, { "epoch": 0.6759200598974832, "grad_norm": 0.28385260701179504, "learning_rate": 1.4352606792342829e-05, "loss": 1.7786, "step": 2934 }, { "epoch": 0.6761504348326902, "grad_norm": 0.29595473408699036, "learning_rate": 1.4334417548345108e-05, "loss": 1.818, "step": 2935 }, { "epoch": 0.6763808097678973, "grad_norm": 0.2683328092098236, "learning_rate": 1.4316235203882371e-05, "loss": 1.8037, "step": 2936 }, { "epoch": 0.6766111847031043, "grad_norm": 0.26511332392692566, "learning_rate": 1.4298059770716721e-05, "loss": 1.853, "step": 2937 }, { "epoch": 0.6768415596383114, "grad_norm": 0.29540401697158813, "learning_rate": 1.4279891260605821e-05, "loss": 1.7933, "step": 2938 }, { "epoch": 0.6770719345735184, "grad_norm": 0.2772752046585083, "learning_rate": 1.4261729685302808e-05, "loss": 1.8427, "step": 2939 }, { "epoch": 0.6773023095087255, "grad_norm": 0.2769555151462555, "learning_rate": 1.4243575056556355e-05, "loss": 1.8163, "step": 2940 }, { "epoch": 0.6775326844439326, "grad_norm": 0.28700175881385803, "learning_rate": 1.422542738611064e-05, "loss": 1.8262, "step": 2941 }, { "epoch": 0.6777630593791395, "grad_norm": 0.28971007466316223, "learning_rate": 1.4207286685705345e-05, "loss": 1.7935, "step": 2942 }, { "epoch": 0.6779934343143466, "grad_norm": 0.277018666267395, "learning_rate": 1.4189152967075636e-05, "loss": 1.8292, "step": 2943 }, { "epoch": 0.6782238092495536, "grad_norm": 0.2650841474533081, "learning_rate": 1.4171026241952163e-05, "loss": 1.8527, "step": 2944 }, { "epoch": 0.6784541841847607, "grad_norm": 0.3044723868370056, "learning_rate": 1.4152906522061048e-05, "loss": 1.796, "step": 2945 }, { "epoch": 0.6786845591199677, "grad_norm": 0.2969011068344116, "learning_rate": 1.4134793819123896e-05, "loss": 1.7641, "step": 2946 }, { "epoch": 0.6789149340551748, "grad_norm": 0.2904401421546936, "learning_rate": 1.4116688144857754e-05, "loss": 1.805, "step": 2947 }, { "epoch": 0.6791453089903818, "grad_norm": 0.2653507888317108, "learning_rate": 1.409858951097514e-05, "loss": 1.8583, "step": 2948 }, { "epoch": 0.6793756839255889, "grad_norm": 0.2944558560848236, "learning_rate": 1.4080497929184e-05, "loss": 1.83, "step": 2949 }, { "epoch": 0.679606058860796, "grad_norm": 0.27916258573532104, "learning_rate": 1.4062413411187736e-05, "loss": 1.839, "step": 2950 }, { "epoch": 0.679836433796003, "grad_norm": 0.32424795627593994, "learning_rate": 1.4044335968685162e-05, "loss": 1.7829, "step": 2951 }, { "epoch": 0.6800668087312101, "grad_norm": 0.31134986877441406, "learning_rate": 1.4026265613370529e-05, "loss": 1.8193, "step": 2952 }, { "epoch": 0.6802971836664171, "grad_norm": 0.29750746488571167, "learning_rate": 1.40082023569335e-05, "loss": 1.7621, "step": 2953 }, { "epoch": 0.6805275586016242, "grad_norm": 0.2966921031475067, "learning_rate": 1.399014621105914e-05, "loss": 1.8224, "step": 2954 }, { "epoch": 0.6807579335368312, "grad_norm": 0.33195924758911133, "learning_rate": 1.3972097187427923e-05, "loss": 1.7641, "step": 2955 }, { "epoch": 0.6809883084720383, "grad_norm": 0.2690456807613373, "learning_rate": 1.3954055297715717e-05, "loss": 1.8436, "step": 2956 }, { "epoch": 0.6812186834072453, "grad_norm": 0.27495434880256653, "learning_rate": 1.3936020553593746e-05, "loss": 1.819, "step": 2957 }, { "epoch": 0.6814490583424524, "grad_norm": 0.26606398820877075, "learning_rate": 1.3917992966728655e-05, "loss": 1.848, "step": 2958 }, { "epoch": 0.6816794332776593, "grad_norm": 0.2787027060985565, "learning_rate": 1.3899972548782447e-05, "loss": 1.8077, "step": 2959 }, { "epoch": 0.6819098082128664, "grad_norm": 0.27797064185142517, "learning_rate": 1.388195931141245e-05, "loss": 1.8017, "step": 2960 }, { "epoch": 0.6821401831480735, "grad_norm": 0.277274489402771, "learning_rate": 1.386395326627139e-05, "loss": 1.8482, "step": 2961 }, { "epoch": 0.6823705580832805, "grad_norm": 0.27800890803337097, "learning_rate": 1.3845954425007347e-05, "loss": 1.7828, "step": 2962 }, { "epoch": 0.6826009330184876, "grad_norm": 0.2988794147968292, "learning_rate": 1.3827962799263685e-05, "loss": 1.8272, "step": 2963 }, { "epoch": 0.6828313079536946, "grad_norm": 0.27166399359703064, "learning_rate": 1.3809978400679157e-05, "loss": 1.8378, "step": 2964 }, { "epoch": 0.6830616828889017, "grad_norm": 0.29434657096862793, "learning_rate": 1.3792001240887814e-05, "loss": 1.8842, "step": 2965 }, { "epoch": 0.6832920578241087, "grad_norm": 0.27882522344589233, "learning_rate": 1.3774031331519033e-05, "loss": 1.8545, "step": 2966 }, { "epoch": 0.6835224327593158, "grad_norm": 0.2808299660682678, "learning_rate": 1.375606868419749e-05, "loss": 1.863, "step": 2967 }, { "epoch": 0.6837528076945228, "grad_norm": 0.2764309048652649, "learning_rate": 1.3738113310543177e-05, "loss": 1.8372, "step": 2968 }, { "epoch": 0.6839831826297299, "grad_norm": 0.28309836983680725, "learning_rate": 1.3720165222171372e-05, "loss": 1.8238, "step": 2969 }, { "epoch": 0.684213557564937, "grad_norm": 0.25659680366516113, "learning_rate": 1.3702224430692645e-05, "loss": 1.7918, "step": 2970 }, { "epoch": 0.684443932500144, "grad_norm": 0.27735546231269836, "learning_rate": 1.3684290947712838e-05, "loss": 1.8583, "step": 2971 }, { "epoch": 0.6846743074353511, "grad_norm": 0.28321123123168945, "learning_rate": 1.3666364784833075e-05, "loss": 1.7889, "step": 2972 }, { "epoch": 0.6849046823705581, "grad_norm": 0.28864991664886475, "learning_rate": 1.3648445953649736e-05, "loss": 1.789, "step": 2973 }, { "epoch": 0.6851350573057652, "grad_norm": 0.282606840133667, "learning_rate": 1.3630534465754463e-05, "loss": 1.8121, "step": 2974 }, { "epoch": 0.6853654322409721, "grad_norm": 0.27299293875694275, "learning_rate": 1.361263033273415e-05, "loss": 1.8258, "step": 2975 }, { "epoch": 0.6855958071761792, "grad_norm": 0.30268508195877075, "learning_rate": 1.3594733566170926e-05, "loss": 1.7933, "step": 2976 }, { "epoch": 0.6858261821113862, "grad_norm": 0.28967490792274475, "learning_rate": 1.357684417764217e-05, "loss": 1.8057, "step": 2977 }, { "epoch": 0.6860565570465933, "grad_norm": 0.2939078211784363, "learning_rate": 1.3558962178720446e-05, "loss": 1.7997, "step": 2978 }, { "epoch": 0.6862869319818004, "grad_norm": 0.2719234526157379, "learning_rate": 1.35410875809736e-05, "loss": 1.8504, "step": 2979 }, { "epoch": 0.6865173069170074, "grad_norm": 0.2803416848182678, "learning_rate": 1.3523220395964653e-05, "loss": 1.8459, "step": 2980 }, { "epoch": 0.6867476818522145, "grad_norm": 0.27610597014427185, "learning_rate": 1.3505360635251812e-05, "loss": 1.8108, "step": 2981 }, { "epoch": 0.6869780567874215, "grad_norm": 0.29099011421203613, "learning_rate": 1.3487508310388536e-05, "loss": 1.7953, "step": 2982 }, { "epoch": 0.6872084317226286, "grad_norm": 0.294182151556015, "learning_rate": 1.346966343292344e-05, "loss": 1.8038, "step": 2983 }, { "epoch": 0.6874388066578356, "grad_norm": 0.27504345774650574, "learning_rate": 1.3451826014400295e-05, "loss": 1.8365, "step": 2984 }, { "epoch": 0.6876691815930427, "grad_norm": 0.2720038592815399, "learning_rate": 1.3433996066358117e-05, "loss": 1.8423, "step": 2985 }, { "epoch": 0.6878995565282497, "grad_norm": 0.2738204002380371, "learning_rate": 1.341617360033102e-05, "loss": 1.8691, "step": 2986 }, { "epoch": 0.6881299314634568, "grad_norm": 0.298013299703598, "learning_rate": 1.3398358627848304e-05, "loss": 1.837, "step": 2987 }, { "epoch": 0.6883603063986639, "grad_norm": 0.30239924788475037, "learning_rate": 1.3380551160434457e-05, "loss": 1.8776, "step": 2988 }, { "epoch": 0.6885906813338709, "grad_norm": 0.28659412264823914, "learning_rate": 1.3362751209609043e-05, "loss": 1.8043, "step": 2989 }, { "epoch": 0.688821056269078, "grad_norm": 0.2653122544288635, "learning_rate": 1.3344958786886808e-05, "loss": 1.796, "step": 2990 }, { "epoch": 0.689051431204285, "grad_norm": 0.284671425819397, "learning_rate": 1.3327173903777646e-05, "loss": 1.7971, "step": 2991 }, { "epoch": 0.689281806139492, "grad_norm": 0.2737259268760681, "learning_rate": 1.3309396571786514e-05, "loss": 1.8576, "step": 2992 }, { "epoch": 0.689512181074699, "grad_norm": 0.2915668785572052, "learning_rate": 1.3291626802413536e-05, "loss": 1.86, "step": 2993 }, { "epoch": 0.6897425560099061, "grad_norm": 0.2652774751186371, "learning_rate": 1.3273864607153916e-05, "loss": 1.8419, "step": 2994 }, { "epoch": 0.6899729309451131, "grad_norm": 0.2607288062572479, "learning_rate": 1.3256109997497974e-05, "loss": 1.8188, "step": 2995 }, { "epoch": 0.6902033058803202, "grad_norm": 0.276959091424942, "learning_rate": 1.3238362984931113e-05, "loss": 1.8314, "step": 2996 }, { "epoch": 0.6904336808155273, "grad_norm": 0.31025612354278564, "learning_rate": 1.3220623580933828e-05, "loss": 1.7813, "step": 2997 }, { "epoch": 0.6906640557507343, "grad_norm": 0.2855507433414459, "learning_rate": 1.3202891796981685e-05, "loss": 1.8462, "step": 2998 }, { "epoch": 0.6908944306859414, "grad_norm": 0.29063963890075684, "learning_rate": 1.3185167644545327e-05, "loss": 1.7828, "step": 2999 }, { "epoch": 0.6911248056211484, "grad_norm": 0.3323044776916504, "learning_rate": 1.3167451135090459e-05, "loss": 1.7868, "step": 3000 }, { "epoch": 0.6913551805563555, "grad_norm": 0.280320405960083, "learning_rate": 1.314974228007784e-05, "loss": 1.7997, "step": 3001 }, { "epoch": 0.6915855554915625, "grad_norm": 0.27609577775001526, "learning_rate": 1.3132041090963276e-05, "loss": 1.8071, "step": 3002 }, { "epoch": 0.6918159304267696, "grad_norm": 0.2709547281265259, "learning_rate": 1.311434757919762e-05, "loss": 1.8267, "step": 3003 }, { "epoch": 0.6920463053619766, "grad_norm": 0.2870616614818573, "learning_rate": 1.3096661756226749e-05, "loss": 1.7831, "step": 3004 }, { "epoch": 0.6922766802971837, "grad_norm": 0.291556179523468, "learning_rate": 1.3078983633491575e-05, "loss": 1.8255, "step": 3005 }, { "epoch": 0.6925070552323908, "grad_norm": 0.32134324312210083, "learning_rate": 1.306131322242804e-05, "loss": 1.8208, "step": 3006 }, { "epoch": 0.6927374301675978, "grad_norm": 0.2886468768119812, "learning_rate": 1.3043650534467053e-05, "loss": 1.7812, "step": 3007 }, { "epoch": 0.6929678051028049, "grad_norm": 0.295486181974411, "learning_rate": 1.3025995581034561e-05, "loss": 1.8217, "step": 3008 }, { "epoch": 0.6931981800380118, "grad_norm": 0.3013034462928772, "learning_rate": 1.3008348373551538e-05, "loss": 1.7948, "step": 3009 }, { "epoch": 0.6934285549732189, "grad_norm": 0.2738533914089203, "learning_rate": 1.2990708923433875e-05, "loss": 1.8588, "step": 3010 }, { "epoch": 0.6936589299084259, "grad_norm": 0.2903570830821991, "learning_rate": 1.2973077242092486e-05, "loss": 1.7938, "step": 3011 }, { "epoch": 0.693889304843633, "grad_norm": 0.28944048285484314, "learning_rate": 1.2955453340933282e-05, "loss": 1.8103, "step": 3012 }, { "epoch": 0.69411967977884, "grad_norm": 0.28718599677085876, "learning_rate": 1.293783723135709e-05, "loss": 1.7972, "step": 3013 }, { "epoch": 0.6943500547140471, "grad_norm": 0.3205178678035736, "learning_rate": 1.2920228924759728e-05, "loss": 1.7689, "step": 3014 }, { "epoch": 0.6945804296492541, "grad_norm": 0.3779389262199402, "learning_rate": 1.2902628432531961e-05, "loss": 1.8061, "step": 3015 }, { "epoch": 0.6948108045844612, "grad_norm": 0.3879905641078949, "learning_rate": 1.2885035766059495e-05, "loss": 1.8289, "step": 3016 }, { "epoch": 0.6950411795196683, "grad_norm": 0.29506784677505493, "learning_rate": 1.286745093672298e-05, "loss": 1.8072, "step": 3017 }, { "epoch": 0.6952715544548753, "grad_norm": 0.2680102288722992, "learning_rate": 1.2849873955897987e-05, "loss": 1.834, "step": 3018 }, { "epoch": 0.6955019293900824, "grad_norm": 0.28438860177993774, "learning_rate": 1.2832304834955016e-05, "loss": 1.83, "step": 3019 }, { "epoch": 0.6957323043252894, "grad_norm": 0.29002511501312256, "learning_rate": 1.2814743585259487e-05, "loss": 1.8615, "step": 3020 }, { "epoch": 0.6959626792604965, "grad_norm": 0.2785131633281708, "learning_rate": 1.2797190218171717e-05, "loss": 1.8269, "step": 3021 }, { "epoch": 0.6961930541957035, "grad_norm": 0.3804493248462677, "learning_rate": 1.2779644745046932e-05, "loss": 1.7638, "step": 3022 }, { "epoch": 0.6964234291309106, "grad_norm": 0.28337040543556213, "learning_rate": 1.276210717723525e-05, "loss": 1.806, "step": 3023 }, { "epoch": 0.6966538040661177, "grad_norm": 0.2892305850982666, "learning_rate": 1.2744577526081666e-05, "loss": 1.8247, "step": 3024 }, { "epoch": 0.6968841790013247, "grad_norm": 0.2785550653934479, "learning_rate": 1.2727055802926069e-05, "loss": 1.7798, "step": 3025 }, { "epoch": 0.6971145539365317, "grad_norm": 0.3207387626171112, "learning_rate": 1.270954201910321e-05, "loss": 1.7466, "step": 3026 }, { "epoch": 0.6973449288717387, "grad_norm": 0.2900592088699341, "learning_rate": 1.2692036185942707e-05, "loss": 1.7286, "step": 3027 }, { "epoch": 0.6975753038069458, "grad_norm": 0.3024560213088989, "learning_rate": 1.2674538314769033e-05, "loss": 1.8259, "step": 3028 }, { "epoch": 0.6978056787421528, "grad_norm": 0.27262699604034424, "learning_rate": 1.265704841690151e-05, "loss": 1.8202, "step": 3029 }, { "epoch": 0.6980360536773599, "grad_norm": 0.27766501903533936, "learning_rate": 1.2639566503654315e-05, "loss": 1.8179, "step": 3030 }, { "epoch": 0.6982664286125669, "grad_norm": 0.29272645711898804, "learning_rate": 1.2622092586336415e-05, "loss": 1.8091, "step": 3031 }, { "epoch": 0.698496803547774, "grad_norm": 0.2808494567871094, "learning_rate": 1.2604626676251668e-05, "loss": 1.7966, "step": 3032 }, { "epoch": 0.698727178482981, "grad_norm": 0.330878883600235, "learning_rate": 1.2587168784698727e-05, "loss": 1.8541, "step": 3033 }, { "epoch": 0.6989575534181881, "grad_norm": 0.2857818603515625, "learning_rate": 1.2569718922971018e-05, "loss": 1.7605, "step": 3034 }, { "epoch": 0.6991879283533952, "grad_norm": 0.28313377499580383, "learning_rate": 1.2552277102356846e-05, "loss": 1.77, "step": 3035 }, { "epoch": 0.6994183032886022, "grad_norm": 0.30969253182411194, "learning_rate": 1.2534843334139248e-05, "loss": 1.7881, "step": 3036 }, { "epoch": 0.6996486782238093, "grad_norm": 0.32219699025154114, "learning_rate": 1.251741762959608e-05, "loss": 1.802, "step": 3037 }, { "epoch": 0.6998790531590163, "grad_norm": 0.29604795575141907, "learning_rate": 1.2500000000000006e-05, "loss": 1.8377, "step": 3038 }, { "epoch": 0.7001094280942234, "grad_norm": 0.2881243824958801, "learning_rate": 1.2482590456618415e-05, "loss": 1.7953, "step": 3039 }, { "epoch": 0.7003398030294304, "grad_norm": 0.28181594610214233, "learning_rate": 1.2465189010713488e-05, "loss": 1.7872, "step": 3040 }, { "epoch": 0.7005701779646375, "grad_norm": 0.3054746389389038, "learning_rate": 1.2447795673542203e-05, "loss": 1.8102, "step": 3041 }, { "epoch": 0.7008005528998444, "grad_norm": 0.27632394433021545, "learning_rate": 1.2430410456356234e-05, "loss": 1.8, "step": 3042 }, { "epoch": 0.7010309278350515, "grad_norm": 0.26726073026657104, "learning_rate": 1.2413033370402032e-05, "loss": 1.8084, "step": 3043 }, { "epoch": 0.7012613027702586, "grad_norm": 0.2772845923900604, "learning_rate": 1.239566442692079e-05, "loss": 1.8315, "step": 3044 }, { "epoch": 0.7014916777054656, "grad_norm": 0.2713444232940674, "learning_rate": 1.2378303637148423e-05, "loss": 1.8293, "step": 3045 }, { "epoch": 0.7017220526406727, "grad_norm": 0.3469386100769043, "learning_rate": 1.236095101231558e-05, "loss": 1.8101, "step": 3046 }, { "epoch": 0.7019524275758797, "grad_norm": 0.26292577385902405, "learning_rate": 1.2343606563647625e-05, "loss": 1.8338, "step": 3047 }, { "epoch": 0.7021828025110868, "grad_norm": 0.2840518653392792, "learning_rate": 1.2326270302364628e-05, "loss": 1.8288, "step": 3048 }, { "epoch": 0.7024131774462938, "grad_norm": 0.27193447947502136, "learning_rate": 1.2308942239681376e-05, "loss": 1.8526, "step": 3049 }, { "epoch": 0.7026435523815009, "grad_norm": 0.2662602365016937, "learning_rate": 1.2291622386807336e-05, "loss": 1.8048, "step": 3050 }, { "epoch": 0.702873927316708, "grad_norm": 0.27378273010253906, "learning_rate": 1.2274310754946672e-05, "loss": 1.8114, "step": 3051 }, { "epoch": 0.703104302251915, "grad_norm": 2.0510501861572266, "learning_rate": 1.2257007355298233e-05, "loss": 1.8481, "step": 3052 }, { "epoch": 0.7033346771871221, "grad_norm": 0.295777827501297, "learning_rate": 1.223971219905554e-05, "loss": 1.8276, "step": 3053 }, { "epoch": 0.7035650521223291, "grad_norm": 0.2779991030693054, "learning_rate": 1.2222425297406783e-05, "loss": 1.7616, "step": 3054 }, { "epoch": 0.7037954270575362, "grad_norm": 0.29603680968284607, "learning_rate": 1.2205146661534785e-05, "loss": 1.7983, "step": 3055 }, { "epoch": 0.7040258019927432, "grad_norm": 0.2869161367416382, "learning_rate": 1.218787630261708e-05, "loss": 1.8338, "step": 3056 }, { "epoch": 0.7042561769279503, "grad_norm": 0.2555495500564575, "learning_rate": 1.2170614231825808e-05, "loss": 1.8521, "step": 3057 }, { "epoch": 0.7044865518631573, "grad_norm": 0.6244174838066101, "learning_rate": 1.2153360460327726e-05, "loss": 1.8611, "step": 3058 }, { "epoch": 0.7047169267983643, "grad_norm": 0.28516247868537903, "learning_rate": 1.2136114999284288e-05, "loss": 1.8244, "step": 3059 }, { "epoch": 0.7049473017335713, "grad_norm": 0.2732241749763489, "learning_rate": 1.2118877859851504e-05, "loss": 1.8277, "step": 3060 }, { "epoch": 0.7051776766687784, "grad_norm": 0.2772604823112488, "learning_rate": 1.2101649053180037e-05, "loss": 1.8488, "step": 3061 }, { "epoch": 0.7054080516039855, "grad_norm": 0.26636892557144165, "learning_rate": 1.2084428590415172e-05, "loss": 1.8332, "step": 3062 }, { "epoch": 0.7056384265391925, "grad_norm": 0.27403607964515686, "learning_rate": 1.2067216482696755e-05, "loss": 1.8469, "step": 3063 }, { "epoch": 0.7058688014743996, "grad_norm": 0.280049204826355, "learning_rate": 1.2050012741159258e-05, "loss": 1.834, "step": 3064 }, { "epoch": 0.7060991764096066, "grad_norm": 0.2874005436897278, "learning_rate": 1.2032817376931734e-05, "loss": 1.7588, "step": 3065 }, { "epoch": 0.7063295513448137, "grad_norm": 0.3173491358757019, "learning_rate": 1.2015630401137812e-05, "loss": 1.7668, "step": 3066 }, { "epoch": 0.7065599262800207, "grad_norm": 0.28414109349250793, "learning_rate": 1.1998451824895702e-05, "loss": 1.8016, "step": 3067 }, { "epoch": 0.7067903012152278, "grad_norm": 0.27395501732826233, "learning_rate": 1.1981281659318169e-05, "loss": 1.7774, "step": 3068 }, { "epoch": 0.7070206761504348, "grad_norm": 0.2692941427230835, "learning_rate": 1.196411991551255e-05, "loss": 1.8151, "step": 3069 }, { "epoch": 0.7072510510856419, "grad_norm": 0.2688745856285095, "learning_rate": 1.1946966604580731e-05, "loss": 1.8525, "step": 3070 }, { "epoch": 0.707481426020849, "grad_norm": 0.2797418534755707, "learning_rate": 1.1929821737619131e-05, "loss": 1.797, "step": 3071 }, { "epoch": 0.707711800956056, "grad_norm": 0.2793016731739044, "learning_rate": 1.1912685325718725e-05, "loss": 1.8247, "step": 3072 }, { "epoch": 0.7079421758912631, "grad_norm": 0.2760831415653229, "learning_rate": 1.1895557379965005e-05, "loss": 1.8198, "step": 3073 }, { "epoch": 0.7081725508264701, "grad_norm": 0.3031299412250519, "learning_rate": 1.187843791143799e-05, "loss": 1.7699, "step": 3074 }, { "epoch": 0.7084029257616772, "grad_norm": 0.3006208539009094, "learning_rate": 1.1861326931212215e-05, "loss": 1.7903, "step": 3075 }, { "epoch": 0.7086333006968841, "grad_norm": 0.3032180368900299, "learning_rate": 1.1844224450356728e-05, "loss": 1.8051, "step": 3076 }, { "epoch": 0.7088636756320912, "grad_norm": 0.2954537272453308, "learning_rate": 1.1827130479935073e-05, "loss": 1.7848, "step": 3077 }, { "epoch": 0.7090940505672982, "grad_norm": 0.2881776988506317, "learning_rate": 1.181004503100529e-05, "loss": 1.8109, "step": 3078 }, { "epoch": 0.7093244255025053, "grad_norm": 0.28180626034736633, "learning_rate": 1.179296811461991e-05, "loss": 1.7979, "step": 3079 }, { "epoch": 0.7095548004377124, "grad_norm": 0.28211474418640137, "learning_rate": 1.1775899741825947e-05, "loss": 1.8276, "step": 3080 }, { "epoch": 0.7097851753729194, "grad_norm": 0.31223800778388977, "learning_rate": 1.1758839923664858e-05, "loss": 1.7727, "step": 3081 }, { "epoch": 0.7100155503081265, "grad_norm": 0.27158835530281067, "learning_rate": 1.1741788671172618e-05, "loss": 1.8561, "step": 3082 }, { "epoch": 0.7102459252433335, "grad_norm": 0.2928842008113861, "learning_rate": 1.1724745995379636e-05, "loss": 1.8143, "step": 3083 }, { "epoch": 0.7104763001785406, "grad_norm": 0.2574116587638855, "learning_rate": 1.1707711907310739e-05, "loss": 1.77, "step": 3084 }, { "epoch": 0.7107066751137476, "grad_norm": 0.2774415910243988, "learning_rate": 1.1690686417985258e-05, "loss": 1.8193, "step": 3085 }, { "epoch": 0.7109370500489547, "grad_norm": 0.31710949540138245, "learning_rate": 1.167366953841694e-05, "loss": 1.784, "step": 3086 }, { "epoch": 0.7111674249841617, "grad_norm": 0.27744996547698975, "learning_rate": 1.165666127961392e-05, "loss": 1.8311, "step": 3087 }, { "epoch": 0.7113977999193688, "grad_norm": 0.28695008158683777, "learning_rate": 1.1639661652578831e-05, "loss": 1.8426, "step": 3088 }, { "epoch": 0.7116281748545759, "grad_norm": 0.29508981108665466, "learning_rate": 1.1622670668308663e-05, "loss": 1.8247, "step": 3089 }, { "epoch": 0.7118585497897829, "grad_norm": 0.2797519266605377, "learning_rate": 1.1605688337794825e-05, "loss": 1.8191, "step": 3090 }, { "epoch": 0.71208892472499, "grad_norm": 0.26347047090530396, "learning_rate": 1.1588714672023174e-05, "loss": 1.8679, "step": 3091 }, { "epoch": 0.712319299660197, "grad_norm": 0.26418834924697876, "learning_rate": 1.1571749681973895e-05, "loss": 1.7819, "step": 3092 }, { "epoch": 0.712549674595404, "grad_norm": 0.2901369333267212, "learning_rate": 1.1554793378621604e-05, "loss": 1.8568, "step": 3093 }, { "epoch": 0.712780049530611, "grad_norm": 0.2868339419364929, "learning_rate": 1.1537845772935279e-05, "loss": 1.7785, "step": 3094 }, { "epoch": 0.7130104244658181, "grad_norm": 0.27963727712631226, "learning_rate": 1.1520906875878285e-05, "loss": 1.8171, "step": 3095 }, { "epoch": 0.7132407994010251, "grad_norm": 0.270170658826828, "learning_rate": 1.1503976698408345e-05, "loss": 1.7943, "step": 3096 }, { "epoch": 0.7134711743362322, "grad_norm": 0.2871338725090027, "learning_rate": 1.1487055251477539e-05, "loss": 1.7979, "step": 3097 }, { "epoch": 0.7137015492714393, "grad_norm": 0.2788611650466919, "learning_rate": 1.1470142546032304e-05, "loss": 1.8384, "step": 3098 }, { "epoch": 0.7139319242066463, "grad_norm": 0.274203360080719, "learning_rate": 1.1453238593013424e-05, "loss": 1.8494, "step": 3099 }, { "epoch": 0.7141622991418534, "grad_norm": 0.2755934000015259, "learning_rate": 1.1436343403356017e-05, "loss": 1.8407, "step": 3100 }, { "epoch": 0.7143926740770604, "grad_norm": 0.27435731887817383, "learning_rate": 1.141945698798954e-05, "loss": 1.8205, "step": 3101 }, { "epoch": 0.7146230490122675, "grad_norm": 0.29314520955085754, "learning_rate": 1.1402579357837741e-05, "loss": 1.7948, "step": 3102 }, { "epoch": 0.7148534239474745, "grad_norm": 0.29471665620803833, "learning_rate": 1.1385710523818743e-05, "loss": 1.7376, "step": 3103 }, { "epoch": 0.7150837988826816, "grad_norm": 0.2833497226238251, "learning_rate": 1.1368850496844941e-05, "loss": 1.8155, "step": 3104 }, { "epoch": 0.7153141738178886, "grad_norm": 0.29524317383766174, "learning_rate": 1.1351999287823011e-05, "loss": 1.759, "step": 3105 }, { "epoch": 0.7155445487530957, "grad_norm": 0.2749376893043518, "learning_rate": 1.1335156907653985e-05, "loss": 1.7979, "step": 3106 }, { "epoch": 0.7157749236883028, "grad_norm": 0.27178260684013367, "learning_rate": 1.1318323367233146e-05, "loss": 1.8204, "step": 3107 }, { "epoch": 0.7160052986235098, "grad_norm": 0.29267576336860657, "learning_rate": 1.1301498677450037e-05, "loss": 1.8151, "step": 3108 }, { "epoch": 0.7162356735587168, "grad_norm": 0.28709912300109863, "learning_rate": 1.1284682849188537e-05, "loss": 1.7948, "step": 3109 }, { "epoch": 0.7164660484939238, "grad_norm": 0.2849646508693695, "learning_rate": 1.1267875893326738e-05, "loss": 1.7564, "step": 3110 }, { "epoch": 0.7166964234291309, "grad_norm": 0.2607845664024353, "learning_rate": 1.1251077820737003e-05, "loss": 1.8562, "step": 3111 }, { "epoch": 0.7169267983643379, "grad_norm": 0.27292609214782715, "learning_rate": 1.1234288642285987e-05, "loss": 1.8076, "step": 3112 }, { "epoch": 0.717157173299545, "grad_norm": 0.2836748957633972, "learning_rate": 1.1217508368834536e-05, "loss": 1.8016, "step": 3113 }, { "epoch": 0.717387548234752, "grad_norm": 0.267560750246048, "learning_rate": 1.1200737011237763e-05, "loss": 1.8169, "step": 3114 }, { "epoch": 0.7176179231699591, "grad_norm": 0.30464881658554077, "learning_rate": 1.1183974580345036e-05, "loss": 1.8171, "step": 3115 }, { "epoch": 0.7178482981051661, "grad_norm": 0.26762327551841736, "learning_rate": 1.1167221086999895e-05, "loss": 1.8065, "step": 3116 }, { "epoch": 0.7180786730403732, "grad_norm": 0.281147301197052, "learning_rate": 1.1150476542040143e-05, "loss": 1.8021, "step": 3117 }, { "epoch": 0.7183090479755803, "grad_norm": 0.27060404419898987, "learning_rate": 1.1133740956297769e-05, "loss": 1.8449, "step": 3118 }, { "epoch": 0.7185394229107873, "grad_norm": 0.2753790318965912, "learning_rate": 1.1117014340598986e-05, "loss": 1.8308, "step": 3119 }, { "epoch": 0.7187697978459944, "grad_norm": 0.30082687735557556, "learning_rate": 1.1100296705764184e-05, "loss": 1.8502, "step": 3120 }, { "epoch": 0.7190001727812014, "grad_norm": 0.2731691002845764, "learning_rate": 1.1083588062607963e-05, "loss": 1.8046, "step": 3121 }, { "epoch": 0.7192305477164085, "grad_norm": 0.29562678933143616, "learning_rate": 1.1066888421939093e-05, "loss": 1.8294, "step": 3122 }, { "epoch": 0.7194609226516155, "grad_norm": 0.2722376585006714, "learning_rate": 1.1050197794560521e-05, "loss": 1.8099, "step": 3123 }, { "epoch": 0.7196912975868226, "grad_norm": 0.2908334732055664, "learning_rate": 1.1033516191269371e-05, "loss": 1.8529, "step": 3124 }, { "epoch": 0.7199216725220297, "grad_norm": 0.2908092737197876, "learning_rate": 1.1016843622856923e-05, "loss": 1.8148, "step": 3125 }, { "epoch": 0.7201520474572366, "grad_norm": 0.2811799645423889, "learning_rate": 1.1000180100108612e-05, "loss": 1.8512, "step": 3126 }, { "epoch": 0.7203824223924437, "grad_norm": 0.29130369424819946, "learning_rate": 1.0983525633804028e-05, "loss": 1.8182, "step": 3127 }, { "epoch": 0.7206127973276507, "grad_norm": 0.318336546421051, "learning_rate": 1.0966880234716894e-05, "loss": 1.8064, "step": 3128 }, { "epoch": 0.7208431722628578, "grad_norm": 0.3424930274486542, "learning_rate": 1.0950243913615075e-05, "loss": 1.7577, "step": 3129 }, { "epoch": 0.7210735471980648, "grad_norm": 0.2770111858844757, "learning_rate": 1.0933616681260565e-05, "loss": 1.8022, "step": 3130 }, { "epoch": 0.7213039221332719, "grad_norm": 0.27987024188041687, "learning_rate": 1.0916998548409449e-05, "loss": 1.7943, "step": 3131 }, { "epoch": 0.7215342970684789, "grad_norm": 0.29537233710289, "learning_rate": 1.0900389525811975e-05, "loss": 1.7966, "step": 3132 }, { "epoch": 0.721764672003686, "grad_norm": 0.28178608417510986, "learning_rate": 1.0883789624212476e-05, "loss": 1.8109, "step": 3133 }, { "epoch": 0.721995046938893, "grad_norm": 0.3126186728477478, "learning_rate": 1.086719885434935e-05, "loss": 1.843, "step": 3134 }, { "epoch": 0.7222254218741001, "grad_norm": 0.3063524663448334, "learning_rate": 1.0850617226955149e-05, "loss": 1.8415, "step": 3135 }, { "epoch": 0.7224557968093072, "grad_norm": 0.29424482583999634, "learning_rate": 1.0834044752756478e-05, "loss": 1.8019, "step": 3136 }, { "epoch": 0.7226861717445142, "grad_norm": 0.24890874326229095, "learning_rate": 1.0817481442473998e-05, "loss": 1.8385, "step": 3137 }, { "epoch": 0.7229165466797213, "grad_norm": 0.3609786033630371, "learning_rate": 1.0800927306822504e-05, "loss": 1.8084, "step": 3138 }, { "epoch": 0.7231469216149283, "grad_norm": 0.30816441774368286, "learning_rate": 1.078438235651079e-05, "loss": 1.8194, "step": 3139 }, { "epoch": 0.7233772965501354, "grad_norm": 0.29790326952934265, "learning_rate": 1.0767846602241741e-05, "loss": 1.7546, "step": 3140 }, { "epoch": 0.7236076714853424, "grad_norm": 0.2701346278190613, "learning_rate": 1.0751320054712314e-05, "loss": 1.8022, "step": 3141 }, { "epoch": 0.7238380464205495, "grad_norm": 0.2733031213283539, "learning_rate": 1.0734802724613457e-05, "loss": 1.8661, "step": 3142 }, { "epoch": 0.7240684213557564, "grad_norm": 0.28817975521087646, "learning_rate": 1.0718294622630188e-05, "loss": 1.8357, "step": 3143 }, { "epoch": 0.7242987962909635, "grad_norm": 0.2966670095920563, "learning_rate": 1.0701795759441576e-05, "loss": 1.7927, "step": 3144 }, { "epoch": 0.7245291712261706, "grad_norm": 0.26579567790031433, "learning_rate": 1.068530614572066e-05, "loss": 1.8422, "step": 3145 }, { "epoch": 0.7247595461613776, "grad_norm": 0.27430954575538635, "learning_rate": 1.066882579213454e-05, "loss": 1.7674, "step": 3146 }, { "epoch": 0.7249899210965847, "grad_norm": 0.29704174399375916, "learning_rate": 1.06523547093443e-05, "loss": 1.8156, "step": 3147 }, { "epoch": 0.7252202960317917, "grad_norm": 0.3126538097858429, "learning_rate": 1.0635892908005046e-05, "loss": 1.7875, "step": 3148 }, { "epoch": 0.7254506709669988, "grad_norm": 0.30556002259254456, "learning_rate": 1.0619440398765864e-05, "loss": 1.8163, "step": 3149 }, { "epoch": 0.7256810459022058, "grad_norm": 0.27165189385414124, "learning_rate": 1.060299719226984e-05, "loss": 1.7787, "step": 3150 }, { "epoch": 0.7259114208374129, "grad_norm": 0.3017580211162567, "learning_rate": 1.058656329915403e-05, "loss": 1.7972, "step": 3151 }, { "epoch": 0.72614179577262, "grad_norm": 0.2666109502315521, "learning_rate": 1.0570138730049484e-05, "loss": 1.7807, "step": 3152 }, { "epoch": 0.726372170707827, "grad_norm": 0.29512521624565125, "learning_rate": 1.0553723495581203e-05, "loss": 1.7934, "step": 3153 }, { "epoch": 0.7266025456430341, "grad_norm": 0.26903393864631653, "learning_rate": 1.0537317606368164e-05, "loss": 1.7975, "step": 3154 }, { "epoch": 0.7268329205782411, "grad_norm": 0.2920481562614441, "learning_rate": 1.0520921073023263e-05, "loss": 1.778, "step": 3155 }, { "epoch": 0.7270632955134482, "grad_norm": 0.29241693019866943, "learning_rate": 1.0504533906153398e-05, "loss": 1.8268, "step": 3156 }, { "epoch": 0.7272936704486552, "grad_norm": 0.27005574107170105, "learning_rate": 1.0488156116359383e-05, "loss": 1.8651, "step": 3157 }, { "epoch": 0.7275240453838623, "grad_norm": 0.27161383628845215, "learning_rate": 1.0471787714235935e-05, "loss": 1.8233, "step": 3158 }, { "epoch": 0.7277544203190693, "grad_norm": 0.27943968772888184, "learning_rate": 1.0455428710371762e-05, "loss": 1.761, "step": 3159 }, { "epoch": 0.7279847952542763, "grad_norm": 0.2790951132774353, "learning_rate": 1.0439079115349432e-05, "loss": 1.8079, "step": 3160 }, { "epoch": 0.7282151701894833, "grad_norm": 0.25877198576927185, "learning_rate": 1.0422738939745452e-05, "loss": 1.848, "step": 3161 }, { "epoch": 0.7284455451246904, "grad_norm": 0.34319621324539185, "learning_rate": 1.0406408194130259e-05, "loss": 1.7853, "step": 3162 }, { "epoch": 0.7286759200598975, "grad_norm": 0.25897225737571716, "learning_rate": 1.0390086889068142e-05, "loss": 1.8333, "step": 3163 }, { "epoch": 0.7289062949951045, "grad_norm": 0.27443841099739075, "learning_rate": 1.0373775035117305e-05, "loss": 1.8203, "step": 3164 }, { "epoch": 0.7291366699303116, "grad_norm": 0.2766304910182953, "learning_rate": 1.0357472642829869e-05, "loss": 1.8138, "step": 3165 }, { "epoch": 0.7293670448655186, "grad_norm": 0.2707384526729584, "learning_rate": 1.0341179722751777e-05, "loss": 1.7852, "step": 3166 }, { "epoch": 0.7295974198007257, "grad_norm": 0.24030984938144684, "learning_rate": 1.032489628542288e-05, "loss": 1.8222, "step": 3167 }, { "epoch": 0.7298277947359327, "grad_norm": 0.28437280654907227, "learning_rate": 1.0308622341376892e-05, "loss": 1.8535, "step": 3168 }, { "epoch": 0.7300581696711398, "grad_norm": 0.27590861916542053, "learning_rate": 1.0292357901141375e-05, "loss": 1.7918, "step": 3169 }, { "epoch": 0.7302885446063468, "grad_norm": 0.27357959747314453, "learning_rate": 1.0276102975237754e-05, "loss": 1.8233, "step": 3170 }, { "epoch": 0.7305189195415539, "grad_norm": 0.2753695845603943, "learning_rate": 1.0259857574181292e-05, "loss": 1.8296, "step": 3171 }, { "epoch": 0.730749294476761, "grad_norm": 0.2714838683605194, "learning_rate": 1.0243621708481097e-05, "loss": 1.8052, "step": 3172 }, { "epoch": 0.730979669411968, "grad_norm": 0.2658763825893402, "learning_rate": 1.0227395388640096e-05, "loss": 1.8013, "step": 3173 }, { "epoch": 0.7312100443471751, "grad_norm": 0.2854185700416565, "learning_rate": 1.0211178625155057e-05, "loss": 1.8558, "step": 3174 }, { "epoch": 0.7314404192823821, "grad_norm": 0.2947908639907837, "learning_rate": 1.0194971428516555e-05, "loss": 1.817, "step": 3175 }, { "epoch": 0.7316707942175891, "grad_norm": 0.2593194246292114, "learning_rate": 1.017877380920898e-05, "loss": 1.8339, "step": 3176 }, { "epoch": 0.7319011691527961, "grad_norm": 0.2843533754348755, "learning_rate": 1.0162585777710526e-05, "loss": 1.7912, "step": 3177 }, { "epoch": 0.7321315440880032, "grad_norm": 0.3903351128101349, "learning_rate": 1.0146407344493186e-05, "loss": 1.8418, "step": 3178 }, { "epoch": 0.7323619190232102, "grad_norm": 0.3211487829685211, "learning_rate": 1.013023852002274e-05, "loss": 1.755, "step": 3179 }, { "epoch": 0.7325922939584173, "grad_norm": 0.2915933132171631, "learning_rate": 1.0114079314758754e-05, "loss": 1.7938, "step": 3180 }, { "epoch": 0.7328226688936244, "grad_norm": 0.3052954077720642, "learning_rate": 1.0097929739154573e-05, "loss": 1.7995, "step": 3181 }, { "epoch": 0.7330530438288314, "grad_norm": 0.270112007856369, "learning_rate": 1.0081789803657316e-05, "loss": 1.8137, "step": 3182 }, { "epoch": 0.7332834187640385, "grad_norm": 0.2920215427875519, "learning_rate": 1.0065659518707865e-05, "loss": 1.8409, "step": 3183 }, { "epoch": 0.7335137936992455, "grad_norm": 0.28531619906425476, "learning_rate": 1.004953889474083e-05, "loss": 1.7837, "step": 3184 }, { "epoch": 0.7337441686344526, "grad_norm": 0.3175010681152344, "learning_rate": 1.0033427942184622e-05, "loss": 1.7591, "step": 3185 }, { "epoch": 0.7339745435696596, "grad_norm": 0.2879771888256073, "learning_rate": 1.0017326671461375e-05, "loss": 1.8519, "step": 3186 }, { "epoch": 0.7342049185048667, "grad_norm": 0.2822569012641907, "learning_rate": 1.0001235092986924e-05, "loss": 1.796, "step": 3187 }, { "epoch": 0.7344352934400737, "grad_norm": 0.32143279910087585, "learning_rate": 9.985153217170903e-06, "loss": 1.7865, "step": 3188 }, { "epoch": 0.7346656683752808, "grad_norm": 0.25991228222846985, "learning_rate": 9.969081054416604e-06, "loss": 1.7955, "step": 3189 }, { "epoch": 0.7348960433104879, "grad_norm": 0.28120145201683044, "learning_rate": 9.953018615121064e-06, "loss": 1.8422, "step": 3190 }, { "epoch": 0.7351264182456949, "grad_norm": 0.26654064655303955, "learning_rate": 9.936965909675061e-06, "loss": 1.8424, "step": 3191 }, { "epoch": 0.735356793180902, "grad_norm": 0.29728877544403076, "learning_rate": 9.92092294846301e-06, "loss": 1.7854, "step": 3192 }, { "epoch": 0.7355871681161089, "grad_norm": 0.26970410346984863, "learning_rate": 9.90488974186306e-06, "loss": 1.831, "step": 3193 }, { "epoch": 0.735817543051316, "grad_norm": 0.3056948184967041, "learning_rate": 9.888866300247077e-06, "loss": 1.7875, "step": 3194 }, { "epoch": 0.736047917986523, "grad_norm": 0.27815401554107666, "learning_rate": 9.872852633980544e-06, "loss": 1.7658, "step": 3195 }, { "epoch": 0.7362782929217301, "grad_norm": 0.2765747606754303, "learning_rate": 9.856848753422676e-06, "loss": 1.8276, "step": 3196 }, { "epoch": 0.7365086678569371, "grad_norm": 0.3567811846733093, "learning_rate": 9.840854668926333e-06, "loss": 1.79, "step": 3197 }, { "epoch": 0.7367390427921442, "grad_norm": 0.30469316244125366, "learning_rate": 9.824870390838042e-06, "loss": 1.8455, "step": 3198 }, { "epoch": 0.7369694177273513, "grad_norm": 0.2936754822731018, "learning_rate": 9.808895929497986e-06, "loss": 1.7843, "step": 3199 }, { "epoch": 0.7371997926625583, "grad_norm": 0.27403199672698975, "learning_rate": 9.792931295240002e-06, "loss": 1.7897, "step": 3200 }, { "epoch": 0.7374301675977654, "grad_norm": 0.27644360065460205, "learning_rate": 9.776976498391566e-06, "loss": 1.7855, "step": 3201 }, { "epoch": 0.7376605425329724, "grad_norm": 0.27243703603744507, "learning_rate": 9.761031549273788e-06, "loss": 1.8159, "step": 3202 }, { "epoch": 0.7378909174681795, "grad_norm": 0.27694347500801086, "learning_rate": 9.745096458201414e-06, "loss": 1.778, "step": 3203 }, { "epoch": 0.7381212924033865, "grad_norm": 0.27833202481269836, "learning_rate": 9.729171235482815e-06, "loss": 1.8097, "step": 3204 }, { "epoch": 0.7383516673385936, "grad_norm": 0.30770817399024963, "learning_rate": 9.713255891419948e-06, "loss": 1.809, "step": 3205 }, { "epoch": 0.7385820422738006, "grad_norm": 0.2723585367202759, "learning_rate": 9.697350436308427e-06, "loss": 1.8082, "step": 3206 }, { "epoch": 0.7388124172090077, "grad_norm": 0.274852454662323, "learning_rate": 9.681454880437451e-06, "loss": 1.8217, "step": 3207 }, { "epoch": 0.7390427921442148, "grad_norm": 0.26969432830810547, "learning_rate": 9.66556923408978e-06, "loss": 1.7719, "step": 3208 }, { "epoch": 0.7392731670794218, "grad_norm": 0.2648554742336273, "learning_rate": 9.649693507541818e-06, "loss": 1.7926, "step": 3209 }, { "epoch": 0.7395035420146288, "grad_norm": 0.2826301157474518, "learning_rate": 9.633827711063534e-06, "loss": 1.7381, "step": 3210 }, { "epoch": 0.7397339169498358, "grad_norm": 0.26882413029670715, "learning_rate": 9.617971854918433e-06, "loss": 1.814, "step": 3211 }, { "epoch": 0.7399642918850429, "grad_norm": 0.27366578578948975, "learning_rate": 9.602125949363664e-06, "loss": 1.7876, "step": 3212 }, { "epoch": 0.7401946668202499, "grad_norm": 0.2702476680278778, "learning_rate": 9.586290004649866e-06, "loss": 1.7939, "step": 3213 }, { "epoch": 0.740425041755457, "grad_norm": 0.2954297363758087, "learning_rate": 9.570464031021273e-06, "loss": 1.7913, "step": 3214 }, { "epoch": 0.740655416690664, "grad_norm": 0.30569690465927124, "learning_rate": 9.554648038715685e-06, "loss": 1.761, "step": 3215 }, { "epoch": 0.7408857916258711, "grad_norm": 0.26410189270973206, "learning_rate": 9.538842037964397e-06, "loss": 1.8242, "step": 3216 }, { "epoch": 0.7411161665610781, "grad_norm": 0.27778398990631104, "learning_rate": 9.523046038992267e-06, "loss": 1.8022, "step": 3217 }, { "epoch": 0.7413465414962852, "grad_norm": 0.2960231602191925, "learning_rate": 9.507260052017708e-06, "loss": 1.7751, "step": 3218 }, { "epoch": 0.7415769164314923, "grad_norm": 0.27225571870803833, "learning_rate": 9.4914840872526e-06, "loss": 1.7906, "step": 3219 }, { "epoch": 0.7418072913666993, "grad_norm": 0.27159231901168823, "learning_rate": 9.475718154902382e-06, "loss": 1.7983, "step": 3220 }, { "epoch": 0.7420376663019064, "grad_norm": 0.32868847250938416, "learning_rate": 9.459962265165995e-06, "loss": 1.7755, "step": 3221 }, { "epoch": 0.7422680412371134, "grad_norm": 0.2965604364871979, "learning_rate": 9.444216428235872e-06, "loss": 1.8356, "step": 3222 }, { "epoch": 0.7424984161723205, "grad_norm": 0.2780265808105469, "learning_rate": 9.428480654297952e-06, "loss": 1.8245, "step": 3223 }, { "epoch": 0.7427287911075275, "grad_norm": 0.2743082642555237, "learning_rate": 9.412754953531663e-06, "loss": 1.7918, "step": 3224 }, { "epoch": 0.7429591660427346, "grad_norm": 0.2714844346046448, "learning_rate": 9.397039336109919e-06, "loss": 1.8484, "step": 3225 }, { "epoch": 0.7431895409779417, "grad_norm": 0.28346091508865356, "learning_rate": 9.381333812199105e-06, "loss": 1.8319, "step": 3226 }, { "epoch": 0.7434199159131486, "grad_norm": 0.27233585715293884, "learning_rate": 9.36563839195908e-06, "loss": 1.7684, "step": 3227 }, { "epoch": 0.7436502908483557, "grad_norm": 0.29621586203575134, "learning_rate": 9.349953085543167e-06, "loss": 1.7844, "step": 3228 }, { "epoch": 0.7438806657835627, "grad_norm": 0.2833784818649292, "learning_rate": 9.334277903098148e-06, "loss": 1.8299, "step": 3229 }, { "epoch": 0.7441110407187698, "grad_norm": 0.2817254066467285, "learning_rate": 9.318612854764253e-06, "loss": 1.7927, "step": 3230 }, { "epoch": 0.7443414156539768, "grad_norm": 0.29320815205574036, "learning_rate": 9.302957950675156e-06, "loss": 1.8009, "step": 3231 }, { "epoch": 0.7445717905891839, "grad_norm": 0.26255279779434204, "learning_rate": 9.28731320095797e-06, "loss": 1.8123, "step": 3232 }, { "epoch": 0.7448021655243909, "grad_norm": 0.31811514496803284, "learning_rate": 9.271678615733252e-06, "loss": 1.7989, "step": 3233 }, { "epoch": 0.745032540459598, "grad_norm": 0.3301912844181061, "learning_rate": 9.256054205114939e-06, "loss": 1.799, "step": 3234 }, { "epoch": 0.745262915394805, "grad_norm": 0.2626081705093384, "learning_rate": 9.240439979210444e-06, "loss": 1.7764, "step": 3235 }, { "epoch": 0.7454932903300121, "grad_norm": 0.25490784645080566, "learning_rate": 9.224835948120572e-06, "loss": 1.8337, "step": 3236 }, { "epoch": 0.7457236652652192, "grad_norm": 0.2753852605819702, "learning_rate": 9.209242121939493e-06, "loss": 1.7917, "step": 3237 }, { "epoch": 0.7459540402004262, "grad_norm": 0.27070173621177673, "learning_rate": 9.193658510754841e-06, "loss": 1.7729, "step": 3238 }, { "epoch": 0.7461844151356333, "grad_norm": 0.27347081899642944, "learning_rate": 9.178085124647603e-06, "loss": 1.7987, "step": 3239 }, { "epoch": 0.7464147900708403, "grad_norm": 0.30872678756713867, "learning_rate": 9.16252197369214e-06, "loss": 1.7894, "step": 3240 }, { "epoch": 0.7466451650060474, "grad_norm": 0.2690551280975342, "learning_rate": 9.146969067956238e-06, "loss": 1.7882, "step": 3241 }, { "epoch": 0.7468755399412544, "grad_norm": 0.28601008653640747, "learning_rate": 9.131426417501005e-06, "loss": 1.8, "step": 3242 }, { "epoch": 0.7471059148764614, "grad_norm": 0.2779127061367035, "learning_rate": 9.115894032380937e-06, "loss": 1.7726, "step": 3243 }, { "epoch": 0.7473362898116684, "grad_norm": 0.2643921971321106, "learning_rate": 9.100371922643913e-06, "loss": 1.8156, "step": 3244 }, { "epoch": 0.7475666647468755, "grad_norm": 0.28133416175842285, "learning_rate": 9.084860098331122e-06, "loss": 1.7987, "step": 3245 }, { "epoch": 0.7477970396820826, "grad_norm": 0.2685142159461975, "learning_rate": 9.069358569477113e-06, "loss": 1.7754, "step": 3246 }, { "epoch": 0.7480274146172896, "grad_norm": 0.28941860795021057, "learning_rate": 9.05386734610981e-06, "loss": 1.8011, "step": 3247 }, { "epoch": 0.7482577895524967, "grad_norm": 0.2927720844745636, "learning_rate": 9.038386438250415e-06, "loss": 1.7853, "step": 3248 }, { "epoch": 0.7484881644877037, "grad_norm": 0.2620724141597748, "learning_rate": 9.022915855913492e-06, "loss": 1.7531, "step": 3249 }, { "epoch": 0.7487185394229108, "grad_norm": 0.27957969903945923, "learning_rate": 9.007455609106915e-06, "loss": 1.7655, "step": 3250 }, { "epoch": 0.7489489143581178, "grad_norm": 0.274221807718277, "learning_rate": 8.992005707831876e-06, "loss": 1.7959, "step": 3251 }, { "epoch": 0.7491792892933249, "grad_norm": 0.2443668693304062, "learning_rate": 8.976566162082872e-06, "loss": 1.8306, "step": 3252 }, { "epoch": 0.749409664228532, "grad_norm": 0.31528815627098083, "learning_rate": 8.9611369818477e-06, "loss": 1.7684, "step": 3253 }, { "epoch": 0.749640039163739, "grad_norm": 0.2636263966560364, "learning_rate": 8.945718177107465e-06, "loss": 1.8135, "step": 3254 }, { "epoch": 0.7498704140989461, "grad_norm": 0.2811656594276428, "learning_rate": 8.930309757836517e-06, "loss": 1.7971, "step": 3255 }, { "epoch": 0.7501007890341531, "grad_norm": 0.28934717178344727, "learning_rate": 8.914911734002548e-06, "loss": 1.8124, "step": 3256 }, { "epoch": 0.7503311639693602, "grad_norm": 0.2819565236568451, "learning_rate": 8.899524115566493e-06, "loss": 1.7967, "step": 3257 }, { "epoch": 0.7505615389045672, "grad_norm": 0.2903241515159607, "learning_rate": 8.884146912482535e-06, "loss": 1.8208, "step": 3258 }, { "epoch": 0.7507919138397743, "grad_norm": 0.27068454027175903, "learning_rate": 8.868780134698168e-06, "loss": 1.8184, "step": 3259 }, { "epoch": 0.7510222887749812, "grad_norm": 0.26937758922576904, "learning_rate": 8.85342379215412e-06, "loss": 1.7935, "step": 3260 }, { "epoch": 0.7512526637101883, "grad_norm": 0.2775340974330902, "learning_rate": 8.838077894784333e-06, "loss": 1.8211, "step": 3261 }, { "epoch": 0.7514830386453953, "grad_norm": 0.2747703492641449, "learning_rate": 8.822742452516064e-06, "loss": 1.7859, "step": 3262 }, { "epoch": 0.7517134135806024, "grad_norm": 0.25367775559425354, "learning_rate": 8.80741747526974e-06, "loss": 1.8537, "step": 3263 }, { "epoch": 0.7519437885158095, "grad_norm": 0.2704440653324127, "learning_rate": 8.792102972959049e-06, "loss": 1.822, "step": 3264 }, { "epoch": 0.7521741634510165, "grad_norm": 0.30925291776657104, "learning_rate": 8.776798955490917e-06, "loss": 1.8225, "step": 3265 }, { "epoch": 0.7524045383862236, "grad_norm": 0.26987722516059875, "learning_rate": 8.761505432765446e-06, "loss": 1.8489, "step": 3266 }, { "epoch": 0.7526349133214306, "grad_norm": 0.29807358980178833, "learning_rate": 8.746222414675978e-06, "loss": 1.8049, "step": 3267 }, { "epoch": 0.7528652882566377, "grad_norm": 0.27147310972213745, "learning_rate": 8.730949911109074e-06, "loss": 1.8039, "step": 3268 }, { "epoch": 0.7530956631918447, "grad_norm": 0.2885366678237915, "learning_rate": 8.715687931944449e-06, "loss": 1.8039, "step": 3269 }, { "epoch": 0.7533260381270518, "grad_norm": 0.28416264057159424, "learning_rate": 8.700436487055045e-06, "loss": 1.8433, "step": 3270 }, { "epoch": 0.7535564130622588, "grad_norm": 0.2952539324760437, "learning_rate": 8.68519558630697e-06, "loss": 1.7647, "step": 3271 }, { "epoch": 0.7537867879974659, "grad_norm": 0.2797040045261383, "learning_rate": 8.669965239559533e-06, "loss": 1.8254, "step": 3272 }, { "epoch": 0.754017162932673, "grad_norm": 0.2801256477832794, "learning_rate": 8.65474545666519e-06, "loss": 1.7921, "step": 3273 }, { "epoch": 0.75424753786788, "grad_norm": 0.3131978213787079, "learning_rate": 8.639536247469582e-06, "loss": 1.7665, "step": 3274 }, { "epoch": 0.7544779128030871, "grad_norm": 0.2733227610588074, "learning_rate": 8.624337621811499e-06, "loss": 1.8141, "step": 3275 }, { "epoch": 0.7547082877382941, "grad_norm": 0.37430471181869507, "learning_rate": 8.609149589522894e-06, "loss": 1.7442, "step": 3276 }, { "epoch": 0.7549386626735011, "grad_norm": 0.3089756369590759, "learning_rate": 8.593972160428856e-06, "loss": 1.7849, "step": 3277 }, { "epoch": 0.7551690376087081, "grad_norm": 0.2756957411766052, "learning_rate": 8.578805344347623e-06, "loss": 1.8238, "step": 3278 }, { "epoch": 0.7553994125439152, "grad_norm": 0.2844947576522827, "learning_rate": 8.563649151090569e-06, "loss": 1.826, "step": 3279 }, { "epoch": 0.7556297874791222, "grad_norm": 0.2719784379005432, "learning_rate": 8.548503590462186e-06, "loss": 1.7999, "step": 3280 }, { "epoch": 0.7558601624143293, "grad_norm": 0.25956887006759644, "learning_rate": 8.5333686722601e-06, "loss": 1.8281, "step": 3281 }, { "epoch": 0.7560905373495364, "grad_norm": 0.2814229428768158, "learning_rate": 8.518244406275045e-06, "loss": 1.7976, "step": 3282 }, { "epoch": 0.7563209122847434, "grad_norm": 0.28179362416267395, "learning_rate": 8.503130802290863e-06, "loss": 1.7961, "step": 3283 }, { "epoch": 0.7565512872199505, "grad_norm": 0.2672673165798187, "learning_rate": 8.4880278700845e-06, "loss": 1.7953, "step": 3284 }, { "epoch": 0.7567816621551575, "grad_norm": 0.27959567308425903, "learning_rate": 8.472935619426006e-06, "loss": 1.78, "step": 3285 }, { "epoch": 0.7570120370903646, "grad_norm": 0.27133825421333313, "learning_rate": 8.45785406007852e-06, "loss": 1.817, "step": 3286 }, { "epoch": 0.7572424120255716, "grad_norm": 0.2697560787200928, "learning_rate": 8.442783201798237e-06, "loss": 1.7867, "step": 3287 }, { "epoch": 0.7574727869607787, "grad_norm": 0.27842018008232117, "learning_rate": 8.427723054334474e-06, "loss": 1.788, "step": 3288 }, { "epoch": 0.7577031618959857, "grad_norm": 0.48152342438697815, "learning_rate": 8.4126736274296e-06, "loss": 1.8177, "step": 3289 }, { "epoch": 0.7579335368311928, "grad_norm": 0.27487555146217346, "learning_rate": 8.397634930819021e-06, "loss": 1.8259, "step": 3290 }, { "epoch": 0.7581639117663999, "grad_norm": 0.2988632023334503, "learning_rate": 8.382606974231261e-06, "loss": 1.8023, "step": 3291 }, { "epoch": 0.7583942867016069, "grad_norm": 0.2814781367778778, "learning_rate": 8.367589767387834e-06, "loss": 1.8373, "step": 3292 }, { "epoch": 0.758624661636814, "grad_norm": 0.2705274820327759, "learning_rate": 8.352583320003336e-06, "loss": 1.8014, "step": 3293 }, { "epoch": 0.7588550365720209, "grad_norm": 0.27117669582366943, "learning_rate": 8.33758764178541e-06, "loss": 1.8108, "step": 3294 }, { "epoch": 0.759085411507228, "grad_norm": 0.27182239294052124, "learning_rate": 8.322602742434701e-06, "loss": 1.8013, "step": 3295 }, { "epoch": 0.759315786442435, "grad_norm": 0.31706130504608154, "learning_rate": 8.307628631644903e-06, "loss": 1.787, "step": 3296 }, { "epoch": 0.7595461613776421, "grad_norm": 0.30248165130615234, "learning_rate": 8.292665319102728e-06, "loss": 1.8, "step": 3297 }, { "epoch": 0.7597765363128491, "grad_norm": 0.30354106426239014, "learning_rate": 8.277712814487895e-06, "loss": 1.7767, "step": 3298 }, { "epoch": 0.7600069112480562, "grad_norm": 0.3005891740322113, "learning_rate": 8.262771127473143e-06, "loss": 1.7694, "step": 3299 }, { "epoch": 0.7602372861832633, "grad_norm": 0.2734683156013489, "learning_rate": 8.247840267724203e-06, "loss": 1.7869, "step": 3300 }, { "epoch": 0.7604676611184703, "grad_norm": 0.25632330775260925, "learning_rate": 8.232920244899805e-06, "loss": 1.8275, "step": 3301 }, { "epoch": 0.7606980360536774, "grad_norm": 0.3364461362361908, "learning_rate": 8.218011068651673e-06, "loss": 1.7777, "step": 3302 }, { "epoch": 0.7609284109888844, "grad_norm": 0.2740950882434845, "learning_rate": 8.20311274862451e-06, "loss": 1.8237, "step": 3303 }, { "epoch": 0.7611587859240915, "grad_norm": 0.2743265628814697, "learning_rate": 8.188225294455992e-06, "loss": 1.8152, "step": 3304 }, { "epoch": 0.7613891608592985, "grad_norm": 0.27799153327941895, "learning_rate": 8.173348715776777e-06, "loss": 1.7921, "step": 3305 }, { "epoch": 0.7616195357945056, "grad_norm": 0.2820511758327484, "learning_rate": 8.158483022210478e-06, "loss": 1.858, "step": 3306 }, { "epoch": 0.7618499107297126, "grad_norm": 0.29499122500419617, "learning_rate": 8.14362822337368e-06, "loss": 1.7873, "step": 3307 }, { "epoch": 0.7620802856649197, "grad_norm": 0.31190818548202515, "learning_rate": 8.128784328875885e-06, "loss": 1.7836, "step": 3308 }, { "epoch": 0.7623106606001268, "grad_norm": 0.2771385908126831, "learning_rate": 8.11395134831959e-06, "loss": 1.794, "step": 3309 }, { "epoch": 0.7625410355353338, "grad_norm": 0.27025073766708374, "learning_rate": 8.099129291300209e-06, "loss": 1.8251, "step": 3310 }, { "epoch": 0.7627714104705408, "grad_norm": 0.26839718222618103, "learning_rate": 8.084318167406063e-06, "loss": 1.8372, "step": 3311 }, { "epoch": 0.7630017854057478, "grad_norm": 0.2908474802970886, "learning_rate": 8.069517986218453e-06, "loss": 1.8049, "step": 3312 }, { "epoch": 0.7632321603409549, "grad_norm": 0.28028973937034607, "learning_rate": 8.054728757311573e-06, "loss": 1.831, "step": 3313 }, { "epoch": 0.7634625352761619, "grad_norm": 0.26137226819992065, "learning_rate": 8.039950490252505e-06, "loss": 1.824, "step": 3314 }, { "epoch": 0.763692910211369, "grad_norm": 0.27614331245422363, "learning_rate": 8.025183194601304e-06, "loss": 1.8268, "step": 3315 }, { "epoch": 0.763923285146576, "grad_norm": 0.28644701838493347, "learning_rate": 8.010426879910868e-06, "loss": 1.806, "step": 3316 }, { "epoch": 0.7641536600817831, "grad_norm": 0.25692033767700195, "learning_rate": 7.99568155572701e-06, "loss": 1.8425, "step": 3317 }, { "epoch": 0.7643840350169901, "grad_norm": 0.27543511986732483, "learning_rate": 7.980947231588471e-06, "loss": 1.8082, "step": 3318 }, { "epoch": 0.7646144099521972, "grad_norm": 0.27254635095596313, "learning_rate": 7.966223917026813e-06, "loss": 1.8056, "step": 3319 }, { "epoch": 0.7648447848874043, "grad_norm": 0.29552149772644043, "learning_rate": 7.951511621566516e-06, "loss": 1.8232, "step": 3320 }, { "epoch": 0.7650751598226113, "grad_norm": 0.25874099135398865, "learning_rate": 7.936810354724922e-06, "loss": 1.805, "step": 3321 }, { "epoch": 0.7653055347578184, "grad_norm": 0.28873616456985474, "learning_rate": 7.922120126012239e-06, "loss": 1.7815, "step": 3322 }, { "epoch": 0.7655359096930254, "grad_norm": 0.284698486328125, "learning_rate": 7.907440944931536e-06, "loss": 1.7619, "step": 3323 }, { "epoch": 0.7657662846282325, "grad_norm": 0.2732622027397156, "learning_rate": 7.89277282097873e-06, "loss": 1.8037, "step": 3324 }, { "epoch": 0.7659966595634395, "grad_norm": 0.30493035912513733, "learning_rate": 7.87811576364259e-06, "loss": 1.7695, "step": 3325 }, { "epoch": 0.7662270344986466, "grad_norm": 0.2738823890686035, "learning_rate": 7.863469782404725e-06, "loss": 1.8109, "step": 3326 }, { "epoch": 0.7664574094338535, "grad_norm": 0.28531473875045776, "learning_rate": 7.848834886739582e-06, "loss": 1.8045, "step": 3327 }, { "epoch": 0.7666877843690606, "grad_norm": 0.2714003026485443, "learning_rate": 7.834211086114427e-06, "loss": 1.772, "step": 3328 }, { "epoch": 0.7669181593042677, "grad_norm": 0.29338911175727844, "learning_rate": 7.819598389989358e-06, "loss": 1.7726, "step": 3329 }, { "epoch": 0.7671485342394747, "grad_norm": 0.28901827335357666, "learning_rate": 7.804996807817288e-06, "loss": 1.7765, "step": 3330 }, { "epoch": 0.7673789091746818, "grad_norm": 0.2625153362751007, "learning_rate": 7.790406349043938e-06, "loss": 1.8062, "step": 3331 }, { "epoch": 0.7676092841098888, "grad_norm": 0.30268511176109314, "learning_rate": 7.775827023107835e-06, "loss": 1.8053, "step": 3332 }, { "epoch": 0.7678396590450959, "grad_norm": 0.27547672390937805, "learning_rate": 7.761258839440303e-06, "loss": 1.8029, "step": 3333 }, { "epoch": 0.7680700339803029, "grad_norm": 0.2660207748413086, "learning_rate": 7.74670180746546e-06, "loss": 1.8222, "step": 3334 }, { "epoch": 0.76830040891551, "grad_norm": 0.27756378054618835, "learning_rate": 7.732155936600202e-06, "loss": 1.7896, "step": 3335 }, { "epoch": 0.768530783850717, "grad_norm": 0.25876009464263916, "learning_rate": 7.71762123625423e-06, "loss": 1.8143, "step": 3336 }, { "epoch": 0.7687611587859241, "grad_norm": 0.279329776763916, "learning_rate": 7.703097715829969e-06, "loss": 1.7614, "step": 3337 }, { "epoch": 0.7689915337211312, "grad_norm": 0.29333215951919556, "learning_rate": 7.688585384722668e-06, "loss": 1.8012, "step": 3338 }, { "epoch": 0.7692219086563382, "grad_norm": 0.2800804674625397, "learning_rate": 7.674084252320315e-06, "loss": 1.8151, "step": 3339 }, { "epoch": 0.7694522835915453, "grad_norm": 0.3330163359642029, "learning_rate": 7.659594328003625e-06, "loss": 1.7807, "step": 3340 }, { "epoch": 0.7696826585267523, "grad_norm": 0.286993145942688, "learning_rate": 7.645115621146115e-06, "loss": 1.7666, "step": 3341 }, { "epoch": 0.7699130334619594, "grad_norm": 0.2863076627254486, "learning_rate": 7.63064814111402e-06, "loss": 1.8049, "step": 3342 }, { "epoch": 0.7701434083971664, "grad_norm": 0.27516666054725647, "learning_rate": 7.6161918972662925e-06, "loss": 1.8042, "step": 3343 }, { "epoch": 0.7703737833323734, "grad_norm": 0.26822471618652344, "learning_rate": 7.601746898954645e-06, "loss": 1.8209, "step": 3344 }, { "epoch": 0.7706041582675804, "grad_norm": 0.3164624869823456, "learning_rate": 7.587313155523507e-06, "loss": 1.7497, "step": 3345 }, { "epoch": 0.7708345332027875, "grad_norm": 0.3164377808570862, "learning_rate": 7.572890676310026e-06, "loss": 1.7196, "step": 3346 }, { "epoch": 0.7710649081379946, "grad_norm": 0.29858776926994324, "learning_rate": 7.558479470644062e-06, "loss": 1.8374, "step": 3347 }, { "epoch": 0.7712952830732016, "grad_norm": 0.2687447965145111, "learning_rate": 7.5440795478481815e-06, "loss": 1.8465, "step": 3348 }, { "epoch": 0.7715256580084087, "grad_norm": 0.3198637366294861, "learning_rate": 7.5296909172376555e-06, "loss": 1.732, "step": 3349 }, { "epoch": 0.7717560329436157, "grad_norm": 0.2874554395675659, "learning_rate": 7.515313588120451e-06, "loss": 1.8123, "step": 3350 }, { "epoch": 0.7719864078788228, "grad_norm": 0.27386170625686646, "learning_rate": 7.500947569797214e-06, "loss": 1.8008, "step": 3351 }, { "epoch": 0.7722167828140298, "grad_norm": 0.2700245678424835, "learning_rate": 7.48659287156129e-06, "loss": 1.8108, "step": 3352 }, { "epoch": 0.7724471577492369, "grad_norm": 0.29302743077278137, "learning_rate": 7.472249502698686e-06, "loss": 1.7974, "step": 3353 }, { "epoch": 0.772677532684444, "grad_norm": 0.2978028357028961, "learning_rate": 7.4579174724880875e-06, "loss": 1.7738, "step": 3354 }, { "epoch": 0.772907907619651, "grad_norm": 0.273666650056839, "learning_rate": 7.4435967902008465e-06, "loss": 1.7982, "step": 3355 }, { "epoch": 0.7731382825548581, "grad_norm": 0.2964412569999695, "learning_rate": 7.429287465100968e-06, "loss": 1.7553, "step": 3356 }, { "epoch": 0.7733686574900651, "grad_norm": 0.2646833062171936, "learning_rate": 7.414989506445128e-06, "loss": 1.8261, "step": 3357 }, { "epoch": 0.7735990324252722, "grad_norm": 0.2775539755821228, "learning_rate": 7.400702923482603e-06, "loss": 1.8237, "step": 3358 }, { "epoch": 0.7738294073604792, "grad_norm": 0.2694000005722046, "learning_rate": 7.386427725455372e-06, "loss": 1.8236, "step": 3359 }, { "epoch": 0.7740597822956863, "grad_norm": 0.2709178924560547, "learning_rate": 7.3721639215980216e-06, "loss": 1.8425, "step": 3360 }, { "epoch": 0.7742901572308932, "grad_norm": 0.2711578905582428, "learning_rate": 7.3579115211377395e-06, "loss": 1.8164, "step": 3361 }, { "epoch": 0.7745205321661003, "grad_norm": 0.29309016466140747, "learning_rate": 7.3436705332943865e-06, "loss": 1.7964, "step": 3362 }, { "epoch": 0.7747509071013073, "grad_norm": 0.30336782336235046, "learning_rate": 7.3294409672804184e-06, "loss": 1.777, "step": 3363 }, { "epoch": 0.7749812820365144, "grad_norm": 0.2855885326862335, "learning_rate": 7.315222832300875e-06, "loss": 1.7861, "step": 3364 }, { "epoch": 0.7752116569717215, "grad_norm": 0.2600601315498352, "learning_rate": 7.301016137553463e-06, "loss": 1.782, "step": 3365 }, { "epoch": 0.7754420319069285, "grad_norm": 0.2675277292728424, "learning_rate": 7.286820892228427e-06, "loss": 1.8126, "step": 3366 }, { "epoch": 0.7756724068421356, "grad_norm": 0.2820471525192261, "learning_rate": 7.272637105508631e-06, "loss": 1.8074, "step": 3367 }, { "epoch": 0.7759027817773426, "grad_norm": 0.2773817181587219, "learning_rate": 7.258464786569549e-06, "loss": 1.8107, "step": 3368 }, { "epoch": 0.7761331567125497, "grad_norm": 0.27804994583129883, "learning_rate": 7.244303944579192e-06, "loss": 1.8099, "step": 3369 }, { "epoch": 0.7763635316477567, "grad_norm": 0.2784268856048584, "learning_rate": 7.230154588698165e-06, "loss": 1.805, "step": 3370 }, { "epoch": 0.7765939065829638, "grad_norm": 0.29355987906455994, "learning_rate": 7.216016728079675e-06, "loss": 1.7257, "step": 3371 }, { "epoch": 0.7768242815181708, "grad_norm": 0.2675478160381317, "learning_rate": 7.201890371869438e-06, "loss": 1.8319, "step": 3372 }, { "epoch": 0.7770546564533779, "grad_norm": 0.29241257905960083, "learning_rate": 7.187775529205759e-06, "loss": 1.768, "step": 3373 }, { "epoch": 0.777285031388585, "grad_norm": 0.2830950915813446, "learning_rate": 7.173672209219495e-06, "loss": 1.7822, "step": 3374 }, { "epoch": 0.777515406323792, "grad_norm": 0.27433449029922485, "learning_rate": 7.159580421034037e-06, "loss": 1.7894, "step": 3375 }, { "epoch": 0.7777457812589991, "grad_norm": 0.25729018449783325, "learning_rate": 7.145500173765329e-06, "loss": 1.7556, "step": 3376 }, { "epoch": 0.7779761561942061, "grad_norm": 0.28381359577178955, "learning_rate": 7.131431476521838e-06, "loss": 1.7894, "step": 3377 }, { "epoch": 0.7782065311294131, "grad_norm": 0.2701699435710907, "learning_rate": 7.11737433840457e-06, "loss": 1.8226, "step": 3378 }, { "epoch": 0.7784369060646201, "grad_norm": 0.2629021406173706, "learning_rate": 7.103328768507039e-06, "loss": 1.8036, "step": 3379 }, { "epoch": 0.7786672809998272, "grad_norm": 0.25210869312286377, "learning_rate": 7.089294775915292e-06, "loss": 1.7906, "step": 3380 }, { "epoch": 0.7788976559350342, "grad_norm": 0.2712438106536865, "learning_rate": 7.075272369707878e-06, "loss": 1.8085, "step": 3381 }, { "epoch": 0.7791280308702413, "grad_norm": 0.2893485724925995, "learning_rate": 7.061261558955848e-06, "loss": 1.7888, "step": 3382 }, { "epoch": 0.7793584058054484, "grad_norm": 0.32781922817230225, "learning_rate": 7.047262352722758e-06, "loss": 1.8114, "step": 3383 }, { "epoch": 0.7795887807406554, "grad_norm": 0.2761306166648865, "learning_rate": 7.0332747600646566e-06, "loss": 1.804, "step": 3384 }, { "epoch": 0.7798191556758625, "grad_norm": 0.30306991934776306, "learning_rate": 7.019298790030074e-06, "loss": 1.7753, "step": 3385 }, { "epoch": 0.7800495306110695, "grad_norm": 0.7466180920600891, "learning_rate": 7.005334451660034e-06, "loss": 1.8098, "step": 3386 }, { "epoch": 0.7802799055462766, "grad_norm": 0.27400726079940796, "learning_rate": 6.991381753988013e-06, "loss": 1.8233, "step": 3387 }, { "epoch": 0.7805102804814836, "grad_norm": 0.27204957604408264, "learning_rate": 6.977440706039973e-06, "loss": 1.7981, "step": 3388 }, { "epoch": 0.7807406554166907, "grad_norm": 0.28643667697906494, "learning_rate": 6.963511316834359e-06, "loss": 1.8335, "step": 3389 }, { "epoch": 0.7809710303518977, "grad_norm": 0.2729566991329193, "learning_rate": 6.9495935953820314e-06, "loss": 1.8506, "step": 3390 }, { "epoch": 0.7812014052871048, "grad_norm": 0.25676408410072327, "learning_rate": 6.935687550686323e-06, "loss": 1.8588, "step": 3391 }, { "epoch": 0.7814317802223119, "grad_norm": 0.2935173511505127, "learning_rate": 6.921793191743043e-06, "loss": 1.7831, "step": 3392 }, { "epoch": 0.7816621551575189, "grad_norm": 0.28663256764411926, "learning_rate": 6.907910527540382e-06, "loss": 1.8229, "step": 3393 }, { "epoch": 0.7818925300927259, "grad_norm": 0.28981101512908936, "learning_rate": 6.894039567059007e-06, "loss": 1.7694, "step": 3394 }, { "epoch": 0.7821229050279329, "grad_norm": 0.2793687880039215, "learning_rate": 6.880180319272006e-06, "loss": 1.7785, "step": 3395 }, { "epoch": 0.78235327996314, "grad_norm": 0.28933969140052795, "learning_rate": 6.86633279314488e-06, "loss": 1.8162, "step": 3396 }, { "epoch": 0.782583654898347, "grad_norm": 0.27542275190353394, "learning_rate": 6.852496997635563e-06, "loss": 1.7951, "step": 3397 }, { "epoch": 0.7828140298335541, "grad_norm": 0.2916474938392639, "learning_rate": 6.838672941694388e-06, "loss": 1.8054, "step": 3398 }, { "epoch": 0.7830444047687611, "grad_norm": 0.264604389667511, "learning_rate": 6.824860634264094e-06, "loss": 1.8234, "step": 3399 }, { "epoch": 0.7832747797039682, "grad_norm": 0.27910399436950684, "learning_rate": 6.811060084279827e-06, "loss": 1.8142, "step": 3400 }, { "epoch": 0.7835051546391752, "grad_norm": 0.28293663263320923, "learning_rate": 6.797271300669123e-06, "loss": 1.7969, "step": 3401 }, { "epoch": 0.7837355295743823, "grad_norm": 0.2689932882785797, "learning_rate": 6.783494292351908e-06, "loss": 1.7829, "step": 3402 }, { "epoch": 0.7839659045095894, "grad_norm": 0.26886123418807983, "learning_rate": 6.769729068240488e-06, "loss": 1.7946, "step": 3403 }, { "epoch": 0.7841962794447964, "grad_norm": 0.3860701620578766, "learning_rate": 6.7559756372395475e-06, "loss": 1.8091, "step": 3404 }, { "epoch": 0.7844266543800035, "grad_norm": 0.2870180308818817, "learning_rate": 6.742234008246143e-06, "loss": 1.776, "step": 3405 }, { "epoch": 0.7846570293152105, "grad_norm": 0.2999970018863678, "learning_rate": 6.7285041901496955e-06, "loss": 1.7466, "step": 3406 }, { "epoch": 0.7848874042504176, "grad_norm": 0.2578742504119873, "learning_rate": 6.714786191831985e-06, "loss": 1.8276, "step": 3407 }, { "epoch": 0.7851177791856246, "grad_norm": 0.2816501557826996, "learning_rate": 6.7010800221671415e-06, "loss": 1.8142, "step": 3408 }, { "epoch": 0.7853481541208317, "grad_norm": 0.29682818055152893, "learning_rate": 6.687385690021653e-06, "loss": 1.7948, "step": 3409 }, { "epoch": 0.7855785290560388, "grad_norm": 0.2863454222679138, "learning_rate": 6.673703204254347e-06, "loss": 1.7535, "step": 3410 }, { "epoch": 0.7858089039912457, "grad_norm": 0.27233752608299255, "learning_rate": 6.660032573716368e-06, "loss": 1.7678, "step": 3411 }, { "epoch": 0.7860392789264528, "grad_norm": 0.29395270347595215, "learning_rate": 6.646373807251227e-06, "loss": 1.7849, "step": 3412 }, { "epoch": 0.7862696538616598, "grad_norm": 0.2767411172389984, "learning_rate": 6.6327269136947395e-06, "loss": 1.7864, "step": 3413 }, { "epoch": 0.7865000287968669, "grad_norm": 0.2986549735069275, "learning_rate": 6.6190919018750215e-06, "loss": 1.7515, "step": 3414 }, { "epoch": 0.7867304037320739, "grad_norm": 0.28480100631713867, "learning_rate": 6.605468780612553e-06, "loss": 1.8344, "step": 3415 }, { "epoch": 0.786960778667281, "grad_norm": 0.27834028005599976, "learning_rate": 6.591857558720071e-06, "loss": 1.7628, "step": 3416 }, { "epoch": 0.787191153602488, "grad_norm": 0.27140113711357117, "learning_rate": 6.578258245002639e-06, "loss": 1.8179, "step": 3417 }, { "epoch": 0.7874215285376951, "grad_norm": 0.2886875867843628, "learning_rate": 6.564670848257629e-06, "loss": 1.7915, "step": 3418 }, { "epoch": 0.7876519034729021, "grad_norm": 0.28771430253982544, "learning_rate": 6.551095377274671e-06, "loss": 1.7993, "step": 3419 }, { "epoch": 0.7878822784081092, "grad_norm": 0.313534677028656, "learning_rate": 6.537531840835703e-06, "loss": 1.8156, "step": 3420 }, { "epoch": 0.7881126533433163, "grad_norm": 0.26439425349235535, "learning_rate": 6.523980247714953e-06, "loss": 1.7989, "step": 3421 }, { "epoch": 0.7883430282785233, "grad_norm": 0.3003896474838257, "learning_rate": 6.5104406066788915e-06, "loss": 1.7709, "step": 3422 }, { "epoch": 0.7885734032137304, "grad_norm": 0.29823043942451477, "learning_rate": 6.496912926486279e-06, "loss": 1.7885, "step": 3423 }, { "epoch": 0.7888037781489374, "grad_norm": 0.27826204895973206, "learning_rate": 6.483397215888135e-06, "loss": 1.7992, "step": 3424 }, { "epoch": 0.7890341530841445, "grad_norm": 0.25662243366241455, "learning_rate": 6.469893483627728e-06, "loss": 1.8208, "step": 3425 }, { "epoch": 0.7892645280193515, "grad_norm": 0.2880914509296417, "learning_rate": 6.4564017384405925e-06, "loss": 1.7587, "step": 3426 }, { "epoch": 0.7894949029545586, "grad_norm": 0.3276144862174988, "learning_rate": 6.442921989054495e-06, "loss": 1.7731, "step": 3427 }, { "epoch": 0.7897252778897655, "grad_norm": 0.2969507873058319, "learning_rate": 6.42945424418945e-06, "loss": 1.7995, "step": 3428 }, { "epoch": 0.7899556528249726, "grad_norm": 0.2581486105918884, "learning_rate": 6.415998512557702e-06, "loss": 1.8401, "step": 3429 }, { "epoch": 0.7901860277601797, "grad_norm": 0.298555463552475, "learning_rate": 6.402554802863725e-06, "loss": 1.7972, "step": 3430 }, { "epoch": 0.7904164026953867, "grad_norm": 0.2843739688396454, "learning_rate": 6.389123123804217e-06, "loss": 1.8142, "step": 3431 }, { "epoch": 0.7906467776305938, "grad_norm": 0.2773067355155945, "learning_rate": 6.375703484068093e-06, "loss": 1.8003, "step": 3432 }, { "epoch": 0.7908771525658008, "grad_norm": 0.27110910415649414, "learning_rate": 6.362295892336481e-06, "loss": 1.7888, "step": 3433 }, { "epoch": 0.7911075275010079, "grad_norm": 0.29920676350593567, "learning_rate": 6.348900357282719e-06, "loss": 1.7722, "step": 3434 }, { "epoch": 0.7913379024362149, "grad_norm": 0.2696082890033722, "learning_rate": 6.335516887572321e-06, "loss": 1.7981, "step": 3435 }, { "epoch": 0.791568277371422, "grad_norm": 0.2669649124145508, "learning_rate": 6.322145491863035e-06, "loss": 1.7587, "step": 3436 }, { "epoch": 0.791798652306629, "grad_norm": 0.3163840174674988, "learning_rate": 6.308786178804782e-06, "loss": 1.7807, "step": 3437 }, { "epoch": 0.7920290272418361, "grad_norm": 0.2642984092235565, "learning_rate": 6.29543895703964e-06, "loss": 1.7657, "step": 3438 }, { "epoch": 0.7922594021770432, "grad_norm": 0.2544874846935272, "learning_rate": 6.2821038352019166e-06, "loss": 1.799, "step": 3439 }, { "epoch": 0.7924897771122502, "grad_norm": 0.2807328701019287, "learning_rate": 6.268780821918044e-06, "loss": 1.7938, "step": 3440 }, { "epoch": 0.7927201520474573, "grad_norm": 0.2601803243160248, "learning_rate": 6.255469925806643e-06, "loss": 1.7933, "step": 3441 }, { "epoch": 0.7929505269826643, "grad_norm": 0.2633059024810791, "learning_rate": 6.242171155478516e-06, "loss": 1.8266, "step": 3442 }, { "epoch": 0.7931809019178714, "grad_norm": 0.2758152186870575, "learning_rate": 6.228884519536573e-06, "loss": 1.8042, "step": 3443 }, { "epoch": 0.7934112768530784, "grad_norm": 0.3228265941143036, "learning_rate": 6.215610026575916e-06, "loss": 1.7856, "step": 3444 }, { "epoch": 0.7936416517882854, "grad_norm": 0.263219952583313, "learning_rate": 6.2023476851837705e-06, "loss": 1.8489, "step": 3445 }, { "epoch": 0.7938720267234924, "grad_norm": 0.2712760269641876, "learning_rate": 6.189097503939509e-06, "loss": 1.8067, "step": 3446 }, { "epoch": 0.7941024016586995, "grad_norm": 0.2982138395309448, "learning_rate": 6.17585949141464e-06, "loss": 1.7629, "step": 3447 }, { "epoch": 0.7943327765939066, "grad_norm": 0.26466450095176697, "learning_rate": 6.162633656172789e-06, "loss": 1.8166, "step": 3448 }, { "epoch": 0.7945631515291136, "grad_norm": 0.26861143112182617, "learning_rate": 6.149420006769718e-06, "loss": 1.8402, "step": 3449 }, { "epoch": 0.7947935264643207, "grad_norm": 0.27286383509635925, "learning_rate": 6.136218551753298e-06, "loss": 1.8167, "step": 3450 }, { "epoch": 0.7950239013995277, "grad_norm": 0.2953748106956482, "learning_rate": 6.12302929966351e-06, "loss": 1.7849, "step": 3451 }, { "epoch": 0.7952542763347348, "grad_norm": 0.29501572251319885, "learning_rate": 6.109852259032445e-06, "loss": 1.7656, "step": 3452 }, { "epoch": 0.7954846512699418, "grad_norm": 0.2747545540332794, "learning_rate": 6.096687438384294e-06, "loss": 1.7968, "step": 3453 }, { "epoch": 0.7957150262051489, "grad_norm": 0.2890208065509796, "learning_rate": 6.083534846235342e-06, "loss": 1.7841, "step": 3454 }, { "epoch": 0.795945401140356, "grad_norm": 0.273743212223053, "learning_rate": 6.070394491093961e-06, "loss": 1.7877, "step": 3455 }, { "epoch": 0.796175776075563, "grad_norm": 0.2798789441585541, "learning_rate": 6.057266381460611e-06, "loss": 1.7586, "step": 3456 }, { "epoch": 0.7964061510107701, "grad_norm": 0.28093093633651733, "learning_rate": 6.0441505258278295e-06, "loss": 1.8267, "step": 3457 }, { "epoch": 0.7966365259459771, "grad_norm": 0.3139149248600006, "learning_rate": 6.031046932680229e-06, "loss": 1.8203, "step": 3458 }, { "epoch": 0.7968669008811842, "grad_norm": 0.24249149858951569, "learning_rate": 6.01795561049448e-06, "loss": 1.8307, "step": 3459 }, { "epoch": 0.7970972758163912, "grad_norm": 0.275146484375, "learning_rate": 6.0048765677393345e-06, "loss": 1.7769, "step": 3460 }, { "epoch": 0.7973276507515982, "grad_norm": 0.2719727158546448, "learning_rate": 5.991809812875562e-06, "loss": 1.7763, "step": 3461 }, { "epoch": 0.7975580256868052, "grad_norm": 0.27837929129600525, "learning_rate": 5.978755354356033e-06, "loss": 1.8483, "step": 3462 }, { "epoch": 0.7977884006220123, "grad_norm": 0.26192572712898254, "learning_rate": 5.965713200625641e-06, "loss": 1.8255, "step": 3463 }, { "epoch": 0.7980187755572193, "grad_norm": 0.2585604786872864, "learning_rate": 5.952683360121297e-06, "loss": 1.7955, "step": 3464 }, { "epoch": 0.7982491504924264, "grad_norm": 0.28838756680488586, "learning_rate": 5.939665841271985e-06, "loss": 1.7764, "step": 3465 }, { "epoch": 0.7984795254276335, "grad_norm": 0.2741096615791321, "learning_rate": 5.926660652498705e-06, "loss": 1.8487, "step": 3466 }, { "epoch": 0.7987099003628405, "grad_norm": 0.2674960494041443, "learning_rate": 5.9136678022144566e-06, "loss": 1.8042, "step": 3467 }, { "epoch": 0.7989402752980476, "grad_norm": 0.2581718862056732, "learning_rate": 5.900687298824301e-06, "loss": 1.7955, "step": 3468 }, { "epoch": 0.7991706502332546, "grad_norm": 0.273858904838562, "learning_rate": 5.887719150725268e-06, "loss": 1.7988, "step": 3469 }, { "epoch": 0.7994010251684617, "grad_norm": 0.26049962639808655, "learning_rate": 5.874763366306421e-06, "loss": 1.8035, "step": 3470 }, { "epoch": 0.7996314001036687, "grad_norm": 0.33755922317504883, "learning_rate": 5.8618199539488355e-06, "loss": 1.7724, "step": 3471 }, { "epoch": 0.7998617750388758, "grad_norm": 0.26017874479293823, "learning_rate": 5.848888922025553e-06, "loss": 1.8374, "step": 3472 }, { "epoch": 0.8000921499740828, "grad_norm": 0.26684948801994324, "learning_rate": 5.835970278901617e-06, "loss": 1.7996, "step": 3473 }, { "epoch": 0.8003225249092899, "grad_norm": 0.27526217699050903, "learning_rate": 5.8230640329340835e-06, "loss": 1.8084, "step": 3474 }, { "epoch": 0.800552899844497, "grad_norm": 0.28736305236816406, "learning_rate": 5.810170192471942e-06, "loss": 1.8053, "step": 3475 }, { "epoch": 0.800783274779704, "grad_norm": 0.2809104919433594, "learning_rate": 5.7972887658561955e-06, "loss": 1.7898, "step": 3476 }, { "epoch": 0.8010136497149111, "grad_norm": 0.2516775131225586, "learning_rate": 5.7844197614197955e-06, "loss": 1.8101, "step": 3477 }, { "epoch": 0.801244024650118, "grad_norm": 0.27304136753082275, "learning_rate": 5.771563187487669e-06, "loss": 1.7978, "step": 3478 }, { "epoch": 0.8014743995853251, "grad_norm": 0.27610763907432556, "learning_rate": 5.758719052376693e-06, "loss": 1.7801, "step": 3479 }, { "epoch": 0.8017047745205321, "grad_norm": 0.28617948293685913, "learning_rate": 5.745887364395708e-06, "loss": 1.7978, "step": 3480 }, { "epoch": 0.8019351494557392, "grad_norm": 0.2868005037307739, "learning_rate": 5.7330681318454955e-06, "loss": 1.7872, "step": 3481 }, { "epoch": 0.8021655243909462, "grad_norm": 0.2709314525127411, "learning_rate": 5.720261363018767e-06, "loss": 1.7681, "step": 3482 }, { "epoch": 0.8023958993261533, "grad_norm": 0.27496281266212463, "learning_rate": 5.7074670662002e-06, "loss": 1.773, "step": 3483 }, { "epoch": 0.8026262742613604, "grad_norm": 0.27368250489234924, "learning_rate": 5.694685249666396e-06, "loss": 1.802, "step": 3484 }, { "epoch": 0.8028566491965674, "grad_norm": 0.2723373770713806, "learning_rate": 5.681915921685846e-06, "loss": 1.8037, "step": 3485 }, { "epoch": 0.8030870241317745, "grad_norm": 0.2968026101589203, "learning_rate": 5.669159090519019e-06, "loss": 1.7684, "step": 3486 }, { "epoch": 0.8033173990669815, "grad_norm": 0.28185680508613586, "learning_rate": 5.6564147644182695e-06, "loss": 1.7761, "step": 3487 }, { "epoch": 0.8035477740021886, "grad_norm": 0.2685316205024719, "learning_rate": 5.643682951627849e-06, "loss": 1.8039, "step": 3488 }, { "epoch": 0.8037781489373956, "grad_norm": 0.283648818731308, "learning_rate": 5.630963660383953e-06, "loss": 1.8129, "step": 3489 }, { "epoch": 0.8040085238726027, "grad_norm": 0.2826639413833618, "learning_rate": 5.6182568989146425e-06, "loss": 1.7591, "step": 3490 }, { "epoch": 0.8042388988078097, "grad_norm": 0.2617439329624176, "learning_rate": 5.6055626754398765e-06, "loss": 1.8286, "step": 3491 }, { "epoch": 0.8044692737430168, "grad_norm": 0.2888737916946411, "learning_rate": 5.592880998171537e-06, "loss": 1.7915, "step": 3492 }, { "epoch": 0.8046996486782239, "grad_norm": 0.2696157395839691, "learning_rate": 5.580211875313346e-06, "loss": 1.7618, "step": 3493 }, { "epoch": 0.8049300236134309, "grad_norm": 0.26172545552253723, "learning_rate": 5.567555315060918e-06, "loss": 1.8323, "step": 3494 }, { "epoch": 0.8051603985486379, "grad_norm": 0.27845850586891174, "learning_rate": 5.554911325601772e-06, "loss": 1.8557, "step": 3495 }, { "epoch": 0.8053907734838449, "grad_norm": 0.27843233942985535, "learning_rate": 5.542279915115245e-06, "loss": 1.7718, "step": 3496 }, { "epoch": 0.805621148419052, "grad_norm": 0.2635432183742523, "learning_rate": 5.52966109177257e-06, "loss": 1.7853, "step": 3497 }, { "epoch": 0.805851523354259, "grad_norm": 0.25862473249435425, "learning_rate": 5.517054863736826e-06, "loss": 1.8104, "step": 3498 }, { "epoch": 0.8060818982894661, "grad_norm": 0.2593885362148285, "learning_rate": 5.504461239162945e-06, "loss": 1.8091, "step": 3499 }, { "epoch": 0.8063122732246731, "grad_norm": 0.28729915618896484, "learning_rate": 5.491880226197707e-06, "loss": 1.7877, "step": 3500 }, { "epoch": 0.8065426481598802, "grad_norm": 0.2921610474586487, "learning_rate": 5.479311832979736e-06, "loss": 1.7869, "step": 3501 }, { "epoch": 0.8067730230950872, "grad_norm": 0.27234649658203125, "learning_rate": 5.4667560676394894e-06, "loss": 1.7769, "step": 3502 }, { "epoch": 0.8070033980302943, "grad_norm": 0.28467509150505066, "learning_rate": 5.454212938299255e-06, "loss": 1.7913, "step": 3503 }, { "epoch": 0.8072337729655014, "grad_norm": 0.4559415578842163, "learning_rate": 5.4416824530731495e-06, "loss": 1.7576, "step": 3504 }, { "epoch": 0.8074641479007084, "grad_norm": 0.27168145775794983, "learning_rate": 5.429164620067107e-06, "loss": 1.8105, "step": 3505 }, { "epoch": 0.8076945228359155, "grad_norm": 0.26982632279396057, "learning_rate": 5.4166594473788746e-06, "loss": 1.7856, "step": 3506 }, { "epoch": 0.8079248977711225, "grad_norm": 0.2898307740688324, "learning_rate": 5.4041669430980186e-06, "loss": 1.7835, "step": 3507 }, { "epoch": 0.8081552727063296, "grad_norm": 0.2863353490829468, "learning_rate": 5.391687115305902e-06, "loss": 1.7605, "step": 3508 }, { "epoch": 0.8083856476415366, "grad_norm": 0.2706702947616577, "learning_rate": 5.379219972075691e-06, "loss": 1.7893, "step": 3509 }, { "epoch": 0.8086160225767437, "grad_norm": 0.2870020866394043, "learning_rate": 5.36676552147235e-06, "loss": 1.7913, "step": 3510 }, { "epoch": 0.8088463975119508, "grad_norm": 0.27112671732902527, "learning_rate": 5.354323771552608e-06, "loss": 1.7973, "step": 3511 }, { "epoch": 0.8090767724471577, "grad_norm": 0.3017749488353729, "learning_rate": 5.3418947303650185e-06, "loss": 1.7121, "step": 3512 }, { "epoch": 0.8093071473823648, "grad_norm": 0.26315397024154663, "learning_rate": 5.329478405949892e-06, "loss": 1.8084, "step": 3513 }, { "epoch": 0.8095375223175718, "grad_norm": 0.2657680809497833, "learning_rate": 5.317074806339295e-06, "loss": 1.7422, "step": 3514 }, { "epoch": 0.8097678972527789, "grad_norm": 0.2675033509731293, "learning_rate": 5.304683939557101e-06, "loss": 1.8466, "step": 3515 }, { "epoch": 0.8099982721879859, "grad_norm": 0.3293367028236389, "learning_rate": 5.292305813618925e-06, "loss": 1.7605, "step": 3516 }, { "epoch": 0.810228647123193, "grad_norm": 0.27591976523399353, "learning_rate": 5.279940436532121e-06, "loss": 1.7898, "step": 3517 }, { "epoch": 0.8104590220584, "grad_norm": 0.26193249225616455, "learning_rate": 5.267587816295847e-06, "loss": 1.8081, "step": 3518 }, { "epoch": 0.8106893969936071, "grad_norm": 0.2728120684623718, "learning_rate": 5.255247960900952e-06, "loss": 1.762, "step": 3519 }, { "epoch": 0.8109197719288141, "grad_norm": 0.263254314661026, "learning_rate": 5.24292087833006e-06, "loss": 1.8102, "step": 3520 }, { "epoch": 0.8111501468640212, "grad_norm": 0.28718048334121704, "learning_rate": 5.23060657655754e-06, "loss": 1.7552, "step": 3521 }, { "epoch": 0.8113805217992283, "grad_norm": 0.3025498688220978, "learning_rate": 5.218305063549461e-06, "loss": 1.7637, "step": 3522 }, { "epoch": 0.8116108967344353, "grad_norm": 0.2588651776313782, "learning_rate": 5.206016347263637e-06, "loss": 1.8372, "step": 3523 }, { "epoch": 0.8118412716696424, "grad_norm": 0.27280688285827637, "learning_rate": 5.193740435649622e-06, "loss": 1.7831, "step": 3524 }, { "epoch": 0.8120716466048494, "grad_norm": 0.31454867124557495, "learning_rate": 5.181477336648652e-06, "loss": 1.7738, "step": 3525 }, { "epoch": 0.8123020215400565, "grad_norm": 0.31411510705947876, "learning_rate": 5.169227058193693e-06, "loss": 1.7812, "step": 3526 }, { "epoch": 0.8125323964752635, "grad_norm": 0.3060874342918396, "learning_rate": 5.15698960820942e-06, "loss": 1.7939, "step": 3527 }, { "epoch": 0.8127627714104705, "grad_norm": 2.180885076522827, "learning_rate": 5.1447649946122e-06, "loss": 1.7747, "step": 3528 }, { "epoch": 0.8129931463456775, "grad_norm": 0.2771254777908325, "learning_rate": 5.132553225310105e-06, "loss": 1.8114, "step": 3529 }, { "epoch": 0.8132235212808846, "grad_norm": 0.27123016119003296, "learning_rate": 5.120354308202893e-06, "loss": 1.8197, "step": 3530 }, { "epoch": 0.8134538962160917, "grad_norm": 2.0324180126190186, "learning_rate": 5.108168251182005e-06, "loss": 1.8834, "step": 3531 }, { "epoch": 0.8136842711512987, "grad_norm": 0.273646742105484, "learning_rate": 5.095995062130571e-06, "loss": 1.8163, "step": 3532 }, { "epoch": 0.8139146460865058, "grad_norm": 0.2800276577472687, "learning_rate": 5.083834748923391e-06, "loss": 1.8342, "step": 3533 }, { "epoch": 0.8141450210217128, "grad_norm": 0.31380462646484375, "learning_rate": 5.071687319426946e-06, "loss": 1.8157, "step": 3534 }, { "epoch": 0.8143753959569199, "grad_norm": 0.2733329236507416, "learning_rate": 5.059552781499349e-06, "loss": 1.8056, "step": 3535 }, { "epoch": 0.8146057708921269, "grad_norm": 0.27243363857269287, "learning_rate": 5.047431142990419e-06, "loss": 1.8185, "step": 3536 }, { "epoch": 0.814836145827334, "grad_norm": 0.2654930055141449, "learning_rate": 5.035322411741614e-06, "loss": 1.8164, "step": 3537 }, { "epoch": 0.815066520762541, "grad_norm": 0.26967835426330566, "learning_rate": 5.023226595586012e-06, "loss": 1.7987, "step": 3538 }, { "epoch": 0.8152968956977481, "grad_norm": 0.281098335981369, "learning_rate": 5.011143702348387e-06, "loss": 1.8027, "step": 3539 }, { "epoch": 0.8155272706329552, "grad_norm": 0.2970602810382843, "learning_rate": 4.999073739845125e-06, "loss": 1.7777, "step": 3540 }, { "epoch": 0.8157576455681622, "grad_norm": 0.27297642827033997, "learning_rate": 4.9870167158842325e-06, "loss": 1.8004, "step": 3541 }, { "epoch": 0.8159880205033693, "grad_norm": 0.3156970739364624, "learning_rate": 4.9749726382653905e-06, "loss": 1.7387, "step": 3542 }, { "epoch": 0.8162183954385763, "grad_norm": 0.28037795424461365, "learning_rate": 4.9629415147798564e-06, "loss": 1.799, "step": 3543 }, { "epoch": 0.8164487703737834, "grad_norm": 0.32190442085266113, "learning_rate": 4.950923353210532e-06, "loss": 1.7838, "step": 3544 }, { "epoch": 0.8166791453089903, "grad_norm": 0.27351251244544983, "learning_rate": 4.938918161331951e-06, "loss": 1.7672, "step": 3545 }, { "epoch": 0.8169095202441974, "grad_norm": 0.2636055052280426, "learning_rate": 4.926925946910218e-06, "loss": 1.8429, "step": 3546 }, { "epoch": 0.8171398951794044, "grad_norm": 0.2881084680557251, "learning_rate": 4.91494671770307e-06, "loss": 1.7319, "step": 3547 }, { "epoch": 0.8173702701146115, "grad_norm": 0.371296763420105, "learning_rate": 4.902980481459835e-06, "loss": 1.823, "step": 3548 }, { "epoch": 0.8176006450498186, "grad_norm": 0.258992075920105, "learning_rate": 4.89102724592144e-06, "loss": 1.7879, "step": 3549 }, { "epoch": 0.8178310199850256, "grad_norm": 0.2720634937286377, "learning_rate": 4.879087018820394e-06, "loss": 1.7411, "step": 3550 }, { "epoch": 0.8180613949202327, "grad_norm": 0.2763260304927826, "learning_rate": 4.8671598078808e-06, "loss": 1.8257, "step": 3551 }, { "epoch": 0.8182917698554397, "grad_norm": 0.3090289235115051, "learning_rate": 4.8552456208183384e-06, "loss": 1.7759, "step": 3552 }, { "epoch": 0.8185221447906468, "grad_norm": 0.2736424207687378, "learning_rate": 4.843344465340258e-06, "loss": 1.7557, "step": 3553 }, { "epoch": 0.8187525197258538, "grad_norm": 0.2622818350791931, "learning_rate": 4.831456349145386e-06, "loss": 1.8118, "step": 3554 }, { "epoch": 0.8189828946610609, "grad_norm": 0.2572660446166992, "learning_rate": 4.8195812799241095e-06, "loss": 1.819, "step": 3555 }, { "epoch": 0.819213269596268, "grad_norm": 0.280509352684021, "learning_rate": 4.807719265358377e-06, "loss": 1.7887, "step": 3556 }, { "epoch": 0.819443644531475, "grad_norm": 0.2571720480918884, "learning_rate": 4.795870313121692e-06, "loss": 1.8458, "step": 3557 }, { "epoch": 0.8196740194666821, "grad_norm": 0.2776300013065338, "learning_rate": 4.784034430879108e-06, "loss": 1.7794, "step": 3558 }, { "epoch": 0.8199043944018891, "grad_norm": 0.27416473627090454, "learning_rate": 4.772211626287227e-06, "loss": 1.7969, "step": 3559 }, { "epoch": 0.8201347693370962, "grad_norm": 0.2527570426464081, "learning_rate": 4.760401906994183e-06, "loss": 1.7795, "step": 3560 }, { "epoch": 0.8203651442723032, "grad_norm": 0.2684236168861389, "learning_rate": 4.748605280639651e-06, "loss": 1.8356, "step": 3561 }, { "epoch": 0.8205955192075102, "grad_norm": 0.2777908742427826, "learning_rate": 4.736821754854837e-06, "loss": 1.7585, "step": 3562 }, { "epoch": 0.8208258941427172, "grad_norm": 0.2791120409965515, "learning_rate": 4.725051337262476e-06, "loss": 1.7574, "step": 3563 }, { "epoch": 0.8210562690779243, "grad_norm": 0.28325480222702026, "learning_rate": 4.713294035476798e-06, "loss": 1.8304, "step": 3564 }, { "epoch": 0.8212866440131313, "grad_norm": 0.26787465810775757, "learning_rate": 4.701549857103588e-06, "loss": 1.8111, "step": 3565 }, { "epoch": 0.8215170189483384, "grad_norm": 0.27594825625419617, "learning_rate": 4.689818809740118e-06, "loss": 1.8527, "step": 3566 }, { "epoch": 0.8217473938835455, "grad_norm": 0.27075332403182983, "learning_rate": 4.678100900975155e-06, "loss": 1.8228, "step": 3567 }, { "epoch": 0.8219777688187525, "grad_norm": 0.3519812226295471, "learning_rate": 4.666396138388998e-06, "loss": 1.7939, "step": 3568 }, { "epoch": 0.8222081437539596, "grad_norm": 0.29282745718955994, "learning_rate": 4.6547045295534245e-06, "loss": 1.7342, "step": 3569 }, { "epoch": 0.8224385186891666, "grad_norm": 0.2600977420806885, "learning_rate": 4.643026082031685e-06, "loss": 1.8239, "step": 3570 }, { "epoch": 0.8226688936243737, "grad_norm": 0.27177929878234863, "learning_rate": 4.6313608033785564e-06, "loss": 1.78, "step": 3571 }, { "epoch": 0.8228992685595807, "grad_norm": 0.2770504057407379, "learning_rate": 4.61970870114026e-06, "loss": 1.7824, "step": 3572 }, { "epoch": 0.8231296434947878, "grad_norm": 0.29904240369796753, "learning_rate": 4.6080697828545045e-06, "loss": 1.7843, "step": 3573 }, { "epoch": 0.8233600184299948, "grad_norm": 0.4818657338619232, "learning_rate": 4.596444056050492e-06, "loss": 1.8135, "step": 3574 }, { "epoch": 0.8235903933652019, "grad_norm": 0.2769756019115448, "learning_rate": 4.584831528248856e-06, "loss": 1.8084, "step": 3575 }, { "epoch": 0.823820768300409, "grad_norm": 0.2978156507015228, "learning_rate": 4.573232206961708e-06, "loss": 1.7796, "step": 3576 }, { "epoch": 0.824051143235616, "grad_norm": 0.31753966212272644, "learning_rate": 4.561646099692623e-06, "loss": 1.7663, "step": 3577 }, { "epoch": 0.8242815181708231, "grad_norm": 0.2788325250148773, "learning_rate": 4.550073213936618e-06, "loss": 1.8431, "step": 3578 }, { "epoch": 0.82451189310603, "grad_norm": 0.26003551483154297, "learning_rate": 4.53851355718016e-06, "loss": 1.7877, "step": 3579 }, { "epoch": 0.8247422680412371, "grad_norm": 0.24495497345924377, "learning_rate": 4.526967136901156e-06, "loss": 1.7919, "step": 3580 }, { "epoch": 0.8249726429764441, "grad_norm": 0.33425742387771606, "learning_rate": 4.5154339605689575e-06, "loss": 1.7784, "step": 3581 }, { "epoch": 0.8252030179116512, "grad_norm": 0.26591095328330994, "learning_rate": 4.503914035644336e-06, "loss": 1.7964, "step": 3582 }, { "epoch": 0.8254333928468582, "grad_norm": 0.2849682867527008, "learning_rate": 4.492407369579505e-06, "loss": 1.7652, "step": 3583 }, { "epoch": 0.8256637677820653, "grad_norm": 0.4519117772579193, "learning_rate": 4.480913969818098e-06, "loss": 1.8168, "step": 3584 }, { "epoch": 0.8258941427172724, "grad_norm": 0.2921490967273712, "learning_rate": 4.469433843795143e-06, "loss": 1.7582, "step": 3585 }, { "epoch": 0.8261245176524794, "grad_norm": 0.27138829231262207, "learning_rate": 4.457966998937121e-06, "loss": 1.7926, "step": 3586 }, { "epoch": 0.8263548925876865, "grad_norm": 0.26565200090408325, "learning_rate": 4.446513442661904e-06, "loss": 1.7895, "step": 3587 }, { "epoch": 0.8265852675228935, "grad_norm": 0.26570242643356323, "learning_rate": 4.435073182378738e-06, "loss": 1.808, "step": 3588 }, { "epoch": 0.8268156424581006, "grad_norm": 0.2859266698360443, "learning_rate": 4.42364622548832e-06, "loss": 1.8057, "step": 3589 }, { "epoch": 0.8270460173933076, "grad_norm": 0.2683204114437103, "learning_rate": 4.412232579382716e-06, "loss": 1.8127, "step": 3590 }, { "epoch": 0.8272763923285147, "grad_norm": 0.2804393172264099, "learning_rate": 4.400832251445361e-06, "loss": 1.7937, "step": 3591 }, { "epoch": 0.8275067672637217, "grad_norm": 0.29413899779319763, "learning_rate": 4.389445249051122e-06, "loss": 1.7518, "step": 3592 }, { "epoch": 0.8277371421989288, "grad_norm": 0.3078686594963074, "learning_rate": 4.378071579566195e-06, "loss": 1.7117, "step": 3593 }, { "epoch": 0.8279675171341359, "grad_norm": 0.3012341558933258, "learning_rate": 4.366711250348176e-06, "loss": 1.7737, "step": 3594 }, { "epoch": 0.8281978920693428, "grad_norm": 0.2690119743347168, "learning_rate": 4.355364268746051e-06, "loss": 1.7968, "step": 3595 }, { "epoch": 0.8284282670045499, "grad_norm": 0.286795049905777, "learning_rate": 4.344030642100133e-06, "loss": 1.753, "step": 3596 }, { "epoch": 0.8286586419397569, "grad_norm": 0.2749379277229309, "learning_rate": 4.332710377742105e-06, "loss": 1.7909, "step": 3597 }, { "epoch": 0.828889016874964, "grad_norm": 0.2630249559879303, "learning_rate": 4.32140348299504e-06, "loss": 1.8208, "step": 3598 }, { "epoch": 0.829119391810171, "grad_norm": 0.24609282612800598, "learning_rate": 4.310109965173317e-06, "loss": 1.79, "step": 3599 }, { "epoch": 0.8293497667453781, "grad_norm": 0.28737586736679077, "learning_rate": 4.298829831582682e-06, "loss": 1.7828, "step": 3600 }, { "epoch": 0.8295801416805851, "grad_norm": 0.26191189885139465, "learning_rate": 4.2875630895202244e-06, "loss": 1.8025, "step": 3601 }, { "epoch": 0.8298105166157922, "grad_norm": 0.2742292582988739, "learning_rate": 4.276309746274368e-06, "loss": 1.8371, "step": 3602 }, { "epoch": 0.8300408915509992, "grad_norm": 0.27158233523368835, "learning_rate": 4.265069809124866e-06, "loss": 1.7426, "step": 3603 }, { "epoch": 0.8302712664862063, "grad_norm": 0.31490814685821533, "learning_rate": 4.253843285342807e-06, "loss": 1.7382, "step": 3604 }, { "epoch": 0.8305016414214134, "grad_norm": 0.2625845968723297, "learning_rate": 4.242630182190594e-06, "loss": 1.7692, "step": 3605 }, { "epoch": 0.8307320163566204, "grad_norm": 0.30127033591270447, "learning_rate": 4.231430506921949e-06, "loss": 1.7434, "step": 3606 }, { "epoch": 0.8309623912918275, "grad_norm": 0.2626434862613678, "learning_rate": 4.220244266781914e-06, "loss": 1.8304, "step": 3607 }, { "epoch": 0.8311927662270345, "grad_norm": 0.26153576374053955, "learning_rate": 4.209071469006834e-06, "loss": 1.8005, "step": 3608 }, { "epoch": 0.8314231411622416, "grad_norm": 0.27199143171310425, "learning_rate": 4.197912120824363e-06, "loss": 1.7916, "step": 3609 }, { "epoch": 0.8316535160974486, "grad_norm": 0.31484249234199524, "learning_rate": 4.186766229453449e-06, "loss": 1.7253, "step": 3610 }, { "epoch": 0.8318838910326557, "grad_norm": 0.27929872274398804, "learning_rate": 4.1756338021043366e-06, "loss": 1.8055, "step": 3611 }, { "epoch": 0.8321142659678626, "grad_norm": 0.2989068031311035, "learning_rate": 4.164514845978562e-06, "loss": 1.7604, "step": 3612 }, { "epoch": 0.8323446409030697, "grad_norm": 0.2954268157482147, "learning_rate": 4.153409368268957e-06, "loss": 1.7647, "step": 3613 }, { "epoch": 0.8325750158382768, "grad_norm": 0.2558306157588959, "learning_rate": 4.142317376159599e-06, "loss": 1.7944, "step": 3614 }, { "epoch": 0.8328053907734838, "grad_norm": 0.29916396737098694, "learning_rate": 4.1312388768258854e-06, "loss": 1.7805, "step": 3615 }, { "epoch": 0.8330357657086909, "grad_norm": 0.2639215886592865, "learning_rate": 4.1201738774344686e-06, "loss": 1.7536, "step": 3616 }, { "epoch": 0.8332661406438979, "grad_norm": 0.2631280720233917, "learning_rate": 4.109122385143249e-06, "loss": 1.821, "step": 3617 }, { "epoch": 0.833496515579105, "grad_norm": 0.2773746848106384, "learning_rate": 4.098084407101418e-06, "loss": 1.7817, "step": 3618 }, { "epoch": 0.833726890514312, "grad_norm": 0.3012809455394745, "learning_rate": 4.087059950449415e-06, "loss": 1.7625, "step": 3619 }, { "epoch": 0.8339572654495191, "grad_norm": 0.3073260486125946, "learning_rate": 4.0760490223189144e-06, "loss": 1.7622, "step": 3620 }, { "epoch": 0.8341876403847261, "grad_norm": 0.27874812483787537, "learning_rate": 4.065051629832872e-06, "loss": 1.8097, "step": 3621 }, { "epoch": 0.8344180153199332, "grad_norm": 0.2632195055484772, "learning_rate": 4.0540677801054584e-06, "loss": 1.8313, "step": 3622 }, { "epoch": 0.8346483902551403, "grad_norm": 0.24968145787715912, "learning_rate": 4.043097480242089e-06, "loss": 1.8381, "step": 3623 }, { "epoch": 0.8348787651903473, "grad_norm": 0.3034682869911194, "learning_rate": 4.032140737339443e-06, "loss": 1.7391, "step": 3624 }, { "epoch": 0.8351091401255544, "grad_norm": 0.2941591739654541, "learning_rate": 4.021197558485384e-06, "loss": 1.7863, "step": 3625 }, { "epoch": 0.8353395150607614, "grad_norm": 0.30816978216171265, "learning_rate": 4.010267950759025e-06, "loss": 1.7622, "step": 3626 }, { "epoch": 0.8355698899959685, "grad_norm": 0.2800792455673218, "learning_rate": 3.9993519212307154e-06, "loss": 1.7367, "step": 3627 }, { "epoch": 0.8358002649311755, "grad_norm": 0.24096329510211945, "learning_rate": 3.988449476961989e-06, "loss": 1.8049, "step": 3628 }, { "epoch": 0.8360306398663825, "grad_norm": 0.2733074724674225, "learning_rate": 3.977560625005608e-06, "loss": 1.7876, "step": 3629 }, { "epoch": 0.8362610148015895, "grad_norm": 0.2731497883796692, "learning_rate": 3.96668537240554e-06, "loss": 1.7852, "step": 3630 }, { "epoch": 0.8364913897367966, "grad_norm": 0.25872042775154114, "learning_rate": 3.955823726196958e-06, "loss": 1.8133, "step": 3631 }, { "epoch": 0.8367217646720037, "grad_norm": 0.2559351325035095, "learning_rate": 3.944975693406228e-06, "loss": 1.8117, "step": 3632 }, { "epoch": 0.8369521396072107, "grad_norm": 0.2757044732570648, "learning_rate": 3.9341412810509095e-06, "loss": 1.8211, "step": 3633 }, { "epoch": 0.8371825145424178, "grad_norm": 0.27081868052482605, "learning_rate": 3.92332049613976e-06, "loss": 1.7612, "step": 3634 }, { "epoch": 0.8374128894776248, "grad_norm": 0.40294644236564636, "learning_rate": 3.912513345672705e-06, "loss": 1.8149, "step": 3635 }, { "epoch": 0.8376432644128319, "grad_norm": 0.27553966641426086, "learning_rate": 3.901719836640868e-06, "loss": 1.7834, "step": 3636 }, { "epoch": 0.8378736393480389, "grad_norm": 0.2753531038761139, "learning_rate": 3.890939976026542e-06, "loss": 1.8141, "step": 3637 }, { "epoch": 0.838104014283246, "grad_norm": 0.26963359117507935, "learning_rate": 3.880173770803169e-06, "loss": 1.7897, "step": 3638 }, { "epoch": 0.838334389218453, "grad_norm": 0.2746419906616211, "learning_rate": 3.869421227935396e-06, "loss": 1.7895, "step": 3639 }, { "epoch": 0.8385647641536601, "grad_norm": 0.2941720485687256, "learning_rate": 3.858682354379012e-06, "loss": 1.7812, "step": 3640 }, { "epoch": 0.8387951390888672, "grad_norm": 0.2740996778011322, "learning_rate": 3.847957157080945e-06, "loss": 1.7673, "step": 3641 }, { "epoch": 0.8390255140240742, "grad_norm": 0.29818686842918396, "learning_rate": 3.8372456429793205e-06, "loss": 1.7395, "step": 3642 }, { "epoch": 0.8392558889592813, "grad_norm": 0.2767641246318817, "learning_rate": 3.826547819003365e-06, "loss": 1.8131, "step": 3643 }, { "epoch": 0.8394862638944883, "grad_norm": 0.2708143889904022, "learning_rate": 3.815863692073476e-06, "loss": 1.8025, "step": 3644 }, { "epoch": 0.8397166388296954, "grad_norm": 0.2947722375392914, "learning_rate": 3.805193269101198e-06, "loss": 1.766, "step": 3645 }, { "epoch": 0.8399470137649023, "grad_norm": 0.26553866267204285, "learning_rate": 3.794536556989181e-06, "loss": 1.8266, "step": 3646 }, { "epoch": 0.8401773887001094, "grad_norm": 0.2684909403324127, "learning_rate": 3.7838935626312242e-06, "loss": 1.8186, "step": 3647 }, { "epoch": 0.8404077636353164, "grad_norm": 0.2840941846370697, "learning_rate": 3.773264292912265e-06, "loss": 1.8246, "step": 3648 }, { "epoch": 0.8406381385705235, "grad_norm": 0.2723708748817444, "learning_rate": 3.762648754708331e-06, "loss": 1.8085, "step": 3649 }, { "epoch": 0.8408685135057306, "grad_norm": 0.32510071992874146, "learning_rate": 3.7520469548865874e-06, "loss": 1.7639, "step": 3650 }, { "epoch": 0.8410988884409376, "grad_norm": 0.27469271421432495, "learning_rate": 3.741458900305314e-06, "loss": 1.7764, "step": 3651 }, { "epoch": 0.8413292633761447, "grad_norm": 0.2656390070915222, "learning_rate": 3.7308845978138886e-06, "loss": 1.7952, "step": 3652 }, { "epoch": 0.8415596383113517, "grad_norm": 0.24979951977729797, "learning_rate": 3.7203240542527995e-06, "loss": 1.8172, "step": 3653 }, { "epoch": 0.8417900132465588, "grad_norm": 0.2560117244720459, "learning_rate": 3.70977727645363e-06, "loss": 1.8032, "step": 3654 }, { "epoch": 0.8420203881817658, "grad_norm": 0.2569649815559387, "learning_rate": 3.6992442712390635e-06, "loss": 1.7922, "step": 3655 }, { "epoch": 0.8422507631169729, "grad_norm": 0.26875054836273193, "learning_rate": 3.688725045422867e-06, "loss": 1.7875, "step": 3656 }, { "epoch": 0.84248113805218, "grad_norm": 0.2919863164424896, "learning_rate": 3.6782196058099046e-06, "loss": 1.8051, "step": 3657 }, { "epoch": 0.842711512987387, "grad_norm": 0.2707894742488861, "learning_rate": 3.66772795919611e-06, "loss": 1.8246, "step": 3658 }, { "epoch": 0.8429418879225941, "grad_norm": 0.25136756896972656, "learning_rate": 3.6572501123685017e-06, "loss": 1.8201, "step": 3659 }, { "epoch": 0.8431722628578011, "grad_norm": 0.2710001766681671, "learning_rate": 3.6467860721051653e-06, "loss": 1.8211, "step": 3660 }, { "epoch": 0.8434026377930082, "grad_norm": 0.30544063448905945, "learning_rate": 3.636335845175265e-06, "loss": 1.7528, "step": 3661 }, { "epoch": 0.8436330127282151, "grad_norm": 0.273754358291626, "learning_rate": 3.62589943833902e-06, "loss": 1.7956, "step": 3662 }, { "epoch": 0.8438633876634222, "grad_norm": 0.2746427655220032, "learning_rate": 3.6154768583477105e-06, "loss": 1.7754, "step": 3663 }, { "epoch": 0.8440937625986292, "grad_norm": 0.28640180826187134, "learning_rate": 3.605068111943674e-06, "loss": 1.7431, "step": 3664 }, { "epoch": 0.8443241375338363, "grad_norm": 0.28516027331352234, "learning_rate": 3.5946732058603023e-06, "loss": 1.8307, "step": 3665 }, { "epoch": 0.8445545124690433, "grad_norm": 0.25588861107826233, "learning_rate": 3.584292146822035e-06, "loss": 1.8115, "step": 3666 }, { "epoch": 0.8447848874042504, "grad_norm": 0.29080790281295776, "learning_rate": 3.573924941544329e-06, "loss": 1.7755, "step": 3667 }, { "epoch": 0.8450152623394575, "grad_norm": 0.2754017412662506, "learning_rate": 3.5635715967337223e-06, "loss": 1.8003, "step": 3668 }, { "epoch": 0.8452456372746645, "grad_norm": 0.28465399146080017, "learning_rate": 3.5532321190877626e-06, "loss": 1.77, "step": 3669 }, { "epoch": 0.8454760122098716, "grad_norm": 0.27329763770103455, "learning_rate": 3.542906515295011e-06, "loss": 1.7444, "step": 3670 }, { "epoch": 0.8457063871450786, "grad_norm": 0.2587280571460724, "learning_rate": 3.5325947920350905e-06, "loss": 1.7937, "step": 3671 }, { "epoch": 0.8459367620802857, "grad_norm": 0.3950975239276886, "learning_rate": 3.5222969559786155e-06, "loss": 1.8335, "step": 3672 }, { "epoch": 0.8461671370154927, "grad_norm": 0.26442593336105347, "learning_rate": 3.5120130137872215e-06, "loss": 1.813, "step": 3673 }, { "epoch": 0.8463975119506998, "grad_norm": 0.25828850269317627, "learning_rate": 3.5017429721135807e-06, "loss": 1.8403, "step": 3674 }, { "epoch": 0.8466278868859068, "grad_norm": 0.25323888659477234, "learning_rate": 3.491486837601338e-06, "loss": 1.8073, "step": 3675 }, { "epoch": 0.8468582618211139, "grad_norm": 0.2531195282936096, "learning_rate": 3.48124461688516e-06, "loss": 1.8116, "step": 3676 }, { "epoch": 0.847088636756321, "grad_norm": 0.31017911434173584, "learning_rate": 3.4710163165907112e-06, "loss": 1.7792, "step": 3677 }, { "epoch": 0.847319011691528, "grad_norm": 0.30353865027427673, "learning_rate": 3.4608019433346516e-06, "loss": 1.7986, "step": 3678 }, { "epoch": 0.847549386626735, "grad_norm": 0.2732103168964386, "learning_rate": 3.450601503724632e-06, "loss": 1.8048, "step": 3679 }, { "epoch": 0.847779761561942, "grad_norm": 0.30000442266464233, "learning_rate": 3.440415004359282e-06, "loss": 1.7702, "step": 3680 }, { "epoch": 0.8480101364971491, "grad_norm": 0.2569717466831207, "learning_rate": 3.430242451828225e-06, "loss": 1.8257, "step": 3681 }, { "epoch": 0.8482405114323561, "grad_norm": 0.3164677917957306, "learning_rate": 3.4200838527120527e-06, "loss": 1.7728, "step": 3682 }, { "epoch": 0.8484708863675632, "grad_norm": 0.2936268448829651, "learning_rate": 3.4099392135823335e-06, "loss": 1.7841, "step": 3683 }, { "epoch": 0.8487012613027702, "grad_norm": 0.2603538930416107, "learning_rate": 3.399808541001609e-06, "loss": 1.7889, "step": 3684 }, { "epoch": 0.8489316362379773, "grad_norm": 0.28272396326065063, "learning_rate": 3.389691841523379e-06, "loss": 1.8264, "step": 3685 }, { "epoch": 0.8491620111731844, "grad_norm": 0.2784312963485718, "learning_rate": 3.3795891216921115e-06, "loss": 1.811, "step": 3686 }, { "epoch": 0.8493923861083914, "grad_norm": 0.545539915561676, "learning_rate": 3.3695003880432286e-06, "loss": 1.7995, "step": 3687 }, { "epoch": 0.8496227610435985, "grad_norm": 0.2769160568714142, "learning_rate": 3.3594256471030898e-06, "loss": 1.7668, "step": 3688 }, { "epoch": 0.8498531359788055, "grad_norm": 0.2583329379558563, "learning_rate": 3.3493649053890326e-06, "loss": 1.7773, "step": 3689 }, { "epoch": 0.8500835109140126, "grad_norm": 0.27910682559013367, "learning_rate": 3.3393181694093224e-06, "loss": 1.7782, "step": 3690 }, { "epoch": 0.8503138858492196, "grad_norm": 0.2692984342575073, "learning_rate": 3.329285445663147e-06, "loss": 1.8109, "step": 3691 }, { "epoch": 0.8505442607844267, "grad_norm": 0.26590093970298767, "learning_rate": 3.319266740640661e-06, "loss": 1.7887, "step": 3692 }, { "epoch": 0.8507746357196337, "grad_norm": 0.2758125364780426, "learning_rate": 3.30926206082294e-06, "loss": 1.8124, "step": 3693 }, { "epoch": 0.8510050106548408, "grad_norm": 0.2616296112537384, "learning_rate": 3.2992714126819644e-06, "loss": 1.773, "step": 3694 }, { "epoch": 0.8512353855900479, "grad_norm": 0.2843475639820099, "learning_rate": 3.2892948026806786e-06, "loss": 1.7605, "step": 3695 }, { "epoch": 0.8514657605252548, "grad_norm": 0.30261722207069397, "learning_rate": 3.2793322372729085e-06, "loss": 1.7957, "step": 3696 }, { "epoch": 0.8516961354604619, "grad_norm": 0.2686760127544403, "learning_rate": 3.2693837229034072e-06, "loss": 1.7705, "step": 3697 }, { "epoch": 0.8519265103956689, "grad_norm": 0.2894749343395233, "learning_rate": 3.2594492660078552e-06, "loss": 1.7601, "step": 3698 }, { "epoch": 0.852156885330876, "grad_norm": 0.26899003982543945, "learning_rate": 3.249528873012814e-06, "loss": 1.77, "step": 3699 }, { "epoch": 0.852387260266083, "grad_norm": 0.3376278281211853, "learning_rate": 3.239622550335755e-06, "loss": 1.7799, "step": 3700 }, { "epoch": 0.8526176352012901, "grad_norm": 0.24409641325473785, "learning_rate": 3.2297303043850565e-06, "loss": 1.8419, "step": 3701 }, { "epoch": 0.8528480101364971, "grad_norm": 0.29038649797439575, "learning_rate": 3.2198521415599843e-06, "loss": 1.768, "step": 3702 }, { "epoch": 0.8530783850717042, "grad_norm": 0.27788245677948, "learning_rate": 3.2099880682506884e-06, "loss": 1.8101, "step": 3703 }, { "epoch": 0.8533087600069112, "grad_norm": 0.3140372931957245, "learning_rate": 3.2001380908382174e-06, "loss": 1.7225, "step": 3704 }, { "epoch": 0.8535391349421183, "grad_norm": 0.27993595600128174, "learning_rate": 3.190302215694485e-06, "loss": 1.8491, "step": 3705 }, { "epoch": 0.8537695098773254, "grad_norm": 0.3101867139339447, "learning_rate": 3.1804804491822993e-06, "loss": 1.7214, "step": 3706 }, { "epoch": 0.8539998848125324, "grad_norm": 0.28268954157829285, "learning_rate": 3.1706727976553272e-06, "loss": 1.774, "step": 3707 }, { "epoch": 0.8542302597477395, "grad_norm": 0.2810278534889221, "learning_rate": 3.160879267458114e-06, "loss": 1.8015, "step": 3708 }, { "epoch": 0.8544606346829465, "grad_norm": 0.2587469816207886, "learning_rate": 3.151099864926066e-06, "loss": 1.7894, "step": 3709 }, { "epoch": 0.8546910096181536, "grad_norm": 0.2503468096256256, "learning_rate": 3.141334596385448e-06, "loss": 1.8169, "step": 3710 }, { "epoch": 0.8549213845533606, "grad_norm": 0.25982117652893066, "learning_rate": 3.1315834681533875e-06, "loss": 1.7944, "step": 3711 }, { "epoch": 0.8551517594885677, "grad_norm": 0.2716895043849945, "learning_rate": 3.1218464865378606e-06, "loss": 1.7607, "step": 3712 }, { "epoch": 0.8553821344237746, "grad_norm": 0.27227047085762024, "learning_rate": 3.1121236578376957e-06, "loss": 1.7977, "step": 3713 }, { "epoch": 0.8556125093589817, "grad_norm": 0.28527015447616577, "learning_rate": 3.1024149883425586e-06, "loss": 1.7254, "step": 3714 }, { "epoch": 0.8558428842941888, "grad_norm": 0.2592174708843231, "learning_rate": 3.0927204843329617e-06, "loss": 1.8222, "step": 3715 }, { "epoch": 0.8560732592293958, "grad_norm": 0.2768539488315582, "learning_rate": 3.0830401520802576e-06, "loss": 1.7816, "step": 3716 }, { "epoch": 0.8563036341646029, "grad_norm": 0.27707651257514954, "learning_rate": 3.0733739978466125e-06, "loss": 1.7993, "step": 3717 }, { "epoch": 0.8565340090998099, "grad_norm": 0.2688905894756317, "learning_rate": 3.0637220278850465e-06, "loss": 1.7937, "step": 3718 }, { "epoch": 0.856764384035017, "grad_norm": 0.28344276547431946, "learning_rate": 3.05408424843939e-06, "loss": 1.7501, "step": 3719 }, { "epoch": 0.856994758970224, "grad_norm": 0.2673041820526123, "learning_rate": 3.044460665744284e-06, "loss": 1.7753, "step": 3720 }, { "epoch": 0.8572251339054311, "grad_norm": 0.2654378116130829, "learning_rate": 3.0348512860252116e-06, "loss": 1.7985, "step": 3721 }, { "epoch": 0.8574555088406381, "grad_norm": 0.2655397653579712, "learning_rate": 3.025256115498451e-06, "loss": 1.7997, "step": 3722 }, { "epoch": 0.8576858837758452, "grad_norm": 0.2756088376045227, "learning_rate": 3.0156751603710816e-06, "loss": 1.789, "step": 3723 }, { "epoch": 0.8579162587110523, "grad_norm": 0.2745193541049957, "learning_rate": 3.0061084268410006e-06, "loss": 1.7822, "step": 3724 }, { "epoch": 0.8581466336462593, "grad_norm": 0.2752840518951416, "learning_rate": 2.9965559210969026e-06, "loss": 1.7434, "step": 3725 }, { "epoch": 0.8583770085814664, "grad_norm": 0.2834746837615967, "learning_rate": 2.9870176493182742e-06, "loss": 1.7402, "step": 3726 }, { "epoch": 0.8586073835166734, "grad_norm": 0.29380345344543457, "learning_rate": 2.9774936176753977e-06, "loss": 1.8109, "step": 3727 }, { "epoch": 0.8588377584518805, "grad_norm": 0.25949278473854065, "learning_rate": 2.967983832329341e-06, "loss": 1.7825, "step": 3728 }, { "epoch": 0.8590681333870875, "grad_norm": 0.2788845896720886, "learning_rate": 2.9584882994319577e-06, "loss": 1.7744, "step": 3729 }, { "epoch": 0.8592985083222945, "grad_norm": 0.2562201917171478, "learning_rate": 2.9490070251258827e-06, "loss": 1.8126, "step": 3730 }, { "epoch": 0.8595288832575015, "grad_norm": 0.2822914719581604, "learning_rate": 2.939540015544523e-06, "loss": 1.7644, "step": 3731 }, { "epoch": 0.8597592581927086, "grad_norm": 0.2840176820755005, "learning_rate": 2.930087276812063e-06, "loss": 1.7708, "step": 3732 }, { "epoch": 0.8599896331279157, "grad_norm": 0.2863113284111023, "learning_rate": 2.9206488150434475e-06, "loss": 1.7776, "step": 3733 }, { "epoch": 0.8602200080631227, "grad_norm": 0.26461535692214966, "learning_rate": 2.9112246363443953e-06, "loss": 1.7974, "step": 3734 }, { "epoch": 0.8604503829983298, "grad_norm": 0.31927552819252014, "learning_rate": 2.9018147468113795e-06, "loss": 1.8103, "step": 3735 }, { "epoch": 0.8606807579335368, "grad_norm": 0.27823638916015625, "learning_rate": 2.892419152531631e-06, "loss": 1.7472, "step": 3736 }, { "epoch": 0.8609111328687439, "grad_norm": 0.26505032181739807, "learning_rate": 2.8830378595831377e-06, "loss": 1.7886, "step": 3737 }, { "epoch": 0.8611415078039509, "grad_norm": 0.675846517086029, "learning_rate": 2.8736708740346146e-06, "loss": 1.7628, "step": 3738 }, { "epoch": 0.861371882739158, "grad_norm": 0.2897227704524994, "learning_rate": 2.8643182019455565e-06, "loss": 1.7764, "step": 3739 }, { "epoch": 0.861602257674365, "grad_norm": 0.2688276469707489, "learning_rate": 2.8549798493661794e-06, "loss": 1.77, "step": 3740 }, { "epoch": 0.8618326326095721, "grad_norm": 0.27003854513168335, "learning_rate": 2.8456558223374204e-06, "loss": 1.8263, "step": 3741 }, { "epoch": 0.8620630075447792, "grad_norm": 0.27300724387168884, "learning_rate": 2.8363461268909818e-06, "loss": 1.8301, "step": 3742 }, { "epoch": 0.8622933824799862, "grad_norm": 0.26381152868270874, "learning_rate": 2.8270507690492804e-06, "loss": 1.7964, "step": 3743 }, { "epoch": 0.8625237574151933, "grad_norm": 0.3591111898422241, "learning_rate": 2.817769754825439e-06, "loss": 1.704, "step": 3744 }, { "epoch": 0.8627541323504003, "grad_norm": 0.26048171520233154, "learning_rate": 2.808503090223344e-06, "loss": 1.7802, "step": 3745 }, { "epoch": 0.8629845072856073, "grad_norm": 0.276032418012619, "learning_rate": 2.7992507812375556e-06, "loss": 1.7823, "step": 3746 }, { "epoch": 0.8632148822208143, "grad_norm": 0.28790581226348877, "learning_rate": 2.790012833853367e-06, "loss": 1.7941, "step": 3747 }, { "epoch": 0.8634452571560214, "grad_norm": 0.2696770131587982, "learning_rate": 2.780789254046795e-06, "loss": 1.7795, "step": 3748 }, { "epoch": 0.8636756320912284, "grad_norm": 0.33539292216300964, "learning_rate": 2.7715800477845334e-06, "loss": 1.7758, "step": 3749 }, { "epoch": 0.8639060070264355, "grad_norm": 0.2705788314342499, "learning_rate": 2.7623852210239885e-06, "loss": 1.7973, "step": 3750 }, { "epoch": 0.8641363819616426, "grad_norm": 0.2746134400367737, "learning_rate": 2.7532047797132867e-06, "loss": 1.8386, "step": 3751 }, { "epoch": 0.8643667568968496, "grad_norm": 0.2932473123073578, "learning_rate": 2.7440387297912123e-06, "loss": 1.7861, "step": 3752 }, { "epoch": 0.8645971318320567, "grad_norm": 0.26591041684150696, "learning_rate": 2.734887077187262e-06, "loss": 1.7892, "step": 3753 }, { "epoch": 0.8648275067672637, "grad_norm": 0.2955741584300995, "learning_rate": 2.7257498278216135e-06, "loss": 1.7622, "step": 3754 }, { "epoch": 0.8650578817024708, "grad_norm": 0.28057587146759033, "learning_rate": 2.716626987605131e-06, "loss": 1.786, "step": 3755 }, { "epoch": 0.8652882566376778, "grad_norm": 0.2699863016605377, "learning_rate": 2.7075185624393485e-06, "loss": 1.8337, "step": 3756 }, { "epoch": 0.8655186315728849, "grad_norm": 0.27778196334838867, "learning_rate": 2.6984245582164807e-06, "loss": 1.7788, "step": 3757 }, { "epoch": 0.865749006508092, "grad_norm": 0.2700488567352295, "learning_rate": 2.6893449808194166e-06, "loss": 1.811, "step": 3758 }, { "epoch": 0.865979381443299, "grad_norm": 0.26211732625961304, "learning_rate": 2.6802798361217103e-06, "loss": 1.7999, "step": 3759 }, { "epoch": 0.8662097563785061, "grad_norm": 0.26070716977119446, "learning_rate": 2.6712291299875735e-06, "loss": 1.749, "step": 3760 }, { "epoch": 0.8664401313137131, "grad_norm": 0.2654944062232971, "learning_rate": 2.6621928682718845e-06, "loss": 1.7724, "step": 3761 }, { "epoch": 0.8666705062489202, "grad_norm": 0.2626260221004486, "learning_rate": 2.653171056820172e-06, "loss": 1.8, "step": 3762 }, { "epoch": 0.8669008811841271, "grad_norm": 0.26670190691947937, "learning_rate": 2.6441637014686273e-06, "loss": 1.7982, "step": 3763 }, { "epoch": 0.8671312561193342, "grad_norm": 0.2687362730503082, "learning_rate": 2.635170808044077e-06, "loss": 1.7983, "step": 3764 }, { "epoch": 0.8673616310545412, "grad_norm": 0.3078904449939728, "learning_rate": 2.626192382364001e-06, "loss": 1.7969, "step": 3765 }, { "epoch": 0.8675920059897483, "grad_norm": 0.2726536691188812, "learning_rate": 2.617228430236521e-06, "loss": 1.7762, "step": 3766 }, { "epoch": 0.8678223809249553, "grad_norm": 0.27011197805404663, "learning_rate": 2.6082789574603805e-06, "loss": 1.8087, "step": 3767 }, { "epoch": 0.8680527558601624, "grad_norm": 0.2601669430732727, "learning_rate": 2.599343969824977e-06, "loss": 1.7815, "step": 3768 }, { "epoch": 0.8682831307953695, "grad_norm": 0.26661884784698486, "learning_rate": 2.5904234731103342e-06, "loss": 1.8195, "step": 3769 }, { "epoch": 0.8685135057305765, "grad_norm": 0.2683071196079254, "learning_rate": 2.5815174730870804e-06, "loss": 1.7979, "step": 3770 }, { "epoch": 0.8687438806657836, "grad_norm": 0.2646176815032959, "learning_rate": 2.5726259755164877e-06, "loss": 1.7939, "step": 3771 }, { "epoch": 0.8689742556009906, "grad_norm": 0.2745078206062317, "learning_rate": 2.563748986150455e-06, "loss": 1.7723, "step": 3772 }, { "epoch": 0.8692046305361977, "grad_norm": 0.3826308250427246, "learning_rate": 2.5548865107314607e-06, "loss": 1.7369, "step": 3773 }, { "epoch": 0.8694350054714047, "grad_norm": 0.2688979208469391, "learning_rate": 2.5460385549926275e-06, "loss": 1.8232, "step": 3774 }, { "epoch": 0.8696653804066118, "grad_norm": 0.30095940828323364, "learning_rate": 2.5372051246576656e-06, "loss": 1.7963, "step": 3775 }, { "epoch": 0.8698957553418188, "grad_norm": 0.2726401090621948, "learning_rate": 2.5283862254409015e-06, "loss": 1.7337, "step": 3776 }, { "epoch": 0.8701261302770259, "grad_norm": 0.25788795948028564, "learning_rate": 2.5195818630472546e-06, "loss": 1.7449, "step": 3777 }, { "epoch": 0.870356505212233, "grad_norm": 0.2743348479270935, "learning_rate": 2.5107920431722414e-06, "loss": 1.7796, "step": 3778 }, { "epoch": 0.87058688014744, "grad_norm": 0.29600656032562256, "learning_rate": 2.5020167715019694e-06, "loss": 1.7759, "step": 3779 }, { "epoch": 0.870817255082647, "grad_norm": 0.29650238156318665, "learning_rate": 2.4932560537131417e-06, "loss": 1.776, "step": 3780 }, { "epoch": 0.871047630017854, "grad_norm": 0.2919641435146332, "learning_rate": 2.4845098954730362e-06, "loss": 1.7782, "step": 3781 }, { "epoch": 0.8712780049530611, "grad_norm": 0.2912849187850952, "learning_rate": 2.475778302439524e-06, "loss": 1.762, "step": 3782 }, { "epoch": 0.8715083798882681, "grad_norm": 0.26826345920562744, "learning_rate": 2.4670612802610405e-06, "loss": 1.7804, "step": 3783 }, { "epoch": 0.8717387548234752, "grad_norm": 0.26084408164024353, "learning_rate": 2.45835883457661e-06, "loss": 1.8047, "step": 3784 }, { "epoch": 0.8719691297586822, "grad_norm": 0.25450974702835083, "learning_rate": 2.449670971015816e-06, "loss": 1.7583, "step": 3785 }, { "epoch": 0.8721995046938893, "grad_norm": 0.28013843297958374, "learning_rate": 2.440997695198813e-06, "loss": 1.719, "step": 3786 }, { "epoch": 0.8724298796290963, "grad_norm": 0.27269431948661804, "learning_rate": 2.4323390127363177e-06, "loss": 1.7812, "step": 3787 }, { "epoch": 0.8726602545643034, "grad_norm": 0.2923828363418579, "learning_rate": 2.4236949292296085e-06, "loss": 1.7711, "step": 3788 }, { "epoch": 0.8728906294995105, "grad_norm": 0.27165845036506653, "learning_rate": 2.415065450270518e-06, "loss": 1.7637, "step": 3789 }, { "epoch": 0.8731210044347175, "grad_norm": 0.28970736265182495, "learning_rate": 2.406450581441436e-06, "loss": 1.7968, "step": 3790 }, { "epoch": 0.8733513793699246, "grad_norm": 0.2869347929954529, "learning_rate": 2.397850328315285e-06, "loss": 1.7652, "step": 3791 }, { "epoch": 0.8735817543051316, "grad_norm": 0.2807677984237671, "learning_rate": 2.3892646964555538e-06, "loss": 1.79, "step": 3792 }, { "epoch": 0.8738121292403387, "grad_norm": 0.2737308144569397, "learning_rate": 2.3806936914162663e-06, "loss": 1.7884, "step": 3793 }, { "epoch": 0.8740425041755457, "grad_norm": 0.26727408170700073, "learning_rate": 2.372137318741968e-06, "loss": 1.8075, "step": 3794 }, { "epoch": 0.8742728791107528, "grad_norm": 0.2712678611278534, "learning_rate": 2.3635955839677647e-06, "loss": 1.752, "step": 3795 }, { "epoch": 0.8745032540459599, "grad_norm": 0.27435538172721863, "learning_rate": 2.3550684926192833e-06, "loss": 1.8137, "step": 3796 }, { "epoch": 0.8747336289811668, "grad_norm": 0.2611575126647949, "learning_rate": 2.346556050212656e-06, "loss": 1.8061, "step": 3797 }, { "epoch": 0.8749640039163739, "grad_norm": 0.2753782570362091, "learning_rate": 2.3380582622545776e-06, "loss": 1.7244, "step": 3798 }, { "epoch": 0.8751943788515809, "grad_norm": 0.26329073309898376, "learning_rate": 2.329575134242232e-06, "loss": 1.8059, "step": 3799 }, { "epoch": 0.875424753786788, "grad_norm": 0.2668122947216034, "learning_rate": 2.3211066716633257e-06, "loss": 1.8038, "step": 3800 }, { "epoch": 0.875655128721995, "grad_norm": 0.2788633108139038, "learning_rate": 2.3126528799961024e-06, "loss": 1.7932, "step": 3801 }, { "epoch": 0.8758855036572021, "grad_norm": 0.27438896894454956, "learning_rate": 2.3042137647092786e-06, "loss": 1.7673, "step": 3802 }, { "epoch": 0.8761158785924091, "grad_norm": 0.27417370676994324, "learning_rate": 2.295789331262099e-06, "loss": 1.7911, "step": 3803 }, { "epoch": 0.8763462535276162, "grad_norm": 0.2674144506454468, "learning_rate": 2.2873795851043073e-06, "loss": 1.7848, "step": 3804 }, { "epoch": 0.8765766284628232, "grad_norm": 0.2691703736782074, "learning_rate": 2.278984531676143e-06, "loss": 1.8089, "step": 3805 }, { "epoch": 0.8768070033980303, "grad_norm": 0.2504115700721741, "learning_rate": 2.2706041764083447e-06, "loss": 1.7982, "step": 3806 }, { "epoch": 0.8770373783332374, "grad_norm": 0.24710707366466522, "learning_rate": 2.262238524722135e-06, "loss": 1.7943, "step": 3807 }, { "epoch": 0.8772677532684444, "grad_norm": 0.2821066379547119, "learning_rate": 2.2538875820292347e-06, "loss": 1.7735, "step": 3808 }, { "epoch": 0.8774981282036515, "grad_norm": 0.2817186117172241, "learning_rate": 2.245551353731845e-06, "loss": 1.7846, "step": 3809 }, { "epoch": 0.8777285031388585, "grad_norm": 0.2728576064109802, "learning_rate": 2.2372298452226465e-06, "loss": 1.7636, "step": 3810 }, { "epoch": 0.8779588780740656, "grad_norm": 0.2894512712955475, "learning_rate": 2.2289230618847997e-06, "loss": 1.7777, "step": 3811 }, { "epoch": 0.8781892530092726, "grad_norm": 0.2817392349243164, "learning_rate": 2.220631009091939e-06, "loss": 1.7963, "step": 3812 }, { "epoch": 0.8784196279444796, "grad_norm": 0.27395039796829224, "learning_rate": 2.212353692208172e-06, "loss": 1.8078, "step": 3813 }, { "epoch": 0.8786500028796866, "grad_norm": 0.26808038353919983, "learning_rate": 2.2040911165880723e-06, "loss": 1.8045, "step": 3814 }, { "epoch": 0.8788803778148937, "grad_norm": 0.2795944809913635, "learning_rate": 2.1958432875766653e-06, "loss": 1.7133, "step": 3815 }, { "epoch": 0.8791107527501008, "grad_norm": 0.2815767526626587, "learning_rate": 2.1876102105094636e-06, "loss": 1.7746, "step": 3816 }, { "epoch": 0.8793411276853078, "grad_norm": 0.2557094693183899, "learning_rate": 2.179391890712415e-06, "loss": 1.7823, "step": 3817 }, { "epoch": 0.8795715026205149, "grad_norm": 0.28132200241088867, "learning_rate": 2.1711883335019225e-06, "loss": 1.7652, "step": 3818 }, { "epoch": 0.8798018775557219, "grad_norm": 0.27041807770729065, "learning_rate": 2.1629995441848543e-06, "loss": 1.8018, "step": 3819 }, { "epoch": 0.880032252490929, "grad_norm": 0.2729186415672302, "learning_rate": 2.1548255280585045e-06, "loss": 1.8025, "step": 3820 }, { "epoch": 0.880262627426136, "grad_norm": 0.2578584551811218, "learning_rate": 2.1466662904106234e-06, "loss": 1.7548, "step": 3821 }, { "epoch": 0.8804930023613431, "grad_norm": 0.37890395522117615, "learning_rate": 2.138521836519408e-06, "loss": 1.7213, "step": 3822 }, { "epoch": 0.8807233772965501, "grad_norm": 0.255022794008255, "learning_rate": 2.13039217165347e-06, "loss": 1.8081, "step": 3823 }, { "epoch": 0.8809537522317572, "grad_norm": 0.2765805125236511, "learning_rate": 2.122277301071868e-06, "loss": 1.7993, "step": 3824 }, { "epoch": 0.8811841271669643, "grad_norm": 0.2774614095687866, "learning_rate": 2.1141772300241004e-06, "loss": 1.8246, "step": 3825 }, { "epoch": 0.8814145021021713, "grad_norm": 0.31486085057258606, "learning_rate": 2.1060919637500674e-06, "loss": 1.718, "step": 3826 }, { "epoch": 0.8816448770373784, "grad_norm": 0.25935253500938416, "learning_rate": 2.098021507480111e-06, "loss": 1.8038, "step": 3827 }, { "epoch": 0.8818752519725854, "grad_norm": 0.2882661819458008, "learning_rate": 2.089965866434984e-06, "loss": 1.8358, "step": 3828 }, { "epoch": 0.8821056269077925, "grad_norm": 0.26044872403144836, "learning_rate": 2.0819250458258584e-06, "loss": 1.7926, "step": 3829 }, { "epoch": 0.8823360018429994, "grad_norm": 0.2650671601295471, "learning_rate": 2.0738990508543194e-06, "loss": 1.8002, "step": 3830 }, { "epoch": 0.8825663767782065, "grad_norm": 0.2976092994213104, "learning_rate": 2.065887886712359e-06, "loss": 1.7824, "step": 3831 }, { "epoch": 0.8827967517134135, "grad_norm": 0.26627394556999207, "learning_rate": 2.0578915585823753e-06, "loss": 1.7988, "step": 3832 }, { "epoch": 0.8830271266486206, "grad_norm": 0.27762570977211, "learning_rate": 2.049910071637173e-06, "loss": 1.8132, "step": 3833 }, { "epoch": 0.8832575015838277, "grad_norm": 0.28084927797317505, "learning_rate": 2.041943431039953e-06, "loss": 1.7395, "step": 3834 }, { "epoch": 0.8834878765190347, "grad_norm": 0.25930294394493103, "learning_rate": 2.033991641944308e-06, "loss": 1.7744, "step": 3835 }, { "epoch": 0.8837182514542418, "grad_norm": 0.30411627888679504, "learning_rate": 2.026054709494235e-06, "loss": 1.7415, "step": 3836 }, { "epoch": 0.8839486263894488, "grad_norm": 0.2520469129085541, "learning_rate": 2.018132638824108e-06, "loss": 1.8098, "step": 3837 }, { "epoch": 0.8841790013246559, "grad_norm": 0.2716251611709595, "learning_rate": 2.0102254350586938e-06, "loss": 1.7826, "step": 3838 }, { "epoch": 0.8844093762598629, "grad_norm": 0.25826627016067505, "learning_rate": 2.0023331033131394e-06, "loss": 1.7952, "step": 3839 }, { "epoch": 0.88463975119507, "grad_norm": 0.2700692415237427, "learning_rate": 1.9944556486929777e-06, "loss": 1.7929, "step": 3840 }, { "epoch": 0.884870126130277, "grad_norm": 0.28802812099456787, "learning_rate": 1.9865930762940967e-06, "loss": 1.716, "step": 3841 }, { "epoch": 0.8851005010654841, "grad_norm": 0.3047880530357361, "learning_rate": 1.9787453912027854e-06, "loss": 1.7879, "step": 3842 }, { "epoch": 0.8853308760006912, "grad_norm": 0.26604163646698, "learning_rate": 1.970912598495689e-06, "loss": 1.8271, "step": 3843 }, { "epoch": 0.8855612509358982, "grad_norm": 0.2672092616558075, "learning_rate": 1.9630947032398067e-06, "loss": 1.7787, "step": 3844 }, { "epoch": 0.8857916258711053, "grad_norm": 0.2728846073150635, "learning_rate": 1.9552917104925267e-06, "loss": 1.7874, "step": 3845 }, { "epoch": 0.8860220008063123, "grad_norm": 0.2974569499492645, "learning_rate": 1.9475036253015805e-06, "loss": 1.7843, "step": 3846 }, { "epoch": 0.8862523757415193, "grad_norm": 0.2908930480480194, "learning_rate": 1.9397304527050463e-06, "loss": 1.7491, "step": 3847 }, { "epoch": 0.8864827506767263, "grad_norm": 0.30390527844429016, "learning_rate": 1.9319721977313856e-06, "loss": 1.7439, "step": 3848 }, { "epoch": 0.8867131256119334, "grad_norm": 0.27142706513404846, "learning_rate": 1.9242288653993774e-06, "loss": 1.7735, "step": 3849 }, { "epoch": 0.8869435005471404, "grad_norm": 0.260288804769516, "learning_rate": 1.9165004607181596e-06, "loss": 1.7856, "step": 3850 }, { "epoch": 0.8871738754823475, "grad_norm": 0.27041852474212646, "learning_rate": 1.9087869886872335e-06, "loss": 1.7837, "step": 3851 }, { "epoch": 0.8874042504175546, "grad_norm": 0.2740664780139923, "learning_rate": 1.901088454296404e-06, "loss": 1.7633, "step": 3852 }, { "epoch": 0.8876346253527616, "grad_norm": 0.2665861248970032, "learning_rate": 1.8934048625258328e-06, "loss": 1.795, "step": 3853 }, { "epoch": 0.8878650002879687, "grad_norm": 0.30832645297050476, "learning_rate": 1.8857362183460264e-06, "loss": 1.7246, "step": 3854 }, { "epoch": 0.8880953752231757, "grad_norm": 0.25342023372650146, "learning_rate": 1.8780825267177975e-06, "loss": 1.8321, "step": 3855 }, { "epoch": 0.8883257501583828, "grad_norm": 0.26880279183387756, "learning_rate": 1.8704437925923023e-06, "loss": 1.8199, "step": 3856 }, { "epoch": 0.8885561250935898, "grad_norm": 0.2763240337371826, "learning_rate": 1.8628200209110131e-06, "loss": 1.8129, "step": 3857 }, { "epoch": 0.8887865000287969, "grad_norm": 0.2782664895057678, "learning_rate": 1.8552112166057262e-06, "loss": 1.8028, "step": 3858 }, { "epoch": 0.889016874964004, "grad_norm": 0.2518719434738159, "learning_rate": 1.8476173845985601e-06, "loss": 1.7897, "step": 3859 }, { "epoch": 0.889247249899211, "grad_norm": 0.259295254945755, "learning_rate": 1.8400385298019378e-06, "loss": 1.7671, "step": 3860 }, { "epoch": 0.8894776248344181, "grad_norm": 0.27285706996917725, "learning_rate": 1.8324746571186046e-06, "loss": 1.7913, "step": 3861 }, { "epoch": 0.8897079997696251, "grad_norm": 0.276236891746521, "learning_rate": 1.824925771441599e-06, "loss": 1.7407, "step": 3862 }, { "epoch": 0.8899383747048322, "grad_norm": 0.2771945595741272, "learning_rate": 1.8173918776542815e-06, "loss": 1.7722, "step": 3863 }, { "epoch": 0.8901687496400391, "grad_norm": 0.285790354013443, "learning_rate": 1.8098729806303116e-06, "loss": 1.7701, "step": 3864 }, { "epoch": 0.8903991245752462, "grad_norm": 0.24917301535606384, "learning_rate": 1.8023690852336238e-06, "loss": 1.7939, "step": 3865 }, { "epoch": 0.8906294995104532, "grad_norm": 0.30395546555519104, "learning_rate": 1.7948801963184854e-06, "loss": 1.7728, "step": 3866 }, { "epoch": 0.8908598744456603, "grad_norm": 0.27691006660461426, "learning_rate": 1.7874063187294316e-06, "loss": 1.7661, "step": 3867 }, { "epoch": 0.8910902493808673, "grad_norm": 0.2707049250602722, "learning_rate": 1.779947457301284e-06, "loss": 1.7634, "step": 3868 }, { "epoch": 0.8913206243160744, "grad_norm": 0.26535773277282715, "learning_rate": 1.7725036168591751e-06, "loss": 1.7818, "step": 3869 }, { "epoch": 0.8915509992512815, "grad_norm": 0.27395084500312805, "learning_rate": 1.7650748022184914e-06, "loss": 1.8261, "step": 3870 }, { "epoch": 0.8917813741864885, "grad_norm": 0.25701987743377686, "learning_rate": 1.7576610181849113e-06, "loss": 1.7864, "step": 3871 }, { "epoch": 0.8920117491216956, "grad_norm": 0.2742486894130707, "learning_rate": 1.7502622695544036e-06, "loss": 1.7675, "step": 3872 }, { "epoch": 0.8922421240569026, "grad_norm": 0.26668581366539, "learning_rate": 1.7428785611131843e-06, "loss": 1.7789, "step": 3873 }, { "epoch": 0.8924724989921097, "grad_norm": 0.2592695653438568, "learning_rate": 1.7355098976377575e-06, "loss": 1.7851, "step": 3874 }, { "epoch": 0.8927028739273167, "grad_norm": 0.26256364583969116, "learning_rate": 1.7281562838948966e-06, "loss": 1.737, "step": 3875 }, { "epoch": 0.8929332488625238, "grad_norm": 0.2503250539302826, "learning_rate": 1.720817724641624e-06, "loss": 1.7854, "step": 3876 }, { "epoch": 0.8931636237977308, "grad_norm": 0.293197900056839, "learning_rate": 1.7134942246252395e-06, "loss": 1.7775, "step": 3877 }, { "epoch": 0.8933939987329379, "grad_norm": 0.29438236355781555, "learning_rate": 1.7061857885832893e-06, "loss": 1.7398, "step": 3878 }, { "epoch": 0.893624373668145, "grad_norm": 0.2593371570110321, "learning_rate": 1.6988924212435864e-06, "loss": 1.7831, "step": 3879 }, { "epoch": 0.8938547486033519, "grad_norm": 0.3070215880870819, "learning_rate": 1.6916141273241842e-06, "loss": 1.7614, "step": 3880 }, { "epoch": 0.894085123538559, "grad_norm": 0.2516513168811798, "learning_rate": 1.6843509115333917e-06, "loss": 1.7994, "step": 3881 }, { "epoch": 0.894315498473766, "grad_norm": 0.26867976784706116, "learning_rate": 1.6771027785697613e-06, "loss": 1.7738, "step": 3882 }, { "epoch": 0.8945458734089731, "grad_norm": 0.254788875579834, "learning_rate": 1.6698697331220925e-06, "loss": 1.7582, "step": 3883 }, { "epoch": 0.8947762483441801, "grad_norm": 0.26115870475769043, "learning_rate": 1.662651779869423e-06, "loss": 1.7839, "step": 3884 }, { "epoch": 0.8950066232793872, "grad_norm": 0.2859269380569458, "learning_rate": 1.6554489234810205e-06, "loss": 1.7177, "step": 3885 }, { "epoch": 0.8952369982145942, "grad_norm": 0.26296180486679077, "learning_rate": 1.6482611686163968e-06, "loss": 1.7466, "step": 3886 }, { "epoch": 0.8954673731498013, "grad_norm": 0.26896125078201294, "learning_rate": 1.6410885199252908e-06, "loss": 1.786, "step": 3887 }, { "epoch": 0.8956977480850083, "grad_norm": 0.2848125100135803, "learning_rate": 1.6339309820476656e-06, "loss": 1.7999, "step": 3888 }, { "epoch": 0.8959281230202154, "grad_norm": 0.25968894362449646, "learning_rate": 1.6267885596137121e-06, "loss": 1.8394, "step": 3889 }, { "epoch": 0.8961584979554225, "grad_norm": 0.2736717462539673, "learning_rate": 1.6196612572438429e-06, "loss": 1.7615, "step": 3890 }, { "epoch": 0.8963888728906295, "grad_norm": 0.2750624418258667, "learning_rate": 1.6125490795486947e-06, "loss": 1.7861, "step": 3891 }, { "epoch": 0.8966192478258366, "grad_norm": 0.29621168971061707, "learning_rate": 1.6054520311291093e-06, "loss": 1.8388, "step": 3892 }, { "epoch": 0.8968496227610436, "grad_norm": 0.27218177914619446, "learning_rate": 1.5983701165761533e-06, "loss": 1.7778, "step": 3893 }, { "epoch": 0.8970799976962507, "grad_norm": 0.2608799934387207, "learning_rate": 1.591303340471084e-06, "loss": 1.8328, "step": 3894 }, { "epoch": 0.8973103726314577, "grad_norm": 0.25736257433891296, "learning_rate": 1.5842517073853952e-06, "loss": 1.7641, "step": 3895 }, { "epoch": 0.8975407475666648, "grad_norm": 0.27398717403411865, "learning_rate": 1.5772152218807624e-06, "loss": 1.7713, "step": 3896 }, { "epoch": 0.8977711225018717, "grad_norm": 0.2896186113357544, "learning_rate": 1.5701938885090584e-06, "loss": 1.7196, "step": 3897 }, { "epoch": 0.8980014974370788, "grad_norm": 0.2713882029056549, "learning_rate": 1.5631877118123805e-06, "loss": 1.7641, "step": 3898 }, { "epoch": 0.8982318723722859, "grad_norm": 0.2665533125400543, "learning_rate": 1.5561966963229924e-06, "loss": 1.8266, "step": 3899 }, { "epoch": 0.8984622473074929, "grad_norm": 0.24209703505039215, "learning_rate": 1.5492208465633594e-06, "loss": 1.8274, "step": 3900 }, { "epoch": 0.8986926222427, "grad_norm": 0.29142555594444275, "learning_rate": 1.5422601670461555e-06, "loss": 1.7461, "step": 3901 }, { "epoch": 0.898922997177907, "grad_norm": 0.2587916851043701, "learning_rate": 1.5353146622742093e-06, "loss": 1.7799, "step": 3902 }, { "epoch": 0.8991533721131141, "grad_norm": 0.2663230895996094, "learning_rate": 1.5283843367405516e-06, "loss": 1.8116, "step": 3903 }, { "epoch": 0.8993837470483211, "grad_norm": 0.3005293309688568, "learning_rate": 1.521469194928396e-06, "loss": 1.7599, "step": 3904 }, { "epoch": 0.8996141219835282, "grad_norm": 0.2518351972103119, "learning_rate": 1.5145692413111228e-06, "loss": 1.7644, "step": 3905 }, { "epoch": 0.8998444969187352, "grad_norm": 0.2855406105518341, "learning_rate": 1.5076844803522922e-06, "loss": 1.7755, "step": 3906 }, { "epoch": 0.9000748718539423, "grad_norm": 0.25400876998901367, "learning_rate": 1.5008149165056384e-06, "loss": 1.8208, "step": 3907 }, { "epoch": 0.9003052467891494, "grad_norm": 0.25932374596595764, "learning_rate": 1.4939605542150598e-06, "loss": 1.7809, "step": 3908 }, { "epoch": 0.9005356217243564, "grad_norm": 0.27212783694267273, "learning_rate": 1.4871213979146286e-06, "loss": 1.8075, "step": 3909 }, { "epoch": 0.9007659966595635, "grad_norm": 0.2517545521259308, "learning_rate": 1.4802974520285729e-06, "loss": 1.7772, "step": 3910 }, { "epoch": 0.9009963715947705, "grad_norm": 0.27263322472572327, "learning_rate": 1.4734887209712833e-06, "loss": 1.8008, "step": 3911 }, { "epoch": 0.9012267465299776, "grad_norm": 0.3099213242530823, "learning_rate": 1.466695209147312e-06, "loss": 1.7514, "step": 3912 }, { "epoch": 0.9014571214651846, "grad_norm": 0.27662307024002075, "learning_rate": 1.4599169209513569e-06, "loss": 1.7811, "step": 3913 }, { "epoch": 0.9016874964003916, "grad_norm": 0.2890753746032715, "learning_rate": 1.4531538607682805e-06, "loss": 1.8195, "step": 3914 }, { "epoch": 0.9019178713355986, "grad_norm": 0.2705698013305664, "learning_rate": 1.4464060329730773e-06, "loss": 1.7865, "step": 3915 }, { "epoch": 0.9021482462708057, "grad_norm": 0.26382890343666077, "learning_rate": 1.4396734419309054e-06, "loss": 1.7732, "step": 3916 }, { "epoch": 0.9023786212060128, "grad_norm": 0.2738267481327057, "learning_rate": 1.4329560919970647e-06, "loss": 1.7154, "step": 3917 }, { "epoch": 0.9026089961412198, "grad_norm": 0.2764747738838196, "learning_rate": 1.4262539875169705e-06, "loss": 1.8204, "step": 3918 }, { "epoch": 0.9028393710764269, "grad_norm": 0.25572848320007324, "learning_rate": 1.4195671328262134e-06, "loss": 1.8224, "step": 3919 }, { "epoch": 0.9030697460116339, "grad_norm": 0.29132264852523804, "learning_rate": 1.4128955322504966e-06, "loss": 1.7904, "step": 3920 }, { "epoch": 0.903300120946841, "grad_norm": 0.2532143294811249, "learning_rate": 1.4062391901056515e-06, "loss": 1.7567, "step": 3921 }, { "epoch": 0.903530495882048, "grad_norm": 0.2658844292163849, "learning_rate": 1.3995981106976585e-06, "loss": 1.7654, "step": 3922 }, { "epoch": 0.9037608708172551, "grad_norm": 0.24553987383842468, "learning_rate": 1.3929722983226034e-06, "loss": 1.8222, "step": 3923 }, { "epoch": 0.9039912457524621, "grad_norm": 0.29920879006385803, "learning_rate": 1.3863617572667076e-06, "loss": 1.8063, "step": 3924 }, { "epoch": 0.9042216206876692, "grad_norm": 0.283247709274292, "learning_rate": 1.379766491806317e-06, "loss": 1.7896, "step": 3925 }, { "epoch": 0.9044519956228763, "grad_norm": 0.25029557943344116, "learning_rate": 1.3731865062078852e-06, "loss": 1.8099, "step": 3926 }, { "epoch": 0.9046823705580833, "grad_norm": 0.27140897512435913, "learning_rate": 1.3666218047279855e-06, "loss": 1.7798, "step": 3927 }, { "epoch": 0.9049127454932904, "grad_norm": 0.2641785442829132, "learning_rate": 1.3600723916133067e-06, "loss": 1.8221, "step": 3928 }, { "epoch": 0.9051431204284974, "grad_norm": 0.2673130929470062, "learning_rate": 1.3535382711006432e-06, "loss": 1.7445, "step": 3929 }, { "epoch": 0.9053734953637045, "grad_norm": 0.2668730914592743, "learning_rate": 1.347019447416903e-06, "loss": 1.7777, "step": 3930 }, { "epoch": 0.9056038702989114, "grad_norm": 0.2747487723827362, "learning_rate": 1.3405159247790927e-06, "loss": 1.764, "step": 3931 }, { "epoch": 0.9058342452341185, "grad_norm": 0.26837095618247986, "learning_rate": 1.3340277073943198e-06, "loss": 1.825, "step": 3932 }, { "epoch": 0.9060646201693255, "grad_norm": 0.26072853803634644, "learning_rate": 1.3275547994597987e-06, "loss": 1.7455, "step": 3933 }, { "epoch": 0.9062949951045326, "grad_norm": 0.2761501371860504, "learning_rate": 1.3210972051628328e-06, "loss": 1.7818, "step": 3934 }, { "epoch": 0.9065253700397397, "grad_norm": 0.29065755009651184, "learning_rate": 1.3146549286808195e-06, "loss": 1.7858, "step": 3935 }, { "epoch": 0.9067557449749467, "grad_norm": 0.24759842455387115, "learning_rate": 1.3082279741812503e-06, "loss": 1.8059, "step": 3936 }, { "epoch": 0.9069861199101538, "grad_norm": 0.27994436025619507, "learning_rate": 1.3018163458217076e-06, "loss": 1.7856, "step": 3937 }, { "epoch": 0.9072164948453608, "grad_norm": 0.34467750787734985, "learning_rate": 1.2954200477498485e-06, "loss": 1.7452, "step": 3938 }, { "epoch": 0.9074468697805679, "grad_norm": 0.2727641761302948, "learning_rate": 1.2890390841034271e-06, "loss": 1.8372, "step": 3939 }, { "epoch": 0.9076772447157749, "grad_norm": 0.27080485224723816, "learning_rate": 1.2826734590102669e-06, "loss": 1.744, "step": 3940 }, { "epoch": 0.907907619650982, "grad_norm": 0.24842745065689087, "learning_rate": 1.2763231765882732e-06, "loss": 1.7991, "step": 3941 }, { "epoch": 0.908137994586189, "grad_norm": 0.25513511896133423, "learning_rate": 1.2699882409454273e-06, "loss": 1.7908, "step": 3942 }, { "epoch": 0.9083683695213961, "grad_norm": 0.2564167082309723, "learning_rate": 1.2636686561797811e-06, "loss": 1.811, "step": 3943 }, { "epoch": 0.9085987444566032, "grad_norm": 0.27313128113746643, "learning_rate": 1.2573644263794483e-06, "loss": 1.7723, "step": 3944 }, { "epoch": 0.9088291193918102, "grad_norm": 0.267307311296463, "learning_rate": 1.2510755556226272e-06, "loss": 1.7674, "step": 3945 }, { "epoch": 0.9090594943270173, "grad_norm": 0.28576526045799255, "learning_rate": 1.2448020479775697e-06, "loss": 1.7349, "step": 3946 }, { "epoch": 0.9092898692622242, "grad_norm": 0.2790226638317108, "learning_rate": 1.238543907502579e-06, "loss": 1.7616, "step": 3947 }, { "epoch": 0.9095202441974313, "grad_norm": 0.27057838439941406, "learning_rate": 1.232301138246042e-06, "loss": 1.8331, "step": 3948 }, { "epoch": 0.9097506191326383, "grad_norm": 0.26730525493621826, "learning_rate": 1.2260737442463865e-06, "loss": 1.7947, "step": 3949 }, { "epoch": 0.9099809940678454, "grad_norm": 0.2706122100353241, "learning_rate": 1.2198617295320846e-06, "loss": 1.7969, "step": 3950 }, { "epoch": 0.9102113690030524, "grad_norm": 0.25715309381484985, "learning_rate": 1.2136650981216858e-06, "loss": 1.745, "step": 3951 }, { "epoch": 0.9104417439382595, "grad_norm": 0.29543158411979675, "learning_rate": 1.2074838540237637e-06, "loss": 1.776, "step": 3952 }, { "epoch": 0.9106721188734666, "grad_norm": 0.27258893847465515, "learning_rate": 1.2013180012369452e-06, "loss": 1.7985, "step": 3953 }, { "epoch": 0.9109024938086736, "grad_norm": 0.27281635999679565, "learning_rate": 1.1951675437499144e-06, "loss": 1.7918, "step": 3954 }, { "epoch": 0.9111328687438807, "grad_norm": 0.2651784121990204, "learning_rate": 1.1890324855413777e-06, "loss": 1.7691, "step": 3955 }, { "epoch": 0.9113632436790877, "grad_norm": 0.26063773036003113, "learning_rate": 1.182912830580085e-06, "loss": 1.8037, "step": 3956 }, { "epoch": 0.9115936186142948, "grad_norm": 0.2580692768096924, "learning_rate": 1.176808582824826e-06, "loss": 1.7722, "step": 3957 }, { "epoch": 0.9118239935495018, "grad_norm": 0.2701328992843628, "learning_rate": 1.1707197462244247e-06, "loss": 1.7702, "step": 3958 }, { "epoch": 0.9120543684847089, "grad_norm": 0.24889741837978363, "learning_rate": 1.1646463247177246e-06, "loss": 1.7972, "step": 3959 }, { "epoch": 0.9122847434199159, "grad_norm": 0.28420981764793396, "learning_rate": 1.1585883222336113e-06, "loss": 1.7638, "step": 3960 }, { "epoch": 0.912515118355123, "grad_norm": 0.26155611872673035, "learning_rate": 1.1525457426909859e-06, "loss": 1.7998, "step": 3961 }, { "epoch": 0.9127454932903301, "grad_norm": 0.25613927841186523, "learning_rate": 1.1465185899987797e-06, "loss": 1.8041, "step": 3962 }, { "epoch": 0.9129758682255371, "grad_norm": 0.29860004782676697, "learning_rate": 1.140506868055935e-06, "loss": 1.7761, "step": 3963 }, { "epoch": 0.9132062431607441, "grad_norm": 0.26331257820129395, "learning_rate": 1.1345105807514272e-06, "loss": 1.7943, "step": 3964 }, { "epoch": 0.9134366180959511, "grad_norm": 0.2937811017036438, "learning_rate": 1.128529731964223e-06, "loss": 1.8144, "step": 3965 }, { "epoch": 0.9136669930311582, "grad_norm": 0.35109883546829224, "learning_rate": 1.1225643255633272e-06, "loss": 1.7634, "step": 3966 }, { "epoch": 0.9138973679663652, "grad_norm": 0.24389058351516724, "learning_rate": 1.1166143654077476e-06, "loss": 1.7978, "step": 3967 }, { "epoch": 0.9141277429015723, "grad_norm": 0.2650395333766937, "learning_rate": 1.1106798553464804e-06, "loss": 1.8181, "step": 3968 }, { "epoch": 0.9143581178367793, "grad_norm": 0.26436546444892883, "learning_rate": 1.10476079921856e-06, "loss": 1.7775, "step": 3969 }, { "epoch": 0.9145884927719864, "grad_norm": 0.28420311212539673, "learning_rate": 1.0988572008530013e-06, "loss": 1.7455, "step": 3970 }, { "epoch": 0.9148188677071935, "grad_norm": 0.3125991225242615, "learning_rate": 1.0929690640688218e-06, "loss": 1.7977, "step": 3971 }, { "epoch": 0.9150492426424005, "grad_norm": 0.28667598962783813, "learning_rate": 1.0870963926750467e-06, "loss": 1.7641, "step": 3972 }, { "epoch": 0.9152796175776076, "grad_norm": 0.26659560203552246, "learning_rate": 1.0812391904706841e-06, "loss": 1.7394, "step": 3973 }, { "epoch": 0.9155099925128146, "grad_norm": 0.28700965642929077, "learning_rate": 1.07539746124474e-06, "loss": 1.8042, "step": 3974 }, { "epoch": 0.9157403674480217, "grad_norm": 0.261210560798645, "learning_rate": 1.0695712087762238e-06, "loss": 1.7921, "step": 3975 }, { "epoch": 0.9159707423832287, "grad_norm": 0.2860430181026459, "learning_rate": 1.063760436834113e-06, "loss": 1.7912, "step": 3976 }, { "epoch": 0.9162011173184358, "grad_norm": 0.2522420287132263, "learning_rate": 1.0579651491773757e-06, "loss": 1.7704, "step": 3977 }, { "epoch": 0.9164314922536428, "grad_norm": 0.330257385969162, "learning_rate": 1.0521853495549793e-06, "loss": 1.7537, "step": 3978 }, { "epoch": 0.9166618671888499, "grad_norm": 0.2674270570278168, "learning_rate": 1.046421041705853e-06, "loss": 1.7757, "step": 3979 }, { "epoch": 0.916892242124057, "grad_norm": 0.27780234813690186, "learning_rate": 1.0406722293589078e-06, "loss": 1.7768, "step": 3980 }, { "epoch": 0.9171226170592639, "grad_norm": 0.25443747639656067, "learning_rate": 1.0349389162330359e-06, "loss": 1.7942, "step": 3981 }, { "epoch": 0.917352991994471, "grad_norm": 0.25874295830726624, "learning_rate": 1.0292211060371065e-06, "loss": 1.7929, "step": 3982 }, { "epoch": 0.917583366929678, "grad_norm": 0.3141564130783081, "learning_rate": 1.023518802469947e-06, "loss": 1.7456, "step": 3983 }, { "epoch": 0.9178137418648851, "grad_norm": 0.27653777599334717, "learning_rate": 1.017832009220368e-06, "loss": 1.7541, "step": 3984 }, { "epoch": 0.9180441168000921, "grad_norm": 0.2609631419181824, "learning_rate": 1.0121607299671375e-06, "loss": 1.7711, "step": 3985 }, { "epoch": 0.9182744917352992, "grad_norm": 0.28042924404144287, "learning_rate": 1.006504968378988e-06, "loss": 1.7731, "step": 3986 }, { "epoch": 0.9185048666705062, "grad_norm": 0.28055423498153687, "learning_rate": 1.00086472811462e-06, "loss": 1.7581, "step": 3987 }, { "epoch": 0.9187352416057133, "grad_norm": 0.2772543728351593, "learning_rate": 9.952400128226847e-07, "loss": 1.8231, "step": 3988 }, { "epoch": 0.9189656165409203, "grad_norm": 0.24740131199359894, "learning_rate": 9.896308261417936e-07, "loss": 1.8077, "step": 3989 }, { "epoch": 0.9191959914761274, "grad_norm": 0.2930602431297302, "learning_rate": 9.840371717005192e-07, "loss": 1.7602, "step": 3990 }, { "epoch": 0.9194263664113345, "grad_norm": 0.27945637702941895, "learning_rate": 9.784590531173753e-07, "loss": 1.8075, "step": 3991 }, { "epoch": 0.9196567413465415, "grad_norm": 0.27651387453079224, "learning_rate": 9.728964740008339e-07, "loss": 1.7791, "step": 3992 }, { "epoch": 0.9198871162817486, "grad_norm": 0.2695119082927704, "learning_rate": 9.673494379493136e-07, "loss": 1.7707, "step": 3993 }, { "epoch": 0.9201174912169556, "grad_norm": 0.2823938727378845, "learning_rate": 9.618179485511691e-07, "loss": 1.7821, "step": 3994 }, { "epoch": 0.9203478661521627, "grad_norm": 0.2890845239162445, "learning_rate": 9.563020093847103e-07, "loss": 1.7704, "step": 3995 }, { "epoch": 0.9205782410873697, "grad_norm": 0.27773234248161316, "learning_rate": 9.508016240181855e-07, "loss": 1.7762, "step": 3996 }, { "epoch": 0.9208086160225768, "grad_norm": 0.2567492723464966, "learning_rate": 9.453167960097709e-07, "loss": 1.781, "step": 3997 }, { "epoch": 0.9210389909577837, "grad_norm": 0.2661401927471161, "learning_rate": 9.398475289075892e-07, "loss": 1.7502, "step": 3998 }, { "epoch": 0.9212693658929908, "grad_norm": 0.31109127402305603, "learning_rate": 9.343938262496993e-07, "loss": 1.8319, "step": 3999 }, { "epoch": 0.9214997408281979, "grad_norm": 0.2655329704284668, "learning_rate": 9.289556915640762e-07, "loss": 1.8167, "step": 4000 }, { "epoch": 0.9217301157634049, "grad_norm": 0.34153833985328674, "learning_rate": 9.23533128368645e-07, "loss": 1.6696, "step": 4001 }, { "epoch": 0.921960490698612, "grad_norm": 0.2522549331188202, "learning_rate": 9.181261401712354e-07, "loss": 1.7807, "step": 4002 }, { "epoch": 0.922190865633819, "grad_norm": 0.28966426849365234, "learning_rate": 9.127347304696138e-07, "loss": 1.8573, "step": 4003 }, { "epoch": 0.9224212405690261, "grad_norm": 0.2820706069469452, "learning_rate": 9.073589027514789e-07, "loss": 1.7949, "step": 4004 }, { "epoch": 0.9226516155042331, "grad_norm": 0.2533314824104309, "learning_rate": 9.019986604944298e-07, "loss": 1.8164, "step": 4005 }, { "epoch": 0.9228819904394402, "grad_norm": 0.32943156361579895, "learning_rate": 8.966540071659867e-07, "loss": 1.727, "step": 4006 }, { "epoch": 0.9231123653746472, "grad_norm": 0.2778649628162384, "learning_rate": 8.913249462236068e-07, "loss": 1.7662, "step": 4007 }, { "epoch": 0.9233427403098543, "grad_norm": 0.2639056444168091, "learning_rate": 8.860114811146292e-07, "loss": 1.7936, "step": 4008 }, { "epoch": 0.9235731152450614, "grad_norm": 0.2539048194885254, "learning_rate": 8.807136152763268e-07, "loss": 1.8121, "step": 4009 }, { "epoch": 0.9238034901802684, "grad_norm": 0.2639763057231903, "learning_rate": 8.754313521358692e-07, "loss": 1.7775, "step": 4010 }, { "epoch": 0.9240338651154755, "grad_norm": 0.28106924891471863, "learning_rate": 8.701646951103425e-07, "loss": 1.7632, "step": 4011 }, { "epoch": 0.9242642400506825, "grad_norm": 0.2800048291683197, "learning_rate": 8.649136476067298e-07, "loss": 1.7534, "step": 4012 }, { "epoch": 0.9244946149858896, "grad_norm": 0.2605953812599182, "learning_rate": 8.596782130219194e-07, "loss": 1.7849, "step": 4013 }, { "epoch": 0.9247249899210965, "grad_norm": 0.2946617007255554, "learning_rate": 8.544583947426993e-07, "loss": 1.7889, "step": 4014 }, { "epoch": 0.9249553648563036, "grad_norm": 0.2641245126724243, "learning_rate": 8.492541961457518e-07, "loss": 1.7787, "step": 4015 }, { "epoch": 0.9251857397915106, "grad_norm": 0.2811609208583832, "learning_rate": 8.440656205976643e-07, "loss": 1.7945, "step": 4016 }, { "epoch": 0.9254161147267177, "grad_norm": 0.26553311944007874, "learning_rate": 8.388926714549106e-07, "loss": 1.7939, "step": 4017 }, { "epoch": 0.9256464896619248, "grad_norm": 0.26104044914245605, "learning_rate": 8.337353520638469e-07, "loss": 1.7894, "step": 4018 }, { "epoch": 0.9258768645971318, "grad_norm": 0.2633711099624634, "learning_rate": 8.285936657607407e-07, "loss": 1.7616, "step": 4019 }, { "epoch": 0.9261072395323389, "grad_norm": 0.27334073185920715, "learning_rate": 8.234676158717313e-07, "loss": 1.7898, "step": 4020 }, { "epoch": 0.9263376144675459, "grad_norm": 0.27447205781936646, "learning_rate": 8.183572057128386e-07, "loss": 1.7467, "step": 4021 }, { "epoch": 0.926567989402753, "grad_norm": 0.3964620530605316, "learning_rate": 8.132624385899818e-07, "loss": 1.7796, "step": 4022 }, { "epoch": 0.92679836433796, "grad_norm": 0.29093995690345764, "learning_rate": 8.081833177989468e-07, "loss": 1.7158, "step": 4023 }, { "epoch": 0.9270287392731671, "grad_norm": 0.29166367650032043, "learning_rate": 8.031198466253998e-07, "loss": 1.7889, "step": 4024 }, { "epoch": 0.9272591142083741, "grad_norm": 0.2701321244239807, "learning_rate": 7.980720283448956e-07, "loss": 1.7593, "step": 4025 }, { "epoch": 0.9274894891435812, "grad_norm": 0.25076669454574585, "learning_rate": 7.930398662228416e-07, "loss": 1.7722, "step": 4026 }, { "epoch": 0.9277198640787883, "grad_norm": 0.26692426204681396, "learning_rate": 7.88023363514534e-07, "loss": 1.7562, "step": 4027 }, { "epoch": 0.9279502390139953, "grad_norm": 0.258328378200531, "learning_rate": 7.830225234651407e-07, "loss": 1.7793, "step": 4028 }, { "epoch": 0.9281806139492024, "grad_norm": 0.2835284471511841, "learning_rate": 7.780373493096793e-07, "loss": 1.7423, "step": 4029 }, { "epoch": 0.9284109888844094, "grad_norm": 0.246015265583992, "learning_rate": 7.730678442730538e-07, "loss": 1.8016, "step": 4030 }, { "epoch": 0.9286413638196164, "grad_norm": 0.2715925872325897, "learning_rate": 7.681140115700175e-07, "loss": 1.7626, "step": 4031 }, { "epoch": 0.9288717387548234, "grad_norm": 0.2513832449913025, "learning_rate": 7.631758544051932e-07, "loss": 1.758, "step": 4032 }, { "epoch": 0.9291021136900305, "grad_norm": 0.355074942111969, "learning_rate": 7.582533759730587e-07, "loss": 1.8394, "step": 4033 }, { "epoch": 0.9293324886252375, "grad_norm": 0.2864769995212555, "learning_rate": 7.533465794579558e-07, "loss": 1.7765, "step": 4034 }, { "epoch": 0.9295628635604446, "grad_norm": 0.2586212158203125, "learning_rate": 7.484554680340733e-07, "loss": 1.7309, "step": 4035 }, { "epoch": 0.9297932384956517, "grad_norm": 0.2906990051269531, "learning_rate": 7.435800448654578e-07, "loss": 1.7473, "step": 4036 }, { "epoch": 0.9300236134308587, "grad_norm": 0.27271151542663574, "learning_rate": 7.387203131060088e-07, "loss": 1.7433, "step": 4037 }, { "epoch": 0.9302539883660658, "grad_norm": 0.2630670666694641, "learning_rate": 7.3387627589947e-07, "loss": 1.7725, "step": 4038 }, { "epoch": 0.9304843633012728, "grad_norm": 0.265766441822052, "learning_rate": 7.290479363794373e-07, "loss": 1.7639, "step": 4039 }, { "epoch": 0.9307147382364799, "grad_norm": 0.27349889278411865, "learning_rate": 7.242352976693484e-07, "loss": 1.778, "step": 4040 }, { "epoch": 0.9309451131716869, "grad_norm": 0.26821669936180115, "learning_rate": 7.194383628824853e-07, "loss": 1.7707, "step": 4041 }, { "epoch": 0.931175488106894, "grad_norm": 0.25653257966041565, "learning_rate": 7.146571351219766e-07, "loss": 1.8079, "step": 4042 }, { "epoch": 0.931405863042101, "grad_norm": 0.2513926029205322, "learning_rate": 7.098916174807763e-07, "loss": 1.7946, "step": 4043 }, { "epoch": 0.9316362379773081, "grad_norm": 0.2680138647556305, "learning_rate": 7.051418130416932e-07, "loss": 1.8038, "step": 4044 }, { "epoch": 0.9318666129125152, "grad_norm": 0.3470703363418579, "learning_rate": 7.004077248773555e-07, "loss": 1.7223, "step": 4045 }, { "epoch": 0.9320969878477222, "grad_norm": 0.2748626172542572, "learning_rate": 6.956893560502359e-07, "loss": 1.8159, "step": 4046 }, { "epoch": 0.9323273627829293, "grad_norm": 0.26987892389297485, "learning_rate": 6.909867096126288e-07, "loss": 1.8059, "step": 4047 }, { "epoch": 0.9325577377181362, "grad_norm": 0.27074915170669556, "learning_rate": 6.862997886066674e-07, "loss": 1.7394, "step": 4048 }, { "epoch": 0.9327881126533433, "grad_norm": 0.26091498136520386, "learning_rate": 6.816285960643071e-07, "loss": 1.7471, "step": 4049 }, { "epoch": 0.9330184875885503, "grad_norm": 0.28246942162513733, "learning_rate": 6.769731350073249e-07, "loss": 1.7712, "step": 4050 }, { "epoch": 0.9332488625237574, "grad_norm": 0.2634035348892212, "learning_rate": 6.723334084473315e-07, "loss": 1.7966, "step": 4051 }, { "epoch": 0.9334792374589644, "grad_norm": 0.26190853118896484, "learning_rate": 6.677094193857508e-07, "loss": 1.7689, "step": 4052 }, { "epoch": 0.9337096123941715, "grad_norm": 0.2692461907863617, "learning_rate": 6.631011708138207e-07, "loss": 1.7855, "step": 4053 }, { "epoch": 0.9339399873293786, "grad_norm": 0.2670055329799652, "learning_rate": 6.585086657126177e-07, "loss": 1.7503, "step": 4054 }, { "epoch": 0.9341703622645856, "grad_norm": 0.2876185476779938, "learning_rate": 6.539319070530098e-07, "loss": 1.766, "step": 4055 }, { "epoch": 0.9344007371997927, "grad_norm": 0.27548885345458984, "learning_rate": 6.493708977956897e-07, "loss": 1.8219, "step": 4056 }, { "epoch": 0.9346311121349997, "grad_norm": 0.25299859046936035, "learning_rate": 6.448256408911724e-07, "loss": 1.7959, "step": 4057 }, { "epoch": 0.9348614870702068, "grad_norm": 0.28717634081840515, "learning_rate": 6.402961392797557e-07, "loss": 1.7986, "step": 4058 }, { "epoch": 0.9350918620054138, "grad_norm": 0.2652818560600281, "learning_rate": 6.357823958915737e-07, "loss": 1.7867, "step": 4059 }, { "epoch": 0.9353222369406209, "grad_norm": 0.2523903250694275, "learning_rate": 6.312844136465463e-07, "loss": 1.8083, "step": 4060 }, { "epoch": 0.9355526118758279, "grad_norm": 0.2569161057472229, "learning_rate": 6.268021954544096e-07, "loss": 1.8053, "step": 4061 }, { "epoch": 0.935782986811035, "grad_norm": 0.26295170187950134, "learning_rate": 6.223357442146971e-07, "loss": 1.7504, "step": 4062 }, { "epoch": 0.9360133617462421, "grad_norm": 0.2720282971858978, "learning_rate": 6.178850628167448e-07, "loss": 1.7962, "step": 4063 }, { "epoch": 0.9362437366814491, "grad_norm": 0.258103609085083, "learning_rate": 6.134501541396831e-07, "loss": 1.7726, "step": 4064 }, { "epoch": 0.9364741116166561, "grad_norm": 0.2552703022956848, "learning_rate": 6.090310210524419e-07, "loss": 1.7944, "step": 4065 }, { "epoch": 0.9367044865518631, "grad_norm": 0.2814948558807373, "learning_rate": 6.046276664137485e-07, "loss": 1.7647, "step": 4066 }, { "epoch": 0.9369348614870702, "grad_norm": 0.2651875913143158, "learning_rate": 6.002400930721186e-07, "loss": 1.7617, "step": 4067 }, { "epoch": 0.9371652364222772, "grad_norm": 0.26740822196006775, "learning_rate": 5.958683038658597e-07, "loss": 1.7774, "step": 4068 }, { "epoch": 0.9373956113574843, "grad_norm": 0.2817474603652954, "learning_rate": 5.915123016230706e-07, "loss": 1.7014, "step": 4069 }, { "epoch": 0.9376259862926913, "grad_norm": 0.2412758767604828, "learning_rate": 5.871720891616444e-07, "loss": 1.8015, "step": 4070 }, { "epoch": 0.9378563612278984, "grad_norm": 0.26886120438575745, "learning_rate": 5.828476692892354e-07, "loss": 1.787, "step": 4071 }, { "epoch": 0.9380867361631055, "grad_norm": 0.25826796889305115, "learning_rate": 5.78539044803314e-07, "loss": 1.8269, "step": 4072 }, { "epoch": 0.9383171110983125, "grad_norm": 0.2605254352092743, "learning_rate": 5.742462184911146e-07, "loss": 1.7782, "step": 4073 }, { "epoch": 0.9385474860335196, "grad_norm": 0.3238794207572937, "learning_rate": 5.699691931296463e-07, "loss": 1.7758, "step": 4074 }, { "epoch": 0.9387778609687266, "grad_norm": 0.27136799693107605, "learning_rate": 5.657079714857128e-07, "loss": 1.7613, "step": 4075 }, { "epoch": 0.9390082359039337, "grad_norm": 0.3310713768005371, "learning_rate": 5.61462556315881e-07, "loss": 1.7246, "step": 4076 }, { "epoch": 0.9392386108391407, "grad_norm": 0.2894034683704376, "learning_rate": 5.572329503664986e-07, "loss": 1.7626, "step": 4077 }, { "epoch": 0.9394689857743478, "grad_norm": 0.27001988887786865, "learning_rate": 5.530191563736936e-07, "loss": 1.745, "step": 4078 }, { "epoch": 0.9396993607095548, "grad_norm": 0.31111031770706177, "learning_rate": 5.488211770633467e-07, "loss": 1.7624, "step": 4079 }, { "epoch": 0.9399297356447619, "grad_norm": 0.26018640398979187, "learning_rate": 5.446390151511188e-07, "loss": 1.764, "step": 4080 }, { "epoch": 0.9401601105799688, "grad_norm": 0.28004926443099976, "learning_rate": 5.404726733424514e-07, "loss": 1.7503, "step": 4081 }, { "epoch": 0.9403904855151759, "grad_norm": 0.2825617790222168, "learning_rate": 5.363221543325248e-07, "loss": 1.7723, "step": 4082 }, { "epoch": 0.940620860450383, "grad_norm": 0.28481289744377136, "learning_rate": 5.321874608063077e-07, "loss": 1.7991, "step": 4083 }, { "epoch": 0.94085123538559, "grad_norm": 0.28604188561439514, "learning_rate": 5.280685954385134e-07, "loss": 1.7676, "step": 4084 }, { "epoch": 0.9410816103207971, "grad_norm": 0.2671407163143158, "learning_rate": 5.239655608936328e-07, "loss": 1.8026, "step": 4085 }, { "epoch": 0.9413119852560041, "grad_norm": 0.25490736961364746, "learning_rate": 5.198783598258983e-07, "loss": 1.7733, "step": 4086 }, { "epoch": 0.9415423601912112, "grad_norm": 0.2667872905731201, "learning_rate": 5.15806994879317e-07, "loss": 1.7695, "step": 4087 }, { "epoch": 0.9417727351264182, "grad_norm": 0.25045669078826904, "learning_rate": 5.117514686876379e-07, "loss": 1.8114, "step": 4088 }, { "epoch": 0.9420031100616253, "grad_norm": 0.25956451892852783, "learning_rate": 5.077117838743706e-07, "loss": 1.8086, "step": 4089 }, { "epoch": 0.9422334849968323, "grad_norm": 0.26716944575309753, "learning_rate": 5.036879430527775e-07, "loss": 1.7441, "step": 4090 }, { "epoch": 0.9424638599320394, "grad_norm": 0.25237393379211426, "learning_rate": 4.99679948825868e-07, "loss": 1.8032, "step": 4091 }, { "epoch": 0.9426942348672465, "grad_norm": 0.26093488931655884, "learning_rate": 4.956878037864043e-07, "loss": 1.7632, "step": 4092 }, { "epoch": 0.9429246098024535, "grad_norm": 0.2832764685153961, "learning_rate": 4.917115105168901e-07, "loss": 1.8152, "step": 4093 }, { "epoch": 0.9431549847376606, "grad_norm": 0.2411074936389923, "learning_rate": 4.877510715895817e-07, "loss": 1.8269, "step": 4094 }, { "epoch": 0.9433853596728676, "grad_norm": 0.2627483904361725, "learning_rate": 4.83806489566474e-07, "loss": 1.7456, "step": 4095 }, { "epoch": 0.9436157346080747, "grad_norm": 0.27924713492393494, "learning_rate": 4.798777669993066e-07, "loss": 1.7268, "step": 4096 }, { "epoch": 0.9438461095432817, "grad_norm": 0.2604641914367676, "learning_rate": 4.759649064295546e-07, "loss": 1.8224, "step": 4097 }, { "epoch": 0.9440764844784887, "grad_norm": 0.2611843943595886, "learning_rate": 4.7206791038844625e-07, "loss": 1.7772, "step": 4098 }, { "epoch": 0.9443068594136957, "grad_norm": 0.2753793001174927, "learning_rate": 4.6818678139692873e-07, "loss": 1.7737, "step": 4099 }, { "epoch": 0.9445372343489028, "grad_norm": 0.27269861102104187, "learning_rate": 4.6432152196569377e-07, "loss": 1.7146, "step": 4100 }, { "epoch": 0.9447676092841099, "grad_norm": 0.25339266657829285, "learning_rate": 4.604721345951718e-07, "loss": 1.7629, "step": 4101 }, { "epoch": 0.9449979842193169, "grad_norm": 0.24309135973453522, "learning_rate": 4.566386217755181e-07, "loss": 1.817, "step": 4102 }, { "epoch": 0.945228359154524, "grad_norm": 0.27703043818473816, "learning_rate": 4.528209859866184e-07, "loss": 1.7918, "step": 4103 }, { "epoch": 0.945458734089731, "grad_norm": 0.26368093490600586, "learning_rate": 4.490192296980972e-07, "loss": 1.7565, "step": 4104 }, { "epoch": 0.9456891090249381, "grad_norm": 0.2544001042842865, "learning_rate": 4.4523335536929e-07, "loss": 1.82, "step": 4105 }, { "epoch": 0.9459194839601451, "grad_norm": 0.25994694232940674, "learning_rate": 4.4146336544927667e-07, "loss": 1.7767, "step": 4106 }, { "epoch": 0.9461498588953522, "grad_norm": 0.303298681974411, "learning_rate": 4.377092623768508e-07, "loss": 1.8002, "step": 4107 }, { "epoch": 0.9463802338305592, "grad_norm": 0.25199151039123535, "learning_rate": 4.3397104858052817e-07, "loss": 1.8053, "step": 4108 }, { "epoch": 0.9466106087657663, "grad_norm": 0.2632235586643219, "learning_rate": 4.302487264785521e-07, "loss": 1.7817, "step": 4109 }, { "epoch": 0.9468409837009734, "grad_norm": 0.2582700848579407, "learning_rate": 4.2654229847887974e-07, "loss": 1.8244, "step": 4110 }, { "epoch": 0.9470713586361804, "grad_norm": 0.2902224361896515, "learning_rate": 4.2285176697919037e-07, "loss": 1.7879, "step": 4111 }, { "epoch": 0.9473017335713875, "grad_norm": 0.2508311867713928, "learning_rate": 4.1917713436687713e-07, "loss": 1.8158, "step": 4112 }, { "epoch": 0.9475321085065945, "grad_norm": 0.2747316360473633, "learning_rate": 4.1551840301904954e-07, "loss": 1.7734, "step": 4113 }, { "epoch": 0.9477624834418016, "grad_norm": 0.2523157000541687, "learning_rate": 4.1187557530253105e-07, "loss": 1.767, "step": 4114 }, { "epoch": 0.9479928583770085, "grad_norm": 0.2723200023174286, "learning_rate": 4.082486535738589e-07, "loss": 1.7854, "step": 4115 }, { "epoch": 0.9482232333122156, "grad_norm": 0.3218958377838135, "learning_rate": 4.0463764017927565e-07, "loss": 1.7785, "step": 4116 }, { "epoch": 0.9484536082474226, "grad_norm": 0.24193210899829865, "learning_rate": 4.0104253745473497e-07, "loss": 1.814, "step": 4117 }, { "epoch": 0.9486839831826297, "grad_norm": 0.26736122369766235, "learning_rate": 3.974633477258988e-07, "loss": 1.7881, "step": 4118 }, { "epoch": 0.9489143581178368, "grad_norm": 0.3155902028083801, "learning_rate": 3.9390007330813714e-07, "loss": 1.7679, "step": 4119 }, { "epoch": 0.9491447330530438, "grad_norm": 0.25503677129745483, "learning_rate": 3.9035271650652295e-07, "loss": 1.8118, "step": 4120 }, { "epoch": 0.9493751079882509, "grad_norm": 0.24576549232006073, "learning_rate": 3.868212796158233e-07, "loss": 1.7926, "step": 4121 }, { "epoch": 0.9496054829234579, "grad_norm": 0.27662503719329834, "learning_rate": 3.833057649205246e-07, "loss": 1.8037, "step": 4122 }, { "epoch": 0.949835857858665, "grad_norm": 0.2700939476490021, "learning_rate": 3.7980617469479953e-07, "loss": 1.8094, "step": 4123 }, { "epoch": 0.950066232793872, "grad_norm": 0.2819182872772217, "learning_rate": 3.7632251120252036e-07, "loss": 1.7517, "step": 4124 }, { "epoch": 0.9502966077290791, "grad_norm": 0.2641523480415344, "learning_rate": 3.728547766972651e-07, "loss": 1.7632, "step": 4125 }, { "epoch": 0.9505269826642861, "grad_norm": 0.26501795649528503, "learning_rate": 3.694029734222948e-07, "loss": 1.7942, "step": 4126 }, { "epoch": 0.9507573575994932, "grad_norm": 0.2773406505584717, "learning_rate": 3.6596710361057053e-07, "loss": 1.7389, "step": 4127 }, { "epoch": 0.9509877325347003, "grad_norm": 0.2727321684360504, "learning_rate": 3.62547169484756e-07, "loss": 1.7646, "step": 4128 }, { "epoch": 0.9512181074699073, "grad_norm": 0.26943257451057434, "learning_rate": 3.5914317325718985e-07, "loss": 1.7961, "step": 4129 }, { "epoch": 0.9514484824051144, "grad_norm": 0.2777213454246521, "learning_rate": 3.557551171299051e-07, "loss": 1.7776, "step": 4130 }, { "epoch": 0.9516788573403214, "grad_norm": 0.2587621808052063, "learning_rate": 3.5238300329463456e-07, "loss": 1.754, "step": 4131 }, { "epoch": 0.9519092322755284, "grad_norm": 0.2645566165447235, "learning_rate": 3.490268339327807e-07, "loss": 1.7813, "step": 4132 }, { "epoch": 0.9521396072107354, "grad_norm": 0.2793973982334137, "learning_rate": 3.456866112154428e-07, "loss": 1.7433, "step": 4133 }, { "epoch": 0.9523699821459425, "grad_norm": 0.26182234287261963, "learning_rate": 3.423623373034035e-07, "loss": 1.7534, "step": 4134 }, { "epoch": 0.9526003570811495, "grad_norm": 0.26164743304252625, "learning_rate": 3.3905401434712327e-07, "loss": 1.7659, "step": 4135 }, { "epoch": 0.9528307320163566, "grad_norm": 0.27625489234924316, "learning_rate": 3.357616444867484e-07, "loss": 1.7806, "step": 4136 }, { "epoch": 0.9530611069515637, "grad_norm": 0.2842690348625183, "learning_rate": 3.3248522985210306e-07, "loss": 1.7889, "step": 4137 }, { "epoch": 0.9532914818867707, "grad_norm": 0.2529942989349365, "learning_rate": 3.292247725626918e-07, "loss": 1.7958, "step": 4138 }, { "epoch": 0.9535218568219778, "grad_norm": 0.27739956974983215, "learning_rate": 3.259802747276941e-07, "loss": 1.7658, "step": 4139 }, { "epoch": 0.9537522317571848, "grad_norm": 0.2583999037742615, "learning_rate": 3.227517384459644e-07, "loss": 1.8316, "step": 4140 }, { "epoch": 0.9539826066923919, "grad_norm": 0.2879232168197632, "learning_rate": 3.195391658060376e-07, "loss": 1.7874, "step": 4141 }, { "epoch": 0.9542129816275989, "grad_norm": 0.24966998398303986, "learning_rate": 3.163425588861152e-07, "loss": 1.772, "step": 4142 }, { "epoch": 0.954443356562806, "grad_norm": 0.27188992500305176, "learning_rate": 3.1316191975407363e-07, "loss": 1.7863, "step": 4143 }, { "epoch": 0.954673731498013, "grad_norm": 0.2877216935157776, "learning_rate": 3.0999725046745866e-07, "loss": 1.7142, "step": 4144 }, { "epoch": 0.9549041064332201, "grad_norm": 0.2779586911201477, "learning_rate": 3.068485530734883e-07, "loss": 1.779, "step": 4145 }, { "epoch": 0.9551344813684272, "grad_norm": 0.2878383696079254, "learning_rate": 3.0371582960904144e-07, "loss": 1.8215, "step": 4146 }, { "epoch": 0.9553648563036342, "grad_norm": 0.2689281702041626, "learning_rate": 3.005990821006749e-07, "loss": 1.7783, "step": 4147 }, { "epoch": 0.9555952312388413, "grad_norm": 0.2636195123195648, "learning_rate": 2.9749831256459524e-07, "loss": 1.7514, "step": 4148 }, { "epoch": 0.9558256061740482, "grad_norm": 0.2736721932888031, "learning_rate": 2.9441352300669243e-07, "loss": 1.7762, "step": 4149 }, { "epoch": 0.9560559811092553, "grad_norm": 0.28581103682518005, "learning_rate": 2.913447154224952e-07, "loss": 1.7324, "step": 4150 }, { "epoch": 0.9562863560444623, "grad_norm": 0.2848505675792694, "learning_rate": 2.8829189179721547e-07, "loss": 1.7378, "step": 4151 }, { "epoch": 0.9565167309796694, "grad_norm": 0.261308878660202, "learning_rate": 2.852550541057153e-07, "loss": 1.7433, "step": 4152 }, { "epoch": 0.9567471059148764, "grad_norm": 0.2566760182380676, "learning_rate": 2.82234204312512e-07, "loss": 1.7779, "step": 4153 }, { "epoch": 0.9569774808500835, "grad_norm": 0.2670471668243408, "learning_rate": 2.7922934437178695e-07, "loss": 1.7804, "step": 4154 }, { "epoch": 0.9572078557852906, "grad_norm": 0.25484156608581543, "learning_rate": 2.762404762273768e-07, "loss": 1.7705, "step": 4155 }, { "epoch": 0.9574382307204976, "grad_norm": 0.2490370124578476, "learning_rate": 2.732676018127711e-07, "loss": 1.8238, "step": 4156 }, { "epoch": 0.9576686056557047, "grad_norm": 0.27447959780693054, "learning_rate": 2.703107230511148e-07, "loss": 1.8038, "step": 4157 }, { "epoch": 0.9578989805909117, "grad_norm": 0.27052563428878784, "learning_rate": 2.6736984185520284e-07, "loss": 1.7878, "step": 4158 }, { "epoch": 0.9581293555261188, "grad_norm": 0.2961488664150238, "learning_rate": 2.644449601274829e-07, "loss": 1.7128, "step": 4159 }, { "epoch": 0.9583597304613258, "grad_norm": 0.256944477558136, "learning_rate": 2.6153607976005247e-07, "loss": 1.8064, "step": 4160 }, { "epoch": 0.9585901053965329, "grad_norm": 0.24895551800727844, "learning_rate": 2.5864320263465914e-07, "loss": 1.8245, "step": 4161 }, { "epoch": 0.9588204803317399, "grad_norm": 0.2782500982284546, "learning_rate": 2.557663306226976e-07, "loss": 1.8029, "step": 4162 }, { "epoch": 0.959050855266947, "grad_norm": 0.2896950840950012, "learning_rate": 2.5290546558520423e-07, "loss": 1.7235, "step": 4163 }, { "epoch": 0.9592812302021541, "grad_norm": 0.26593372225761414, "learning_rate": 2.500606093728708e-07, "loss": 1.7919, "step": 4164 }, { "epoch": 0.959511605137361, "grad_norm": 0.26334747672080994, "learning_rate": 2.4723176382601966e-07, "loss": 1.7916, "step": 4165 }, { "epoch": 0.9597419800725681, "grad_norm": 0.27312639355659485, "learning_rate": 2.444189307746286e-07, "loss": 1.7606, "step": 4166 }, { "epoch": 0.9599723550077751, "grad_norm": 0.2676125466823578, "learning_rate": 2.4162211203830887e-07, "loss": 1.8242, "step": 4167 }, { "epoch": 0.9602027299429822, "grad_norm": 0.260727196931839, "learning_rate": 2.388413094263159e-07, "loss": 1.7827, "step": 4168 }, { "epoch": 0.9604331048781892, "grad_norm": 0.28924962878227234, "learning_rate": 2.3607652473754128e-07, "loss": 1.7757, "step": 4169 }, { "epoch": 0.9606634798133963, "grad_norm": 0.2682488262653351, "learning_rate": 2.3332775976051823e-07, "loss": 1.7744, "step": 4170 }, { "epoch": 0.9608938547486033, "grad_norm": 0.30081987380981445, "learning_rate": 2.3059501627341052e-07, "loss": 1.7319, "step": 4171 }, { "epoch": 0.9611242296838104, "grad_norm": 0.25415855646133423, "learning_rate": 2.27878296044029e-07, "loss": 1.808, "step": 4172 }, { "epoch": 0.9613546046190174, "grad_norm": 0.2703853249549866, "learning_rate": 2.2517760082980677e-07, "loss": 1.8098, "step": 4173 }, { "epoch": 0.9615849795542245, "grad_norm": 0.34255844354629517, "learning_rate": 2.2249293237781854e-07, "loss": 1.761, "step": 4174 }, { "epoch": 0.9618153544894316, "grad_norm": 0.3607204854488373, "learning_rate": 2.1982429242476677e-07, "loss": 1.7492, "step": 4175 }, { "epoch": 0.9620457294246386, "grad_norm": 0.3023635745048523, "learning_rate": 2.1717168269699273e-07, "loss": 1.8051, "step": 4176 }, { "epoch": 0.9622761043598457, "grad_norm": 0.2656382620334625, "learning_rate": 2.145351049104516e-07, "loss": 1.7549, "step": 4177 }, { "epoch": 0.9625064792950527, "grad_norm": 0.2802880108356476, "learning_rate": 2.1191456077075122e-07, "loss": 1.7868, "step": 4178 }, { "epoch": 0.9627368542302598, "grad_norm": 0.2904033660888672, "learning_rate": 2.0931005197310227e-07, "loss": 1.7546, "step": 4179 }, { "epoch": 0.9629672291654668, "grad_norm": 0.24773968756198883, "learning_rate": 2.0672158020235977e-07, "loss": 1.8235, "step": 4180 }, { "epoch": 0.9631976041006739, "grad_norm": 0.2680169641971588, "learning_rate": 2.0414914713299816e-07, "loss": 1.7707, "step": 4181 }, { "epoch": 0.9634279790358808, "grad_norm": 0.2589126527309418, "learning_rate": 2.0159275442911685e-07, "loss": 1.7853, "step": 4182 }, { "epoch": 0.9636583539710879, "grad_norm": 0.28590062260627747, "learning_rate": 1.9905240374444022e-07, "loss": 1.7467, "step": 4183 }, { "epoch": 0.963888728906295, "grad_norm": 0.28953662514686584, "learning_rate": 1.9652809672231209e-07, "loss": 1.7145, "step": 4184 }, { "epoch": 0.964119103841502, "grad_norm": 0.26825034618377686, "learning_rate": 1.9401983499569842e-07, "loss": 1.7953, "step": 4185 }, { "epoch": 0.9643494787767091, "grad_norm": 0.2861626446247101, "learning_rate": 1.9152762018719017e-07, "loss": 1.7303, "step": 4186 }, { "epoch": 0.9645798537119161, "grad_norm": 0.2771601676940918, "learning_rate": 1.8905145390899216e-07, "loss": 1.7723, "step": 4187 }, { "epoch": 0.9648102286471232, "grad_norm": 0.28574442863464355, "learning_rate": 1.865913377629286e-07, "loss": 1.7678, "step": 4188 }, { "epoch": 0.9650406035823302, "grad_norm": 0.2910350263118744, "learning_rate": 1.8414727334044313e-07, "loss": 1.7815, "step": 4189 }, { "epoch": 0.9652709785175373, "grad_norm": 0.24827316403388977, "learning_rate": 1.817192622225905e-07, "loss": 1.8199, "step": 4190 }, { "epoch": 0.9655013534527443, "grad_norm": 0.257604718208313, "learning_rate": 1.7930730598005042e-07, "loss": 1.7728, "step": 4191 }, { "epoch": 0.9657317283879514, "grad_norm": 0.2622743248939514, "learning_rate": 1.769114061731053e-07, "loss": 1.8062, "step": 4192 }, { "epoch": 0.9659621033231585, "grad_norm": 0.2592167854309082, "learning_rate": 1.7453156435165986e-07, "loss": 1.8032, "step": 4193 }, { "epoch": 0.9661924782583655, "grad_norm": 0.24100200831890106, "learning_rate": 1.721677820552242e-07, "loss": 1.8017, "step": 4194 }, { "epoch": 0.9664228531935726, "grad_norm": 0.25201138854026794, "learning_rate": 1.6982006081292245e-07, "loss": 1.7777, "step": 4195 }, { "epoch": 0.9666532281287796, "grad_norm": 0.2676345109939575, "learning_rate": 1.674884021434897e-07, "loss": 1.7653, "step": 4196 }, { "epoch": 0.9668836030639867, "grad_norm": 0.2540442943572998, "learning_rate": 1.6517280755526944e-07, "loss": 1.7711, "step": 4197 }, { "epoch": 0.9671139779991937, "grad_norm": 0.2714833617210388, "learning_rate": 1.628732785462106e-07, "loss": 1.7923, "step": 4198 }, { "epoch": 0.9673443529344007, "grad_norm": 0.27133917808532715, "learning_rate": 1.6058981660387608e-07, "loss": 1.7822, "step": 4199 }, { "epoch": 0.9675747278696077, "grad_norm": 0.30849945545196533, "learning_rate": 1.5832242320543144e-07, "loss": 1.777, "step": 4200 }, { "epoch": 0.9678051028048148, "grad_norm": 0.27475303411483765, "learning_rate": 1.5607109981763956e-07, "loss": 1.8328, "step": 4201 }, { "epoch": 0.9680354777400219, "grad_norm": 0.30650976300239563, "learning_rate": 1.5383584789688544e-07, "loss": 1.7769, "step": 4202 }, { "epoch": 0.9682658526752289, "grad_norm": 0.2954446077346802, "learning_rate": 1.5161666888913738e-07, "loss": 1.7596, "step": 4203 }, { "epoch": 0.968496227610436, "grad_norm": 0.27174028754234314, "learning_rate": 1.494135642299832e-07, "loss": 1.7897, "step": 4204 }, { "epoch": 0.968726602545643, "grad_norm": 1.195704698562622, "learning_rate": 1.4722653534460228e-07, "loss": 1.8079, "step": 4205 }, { "epoch": 0.9689569774808501, "grad_norm": 0.25678643584251404, "learning_rate": 1.4505558364777683e-07, "loss": 1.8006, "step": 4206 }, { "epoch": 0.9691873524160571, "grad_norm": 0.3100367486476898, "learning_rate": 1.4290071054389176e-07, "loss": 1.7667, "step": 4207 }, { "epoch": 0.9694177273512642, "grad_norm": 0.2710176408290863, "learning_rate": 1.4076191742692368e-07, "loss": 1.7866, "step": 4208 }, { "epoch": 0.9696481022864712, "grad_norm": 0.26499757170677185, "learning_rate": 1.3863920568045752e-07, "loss": 1.7853, "step": 4209 }, { "epoch": 0.9698784772216783, "grad_norm": 0.2630395293235779, "learning_rate": 1.3653257667766428e-07, "loss": 1.7791, "step": 4210 }, { "epoch": 0.9701088521568854, "grad_norm": 0.2925940454006195, "learning_rate": 1.3444203178132054e-07, "loss": 1.807, "step": 4211 }, { "epoch": 0.9703392270920924, "grad_norm": 0.28171178698539734, "learning_rate": 1.3236757234379171e-07, "loss": 1.7318, "step": 4212 }, { "epoch": 0.9705696020272995, "grad_norm": 0.27340689301490784, "learning_rate": 1.3030919970704046e-07, "loss": 1.798, "step": 4213 }, { "epoch": 0.9707999769625065, "grad_norm": 0.2787783443927765, "learning_rate": 1.2826691520262114e-07, "loss": 1.7622, "step": 4214 }, { "epoch": 0.9710303518977136, "grad_norm": 0.25163254141807556, "learning_rate": 1.2624072015168241e-07, "loss": 1.8006, "step": 4215 }, { "epoch": 0.9712607268329205, "grad_norm": 0.26329368352890015, "learning_rate": 1.2423061586496477e-07, "loss": 1.8165, "step": 4216 }, { "epoch": 0.9714911017681276, "grad_norm": 0.259724885225296, "learning_rate": 1.2223660364279742e-07, "loss": 1.7829, "step": 4217 }, { "epoch": 0.9717214767033346, "grad_norm": 0.2786312997341156, "learning_rate": 1.2025868477510416e-07, "loss": 1.7647, "step": 4218 }, { "epoch": 0.9719518516385417, "grad_norm": 0.29287925362586975, "learning_rate": 1.1829686054138922e-07, "loss": 1.7561, "step": 4219 }, { "epoch": 0.9721822265737488, "grad_norm": 0.286925345659256, "learning_rate": 1.1635113221075966e-07, "loss": 1.7383, "step": 4220 }, { "epoch": 0.9724126015089558, "grad_norm": 0.27853915095329285, "learning_rate": 1.1442150104189198e-07, "loss": 1.745, "step": 4221 }, { "epoch": 0.9726429764441629, "grad_norm": 0.26434198021888733, "learning_rate": 1.125079682830682e-07, "loss": 1.7567, "step": 4222 }, { "epoch": 0.9728733513793699, "grad_norm": 0.2519270181655884, "learning_rate": 1.1061053517214259e-07, "loss": 1.7771, "step": 4223 }, { "epoch": 0.973103726314577, "grad_norm": 0.2908085882663727, "learning_rate": 1.0872920293655553e-07, "loss": 1.78, "step": 4224 }, { "epoch": 0.973334101249784, "grad_norm": 0.2928467392921448, "learning_rate": 1.0686397279334182e-07, "loss": 1.7441, "step": 4225 }, { "epoch": 0.9735644761849911, "grad_norm": 0.28663837909698486, "learning_rate": 1.0501484594911403e-07, "loss": 1.7968, "step": 4226 }, { "epoch": 0.9737948511201981, "grad_norm": 0.2974708080291748, "learning_rate": 1.0318182360006257e-07, "loss": 1.7036, "step": 4227 }, { "epoch": 0.9740252260554052, "grad_norm": 0.2814171016216278, "learning_rate": 1.0136490693196665e-07, "loss": 1.776, "step": 4228 }, { "epoch": 0.9742556009906123, "grad_norm": 0.26693016290664673, "learning_rate": 9.956409712018333e-08, "loss": 1.7184, "step": 4229 }, { "epoch": 0.9744859759258193, "grad_norm": 0.269056499004364, "learning_rate": 9.77793953296502e-08, "loss": 1.8227, "step": 4230 }, { "epoch": 0.9747163508610264, "grad_norm": 0.2553209662437439, "learning_rate": 9.601080271489093e-08, "loss": 1.8071, "step": 4231 }, { "epoch": 0.9749467257962333, "grad_norm": 0.24732141196727753, "learning_rate": 9.425832041999871e-08, "loss": 1.7685, "step": 4232 }, { "epoch": 0.9751771007314404, "grad_norm": 0.26393789052963257, "learning_rate": 9.252194957865002e-08, "loss": 1.7861, "step": 4233 }, { "epoch": 0.9754074756666474, "grad_norm": 0.27824461460113525, "learning_rate": 9.08016913140991e-08, "loss": 1.7576, "step": 4234 }, { "epoch": 0.9756378506018545, "grad_norm": 0.2505800127983093, "learning_rate": 8.909754673917525e-08, "loss": 1.7855, "step": 4235 }, { "epoch": 0.9758682255370615, "grad_norm": 0.2619543969631195, "learning_rate": 8.740951695628552e-08, "loss": 1.7504, "step": 4236 }, { "epoch": 0.9760986004722686, "grad_norm": 0.300963819026947, "learning_rate": 8.573760305741196e-08, "loss": 1.7643, "step": 4237 }, { "epoch": 0.9763289754074757, "grad_norm": 0.27613428235054016, "learning_rate": 8.408180612410888e-08, "loss": 1.737, "step": 4238 }, { "epoch": 0.9765593503426827, "grad_norm": 0.27568864822387695, "learning_rate": 8.244212722750833e-08, "loss": 1.7853, "step": 4239 }, { "epoch": 0.9767897252778898, "grad_norm": 0.27018463611602783, "learning_rate": 8.081856742831462e-08, "loss": 1.784, "step": 4240 }, { "epoch": 0.9770201002130968, "grad_norm": 0.29115763306617737, "learning_rate": 7.92111277768015e-08, "loss": 1.7827, "step": 4241 }, { "epoch": 0.9772504751483039, "grad_norm": 0.25466400384902954, "learning_rate": 7.761980931282053e-08, "loss": 1.7622, "step": 4242 }, { "epoch": 0.9774808500835109, "grad_norm": 0.2708583176136017, "learning_rate": 7.604461306578715e-08, "loss": 1.7844, "step": 4243 }, { "epoch": 0.977711225018718, "grad_norm": 0.263793408870697, "learning_rate": 7.448554005469455e-08, "loss": 1.8025, "step": 4244 }, { "epoch": 0.977941599953925, "grad_norm": 0.289811909198761, "learning_rate": 7.294259128809988e-08, "loss": 1.7541, "step": 4245 }, { "epoch": 0.9781719748891321, "grad_norm": 0.27439385652542114, "learning_rate": 7.141576776413527e-08, "loss": 1.7575, "step": 4246 }, { "epoch": 0.9784023498243392, "grad_norm": 0.24961285293102264, "learning_rate": 6.990507047049676e-08, "loss": 1.8297, "step": 4247 }, { "epoch": 0.9786327247595462, "grad_norm": 0.27382025122642517, "learning_rate": 6.841050038444985e-08, "loss": 1.7912, "step": 4248 }, { "epoch": 0.9788630996947532, "grad_norm": 0.26430410146713257, "learning_rate": 6.693205847282947e-08, "loss": 1.8168, "step": 4249 }, { "epoch": 0.9790934746299602, "grad_norm": 0.2804317772388458, "learning_rate": 6.546974569203446e-08, "loss": 1.8135, "step": 4250 }, { "epoch": 0.9793238495651673, "grad_norm": 0.2765677869319916, "learning_rate": 6.402356298802758e-08, "loss": 1.8094, "step": 4251 }, { "epoch": 0.9795542245003743, "grad_norm": 0.291985422372818, "learning_rate": 6.259351129634661e-08, "loss": 1.7258, "step": 4252 }, { "epoch": 0.9797845994355814, "grad_norm": 0.2901996374130249, "learning_rate": 6.117959154208208e-08, "loss": 1.7975, "step": 4253 }, { "epoch": 0.9800149743707884, "grad_norm": 0.2774510681629181, "learning_rate": 5.978180463989958e-08, "loss": 1.7859, "step": 4254 }, { "epoch": 0.9802453493059955, "grad_norm": 0.27681079506874084, "learning_rate": 5.8400151494020226e-08, "loss": 1.7796, "step": 4255 }, { "epoch": 0.9804757242412026, "grad_norm": 0.2486700564622879, "learning_rate": 5.7034632998231865e-08, "loss": 1.8233, "step": 4256 }, { "epoch": 0.9807060991764096, "grad_norm": 0.2817420959472656, "learning_rate": 5.5685250035886204e-08, "loss": 1.7224, "step": 4257 }, { "epoch": 0.9809364741116167, "grad_norm": 0.26391735672950745, "learning_rate": 5.435200347989333e-08, "loss": 1.7829, "step": 4258 }, { "epoch": 0.9811668490468237, "grad_norm": 0.3148118555545807, "learning_rate": 5.3034894192727224e-08, "loss": 1.7445, "step": 4259 }, { "epoch": 0.9813972239820308, "grad_norm": 0.2634636163711548, "learning_rate": 5.173392302642299e-08, "loss": 1.7687, "step": 4260 }, { "epoch": 0.9816275989172378, "grad_norm": 0.2809937596321106, "learning_rate": 5.044909082257687e-08, "loss": 1.7696, "step": 4261 }, { "epoch": 0.9818579738524449, "grad_norm": 0.262640118598938, "learning_rate": 4.9180398412337904e-08, "loss": 1.8166, "step": 4262 }, { "epoch": 0.9820883487876519, "grad_norm": 0.2694169580936432, "learning_rate": 4.7927846616424576e-08, "loss": 1.7606, "step": 4263 }, { "epoch": 0.982318723722859, "grad_norm": 0.26924917101860046, "learning_rate": 4.669143624510541e-08, "loss": 1.7706, "step": 4264 }, { "epoch": 0.9825490986580661, "grad_norm": 0.2402264028787613, "learning_rate": 4.547116809821283e-08, "loss": 1.7789, "step": 4265 }, { "epoch": 0.982779473593273, "grad_norm": 0.28191110491752625, "learning_rate": 4.426704296513484e-08, "loss": 1.7132, "step": 4266 }, { "epoch": 0.9830098485284801, "grad_norm": 0.2789383828639984, "learning_rate": 4.3079061624815006e-08, "loss": 1.8299, "step": 4267 }, { "epoch": 0.9832402234636871, "grad_norm": 0.2523559629917145, "learning_rate": 4.190722484575804e-08, "loss": 1.8044, "step": 4268 }, { "epoch": 0.9834705983988942, "grad_norm": 0.2828279137611389, "learning_rate": 4.075153338601867e-08, "loss": 1.7361, "step": 4269 }, { "epoch": 0.9837009733341012, "grad_norm": 0.2675442397594452, "learning_rate": 3.961198799321275e-08, "loss": 1.7972, "step": 4270 }, { "epoch": 0.9839313482693083, "grad_norm": 0.2832237780094147, "learning_rate": 3.848858940450895e-08, "loss": 1.7513, "step": 4271 }, { "epoch": 0.9841617232045153, "grad_norm": 0.2621678113937378, "learning_rate": 3.7381338346628716e-08, "loss": 1.7639, "step": 4272 }, { "epoch": 0.9843920981397224, "grad_norm": 0.2597004771232605, "learning_rate": 3.629023553584909e-08, "loss": 1.7822, "step": 4273 }, { "epoch": 0.9846224730749294, "grad_norm": 0.2453511655330658, "learning_rate": 3.521528167800547e-08, "loss": 1.7876, "step": 4274 }, { "epoch": 0.9848528480101365, "grad_norm": 0.28829237818717957, "learning_rate": 3.415647746847772e-08, "loss": 1.803, "step": 4275 }, { "epoch": 0.9850832229453436, "grad_norm": 0.29990941286087036, "learning_rate": 3.3113823592206825e-08, "loss": 1.7632, "step": 4276 }, { "epoch": 0.9853135978805506, "grad_norm": 0.2771884799003601, "learning_rate": 3.208732072368104e-08, "loss": 1.7491, "step": 4277 }, { "epoch": 0.9855439728157577, "grad_norm": 0.2800425589084625, "learning_rate": 3.107696952694139e-08, "loss": 1.7958, "step": 4278 }, { "epoch": 0.9857743477509647, "grad_norm": 0.26478487253189087, "learning_rate": 3.0082770655581735e-08, "loss": 1.8262, "step": 4279 }, { "epoch": 0.9860047226861718, "grad_norm": 0.28578710556030273, "learning_rate": 2.9104724752748723e-08, "loss": 1.7925, "step": 4280 }, { "epoch": 0.9862350976213788, "grad_norm": 0.3033472001552582, "learning_rate": 2.8142832451133472e-08, "loss": 1.7674, "step": 4281 }, { "epoch": 0.9864654725565859, "grad_norm": 0.2761196196079254, "learning_rate": 2.7197094372985455e-08, "loss": 1.7375, "step": 4282 }, { "epoch": 0.9866958474917928, "grad_norm": 0.2723095118999481, "learning_rate": 2.626751113009862e-08, "loss": 1.8094, "step": 4283 }, { "epoch": 0.9869262224269999, "grad_norm": 0.26924756169319153, "learning_rate": 2.535408332381417e-08, "loss": 1.7702, "step": 4284 }, { "epoch": 0.987156597362207, "grad_norm": 0.29670268297195435, "learning_rate": 2.4456811545031656e-08, "loss": 1.7576, "step": 4285 }, { "epoch": 0.987386972297414, "grad_norm": 1.3475399017333984, "learning_rate": 2.3575696374189548e-08, "loss": 1.7972, "step": 4286 }, { "epoch": 0.9876173472326211, "grad_norm": 0.25185251235961914, "learning_rate": 2.2710738381281905e-08, "loss": 1.8032, "step": 4287 }, { "epoch": 0.9878477221678281, "grad_norm": 0.2539080083370209, "learning_rate": 2.1861938125844473e-08, "loss": 1.7994, "step": 4288 }, { "epoch": 0.9880780971030352, "grad_norm": 0.26694729924201965, "learning_rate": 2.1029296156965804e-08, "loss": 1.7591, "step": 4289 }, { "epoch": 0.9883084720382422, "grad_norm": 0.2624475955963135, "learning_rate": 2.0212813013276157e-08, "loss": 1.7677, "step": 4290 }, { "epoch": 0.9885388469734493, "grad_norm": 0.2835000455379486, "learning_rate": 1.941248922296135e-08, "loss": 1.8019, "step": 4291 }, { "epoch": 0.9887692219086563, "grad_norm": 0.28315114974975586, "learning_rate": 1.862832530374614e-08, "loss": 1.7795, "step": 4292 }, { "epoch": 0.9889995968438634, "grad_norm": 0.4306008219718933, "learning_rate": 1.7860321762902534e-08, "loss": 1.7095, "step": 4293 }, { "epoch": 0.9892299717790705, "grad_norm": 0.24382415413856506, "learning_rate": 1.7108479097252548e-08, "loss": 1.7895, "step": 4294 }, { "epoch": 0.9894603467142775, "grad_norm": 0.2746715545654297, "learning_rate": 1.6372797793159923e-08, "loss": 1.7704, "step": 4295 }, { "epoch": 0.9896907216494846, "grad_norm": 0.258238285779953, "learning_rate": 1.565327832653285e-08, "loss": 1.7793, "step": 4296 }, { "epoch": 0.9899210965846916, "grad_norm": 0.25616490840911865, "learning_rate": 1.494992116282956e-08, "loss": 1.7711, "step": 4297 }, { "epoch": 0.9901514715198987, "grad_norm": 0.343627393245697, "learning_rate": 1.4262726757049982e-08, "loss": 1.7754, "step": 4298 }, { "epoch": 0.9903818464551056, "grad_norm": 0.27245140075683594, "learning_rate": 1.3591695553732963e-08, "loss": 1.7248, "step": 4299 }, { "epoch": 0.9906122213903127, "grad_norm": 0.2667664587497711, "learning_rate": 1.2936827986972933e-08, "loss": 1.7626, "step": 4300 }, { "epoch": 0.9908425963255197, "grad_norm": 0.2636578381061554, "learning_rate": 1.2298124480397688e-08, "loss": 1.7821, "step": 4301 }, { "epoch": 0.9910729712607268, "grad_norm": 0.2618109881877899, "learning_rate": 1.1675585447187832e-08, "loss": 1.7836, "step": 4302 }, { "epoch": 0.9913033461959339, "grad_norm": 0.2531501054763794, "learning_rate": 1.106921129005456e-08, "loss": 1.7591, "step": 4303 }, { "epoch": 0.9915337211311409, "grad_norm": 0.2475627213716507, "learning_rate": 1.0479002401264648e-08, "loss": 1.8243, "step": 4304 }, { "epoch": 0.991764096066348, "grad_norm": 0.26907676458358765, "learning_rate": 9.904959162623795e-09, "loss": 1.7896, "step": 4305 }, { "epoch": 0.991994471001555, "grad_norm": 0.2568984925746918, "learning_rate": 9.347081945473846e-09, "loss": 1.8022, "step": 4306 }, { "epoch": 0.9922248459367621, "grad_norm": 0.2767137587070465, "learning_rate": 8.805371110709449e-09, "loss": 1.795, "step": 4307 }, { "epoch": 0.9924552208719691, "grad_norm": 0.27918118238449097, "learning_rate": 8.279827008758623e-09, "loss": 1.7992, "step": 4308 }, { "epoch": 0.9926855958071762, "grad_norm": 0.29065123200416565, "learning_rate": 7.770449979593864e-09, "loss": 1.7657, "step": 4309 }, { "epoch": 0.9929159707423832, "grad_norm": 0.2926769554615021, "learning_rate": 7.2772403527293644e-09, "loss": 1.7627, "step": 4310 }, { "epoch": 0.9931463456775903, "grad_norm": 0.24382512271404266, "learning_rate": 6.800198447223793e-09, "loss": 1.8191, "step": 4311 }, { "epoch": 0.9933767206127974, "grad_norm": 0.26016396284103394, "learning_rate": 6.339324571674743e-09, "loss": 1.7802, "step": 4312 }, { "epoch": 0.9936070955480044, "grad_norm": 0.2621302008628845, "learning_rate": 5.8946190242159525e-09, "loss": 1.7511, "step": 4313 }, { "epoch": 0.9938374704832115, "grad_norm": 0.2669559121131897, "learning_rate": 5.466082092531188e-09, "loss": 1.7832, "step": 4314 }, { "epoch": 0.9940678454184185, "grad_norm": 0.2675173878669739, "learning_rate": 5.053714053834813e-09, "loss": 1.8079, "step": 4315 }, { "epoch": 0.9942982203536255, "grad_norm": 0.2768224775791168, "learning_rate": 4.657515174888438e-09, "loss": 1.7915, "step": 4316 }, { "epoch": 0.9945285952888325, "grad_norm": 0.2702178359031677, "learning_rate": 4.277485711992601e-09, "loss": 1.7701, "step": 4317 }, { "epoch": 0.9947589702240396, "grad_norm": 0.25208351016044617, "learning_rate": 3.913625910989538e-09, "loss": 1.7816, "step": 4318 }, { "epoch": 0.9949893451592466, "grad_norm": 0.24955019354820251, "learning_rate": 3.565936007254855e-09, "loss": 1.7478, "step": 4319 }, { "epoch": 0.9952197200944537, "grad_norm": 0.2651907503604889, "learning_rate": 3.2344162257086362e-09, "loss": 1.7895, "step": 4320 }, { "epoch": 0.9954500950296608, "grad_norm": 0.2685059607028961, "learning_rate": 2.919066780815438e-09, "loss": 1.7643, "step": 4321 }, { "epoch": 0.9956804699648678, "grad_norm": 0.24525055289268494, "learning_rate": 2.619887876564864e-09, "loss": 1.8244, "step": 4322 }, { "epoch": 0.9959108449000749, "grad_norm": 0.3180790841579437, "learning_rate": 2.3368797065048687e-09, "loss": 1.7347, "step": 4323 }, { "epoch": 0.9961412198352819, "grad_norm": 0.2510049045085907, "learning_rate": 2.0700424537056785e-09, "loss": 1.8023, "step": 4324 }, { "epoch": 0.996371594770489, "grad_norm": 0.27403464913368225, "learning_rate": 1.819376290784769e-09, "loss": 1.8072, "step": 4325 }, { "epoch": 0.996601969705696, "grad_norm": 0.26196104288101196, "learning_rate": 1.5848813798985396e-09, "loss": 1.7577, "step": 4326 }, { "epoch": 0.9968323446409031, "grad_norm": 0.2601703405380249, "learning_rate": 1.366557872739538e-09, "loss": 1.7662, "step": 4327 }, { "epoch": 0.9970627195761101, "grad_norm": 0.2559305429458618, "learning_rate": 1.1644059105447858e-09, "loss": 1.7306, "step": 4328 }, { "epoch": 0.9972930945113172, "grad_norm": 0.30713629722595215, "learning_rate": 9.784256240819023e-10, "loss": 1.7592, "step": 4329 }, { "epoch": 0.9975234694465243, "grad_norm": 0.28403905034065247, "learning_rate": 8.086171336602055e-10, "loss": 1.7921, "step": 4330 }, { "epoch": 0.9977538443817313, "grad_norm": 0.28185123205184937, "learning_rate": 6.549805491307126e-10, "loss": 1.7671, "step": 4331 }, { "epoch": 0.9979842193169384, "grad_norm": 0.2864314913749695, "learning_rate": 5.175159698805887e-10, "loss": 1.7816, "step": 4332 }, { "epoch": 0.9982145942521453, "grad_norm": 0.29817822575569153, "learning_rate": 3.9622348483592254e-10, "loss": 1.7918, "step": 4333 }, { "epoch": 0.9984449691873524, "grad_norm": 0.26756229996681213, "learning_rate": 2.911031724561752e-10, "loss": 1.8186, "step": 4334 }, { "epoch": 0.9986753441225594, "grad_norm": 0.29117920994758606, "learning_rate": 2.0215510074805822e-10, "loss": 1.7581, "step": 4335 }, { "epoch": 0.9989057190577665, "grad_norm": 0.278814435005188, "learning_rate": 1.2937932725165525e-10, "loss": 1.7383, "step": 4336 }, { "epoch": 0.9991360939929735, "grad_norm": 0.26253387331962585, "learning_rate": 7.27758990404226e-11, "loss": 1.7625, "step": 4337 }, { "epoch": 0.9993664689281806, "grad_norm": 0.2880919575691223, "learning_rate": 3.234485273784227e-11, "loss": 1.7774, "step": 4338 }, { "epoch": 0.9995968438633877, "grad_norm": 0.2638327479362488, "learning_rate": 8.086214492442067e-12, "loss": 1.7916, "step": 4339 }, { "epoch": 0.9998272187985947, "grad_norm": 0.25953325629234314, "learning_rate": 0.0, "loss": 1.7776, "step": 4340 } ], "logging_steps": 1, "max_steps": 4340, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 108, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.486730505006612e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }