{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999194392975107, "eval_steps": 500, "global_step": 6206, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 10.210924424365194, "learning_rate": 1.0695187165775401e-07, "loss": 1.5682, "step": 1 }, { "epoch": 0.0, "grad_norm": 16.44163316174493, "learning_rate": 2.1390374331550802e-07, "loss": 1.6639, "step": 2 }, { "epoch": 0.0, "grad_norm": 16.90602255507819, "learning_rate": 3.208556149732621e-07, "loss": 1.5548, "step": 3 }, { "epoch": 0.0, "grad_norm": 7.568761330051903, "learning_rate": 4.2780748663101604e-07, "loss": 1.6964, "step": 4 }, { "epoch": 0.0, "grad_norm": 10.053495749958499, "learning_rate": 5.347593582887701e-07, "loss": 1.5811, "step": 5 }, { "epoch": 0.0, "grad_norm": 11.100255299974382, "learning_rate": 6.417112299465242e-07, "loss": 1.7673, "step": 6 }, { "epoch": 0.0, "grad_norm": 12.395908763666682, "learning_rate": 7.486631016042781e-07, "loss": 1.5399, "step": 7 }, { "epoch": 0.0, "grad_norm": 1.7593110880922818, "learning_rate": 8.556149732620321e-07, "loss": 0.283, "step": 8 }, { "epoch": 0.0, "grad_norm": 7.877567901948208, "learning_rate": 9.625668449197862e-07, "loss": 1.6874, "step": 9 }, { "epoch": 0.0, "grad_norm": 8.180540432040777, "learning_rate": 1.0695187165775401e-06, "loss": 1.5504, "step": 10 }, { "epoch": 0.0, "grad_norm": 7.546607097664714, "learning_rate": 1.1764705882352942e-06, "loss": 1.785, "step": 11 }, { "epoch": 0.0, "grad_norm": 1.3329589834806792, "learning_rate": 1.2834224598930483e-06, "loss": 0.2854, "step": 12 }, { "epoch": 0.0, "grad_norm": 9.891753844916378, "learning_rate": 1.3903743315508022e-06, "loss": 1.6723, "step": 13 }, { "epoch": 0.0, "grad_norm": 7.490876306830749, "learning_rate": 1.4973262032085562e-06, "loss": 1.5442, "step": 14 }, { "epoch": 0.0, "grad_norm": 7.710966193272349, "learning_rate": 1.6042780748663103e-06, "loss": 1.6014, "step": 15 }, { "epoch": 0.0, "grad_norm": 4.984603063454031, "learning_rate": 1.7112299465240642e-06, "loss": 1.4806, "step": 16 }, { "epoch": 0.0, "grad_norm": 6.291844784402714, "learning_rate": 1.8181818181818183e-06, "loss": 1.4763, "step": 17 }, { "epoch": 0.0, "grad_norm": 6.782835041211681, "learning_rate": 1.9251336898395724e-06, "loss": 1.4745, "step": 18 }, { "epoch": 0.0, "grad_norm": 5.442729846479861, "learning_rate": 2.0320855614973265e-06, "loss": 1.3744, "step": 19 }, { "epoch": 0.0, "grad_norm": 8.788392769622016, "learning_rate": 2.1390374331550802e-06, "loss": 1.6037, "step": 20 }, { "epoch": 0.0, "grad_norm": 4.585367664501108, "learning_rate": 2.2459893048128343e-06, "loss": 1.3593, "step": 21 }, { "epoch": 0.0, "grad_norm": 6.074743433428479, "learning_rate": 2.3529411764705885e-06, "loss": 1.4885, "step": 22 }, { "epoch": 0.0, "grad_norm": 2.0173051322601316, "learning_rate": 2.4598930481283426e-06, "loss": 0.2511, "step": 23 }, { "epoch": 0.0, "grad_norm": 6.072324991166056, "learning_rate": 2.5668449197860967e-06, "loss": 1.5356, "step": 24 }, { "epoch": 0.0, "grad_norm": 3.726151481690184, "learning_rate": 2.673796791443851e-06, "loss": 1.3969, "step": 25 }, { "epoch": 0.0, "grad_norm": 0.8822707142569528, "learning_rate": 2.7807486631016045e-06, "loss": 0.3016, "step": 26 }, { "epoch": 0.0, "grad_norm": 3.7138646196250655, "learning_rate": 2.8877005347593586e-06, "loss": 1.3831, "step": 27 }, { "epoch": 0.0, "grad_norm": 4.921320808333385, "learning_rate": 2.9946524064171123e-06, "loss": 1.3347, "step": 28 }, { "epoch": 0.0, "grad_norm": 2.625067210472347, "learning_rate": 3.101604278074867e-06, "loss": 1.3745, "step": 29 }, { "epoch": 0.0, "grad_norm": 3.418171729464003, "learning_rate": 3.2085561497326205e-06, "loss": 1.2971, "step": 30 }, { "epoch": 0.0, "grad_norm": 6.644998559772229, "learning_rate": 3.3155080213903747e-06, "loss": 1.2838, "step": 31 }, { "epoch": 0.01, "grad_norm": 5.046403703444144, "learning_rate": 3.4224598930481284e-06, "loss": 1.2544, "step": 32 }, { "epoch": 0.01, "grad_norm": 4.2000351223158265, "learning_rate": 3.529411764705883e-06, "loss": 1.3135, "step": 33 }, { "epoch": 0.01, "grad_norm": 4.050533805801215, "learning_rate": 3.6363636363636366e-06, "loss": 1.2859, "step": 34 }, { "epoch": 0.01, "grad_norm": 4.549021630370546, "learning_rate": 3.7433155080213907e-06, "loss": 1.1747, "step": 35 }, { "epoch": 0.01, "grad_norm": 5.4856185155410575, "learning_rate": 3.850267379679145e-06, "loss": 1.2429, "step": 36 }, { "epoch": 0.01, "grad_norm": 4.3131386104380605, "learning_rate": 3.957219251336899e-06, "loss": 1.289, "step": 37 }, { "epoch": 0.01, "grad_norm": 5.3799660385931825, "learning_rate": 4.064171122994653e-06, "loss": 1.282, "step": 38 }, { "epoch": 0.01, "grad_norm": 3.5661477938505928, "learning_rate": 4.171122994652407e-06, "loss": 1.2853, "step": 39 }, { "epoch": 0.01, "grad_norm": 3.802041388850755, "learning_rate": 4.2780748663101604e-06, "loss": 1.1343, "step": 40 }, { "epoch": 0.01, "grad_norm": 4.849944847815846, "learning_rate": 4.385026737967915e-06, "loss": 1.2374, "step": 41 }, { "epoch": 0.01, "grad_norm": 3.695438826419944, "learning_rate": 4.491978609625669e-06, "loss": 1.1394, "step": 42 }, { "epoch": 0.01, "grad_norm": 5.122985630054711, "learning_rate": 4.598930481283423e-06, "loss": 1.1535, "step": 43 }, { "epoch": 0.01, "grad_norm": 4.356150100854179, "learning_rate": 4.705882352941177e-06, "loss": 1.2786, "step": 44 }, { "epoch": 0.01, "grad_norm": 4.820111683228299, "learning_rate": 4.812834224598931e-06, "loss": 1.2676, "step": 45 }, { "epoch": 0.01, "grad_norm": 3.3369797947688866, "learning_rate": 4.919786096256685e-06, "loss": 1.1732, "step": 46 }, { "epoch": 0.01, "grad_norm": 4.704785025506529, "learning_rate": 5.026737967914439e-06, "loss": 1.1879, "step": 47 }, { "epoch": 0.01, "grad_norm": 3.8922076702169557, "learning_rate": 5.133689839572193e-06, "loss": 1.2301, "step": 48 }, { "epoch": 0.01, "grad_norm": 3.157835760318758, "learning_rate": 5.240641711229947e-06, "loss": 1.2211, "step": 49 }, { "epoch": 0.01, "grad_norm": 2.304697431122671, "learning_rate": 5.347593582887702e-06, "loss": 1.1832, "step": 50 }, { "epoch": 0.01, "grad_norm": 3.712957920385671, "learning_rate": 5.4545454545454545e-06, "loss": 1.3803, "step": 51 }, { "epoch": 0.01, "grad_norm": 2.654241183765462, "learning_rate": 5.561497326203209e-06, "loss": 1.1827, "step": 52 }, { "epoch": 0.01, "grad_norm": 4.124523482108906, "learning_rate": 5.6684491978609635e-06, "loss": 1.1819, "step": 53 }, { "epoch": 0.01, "grad_norm": 4.566931675556272, "learning_rate": 5.775401069518717e-06, "loss": 1.1791, "step": 54 }, { "epoch": 0.01, "grad_norm": 5.199412628158727, "learning_rate": 5.882352941176471e-06, "loss": 1.1403, "step": 55 }, { "epoch": 0.01, "grad_norm": 4.220437849570805, "learning_rate": 5.989304812834225e-06, "loss": 1.1777, "step": 56 }, { "epoch": 0.01, "grad_norm": 2.595673089467168, "learning_rate": 6.096256684491979e-06, "loss": 1.2512, "step": 57 }, { "epoch": 0.01, "grad_norm": 3.8822831650785776, "learning_rate": 6.203208556149734e-06, "loss": 1.1477, "step": 58 }, { "epoch": 0.01, "grad_norm": 3.180319083657637, "learning_rate": 6.3101604278074865e-06, "loss": 1.2286, "step": 59 }, { "epoch": 0.01, "grad_norm": 5.036979871832728, "learning_rate": 6.417112299465241e-06, "loss": 1.1092, "step": 60 }, { "epoch": 0.01, "grad_norm": 5.611300252280236, "learning_rate": 6.524064171122996e-06, "loss": 1.2598, "step": 61 }, { "epoch": 0.01, "grad_norm": 4.506870746581446, "learning_rate": 6.631016042780749e-06, "loss": 1.1729, "step": 62 }, { "epoch": 0.01, "grad_norm": 2.018573703685524, "learning_rate": 6.737967914438504e-06, "loss": 1.2374, "step": 63 }, { "epoch": 0.01, "grad_norm": 3.731831853793168, "learning_rate": 6.844919786096257e-06, "loss": 1.2315, "step": 64 }, { "epoch": 0.01, "grad_norm": 4.0551691669591605, "learning_rate": 6.951871657754011e-06, "loss": 1.0309, "step": 65 }, { "epoch": 0.01, "grad_norm": 4.5210576624375145, "learning_rate": 7.058823529411766e-06, "loss": 1.1146, "step": 66 }, { "epoch": 0.01, "grad_norm": 4.686121623513556, "learning_rate": 7.1657754010695195e-06, "loss": 1.1706, "step": 67 }, { "epoch": 0.01, "grad_norm": 4.171001671522999, "learning_rate": 7.272727272727273e-06, "loss": 1.1249, "step": 68 }, { "epoch": 0.01, "grad_norm": 4.156222006337822, "learning_rate": 7.379679144385027e-06, "loss": 1.1191, "step": 69 }, { "epoch": 0.01, "grad_norm": 3.981084804012072, "learning_rate": 7.486631016042781e-06, "loss": 1.1027, "step": 70 }, { "epoch": 0.01, "grad_norm": 2.425376280860305, "learning_rate": 7.593582887700536e-06, "loss": 1.175, "step": 71 }, { "epoch": 0.01, "grad_norm": 4.37901318272368, "learning_rate": 7.70053475935829e-06, "loss": 1.2242, "step": 72 }, { "epoch": 0.01, "grad_norm": 4.0266389720376985, "learning_rate": 7.807486631016043e-06, "loss": 1.1567, "step": 73 }, { "epoch": 0.01, "grad_norm": 2.500432835302937, "learning_rate": 7.914438502673799e-06, "loss": 1.1513, "step": 74 }, { "epoch": 0.01, "grad_norm": 3.932288217437781, "learning_rate": 8.02139037433155e-06, "loss": 1.0977, "step": 75 }, { "epoch": 0.01, "grad_norm": 4.68143467456106, "learning_rate": 8.128342245989306e-06, "loss": 1.0792, "step": 76 }, { "epoch": 0.01, "grad_norm": 5.159806568157452, "learning_rate": 8.23529411764706e-06, "loss": 1.1924, "step": 77 }, { "epoch": 0.01, "grad_norm": 2.2607111819553096, "learning_rate": 8.342245989304813e-06, "loss": 1.1163, "step": 78 }, { "epoch": 0.01, "grad_norm": 1.5783478985483297, "learning_rate": 8.449197860962567e-06, "loss": 0.3135, "step": 79 }, { "epoch": 0.01, "grad_norm": 5.179712540003577, "learning_rate": 8.556149732620321e-06, "loss": 1.1929, "step": 80 }, { "epoch": 0.01, "grad_norm": 3.738452042491816, "learning_rate": 8.663101604278076e-06, "loss": 1.1628, "step": 81 }, { "epoch": 0.01, "grad_norm": 3.791208190586816, "learning_rate": 8.77005347593583e-06, "loss": 1.1728, "step": 82 }, { "epoch": 0.01, "grad_norm": 2.6904540571434876, "learning_rate": 8.877005347593584e-06, "loss": 1.1414, "step": 83 }, { "epoch": 0.01, "grad_norm": 3.033991881206918, "learning_rate": 8.983957219251337e-06, "loss": 1.1572, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.9106652453435673, "learning_rate": 9.090909090909091e-06, "loss": 1.132, "step": 85 }, { "epoch": 0.01, "grad_norm": 4.166014480411821, "learning_rate": 9.197860962566846e-06, "loss": 1.0467, "step": 86 }, { "epoch": 0.01, "grad_norm": 4.944598153876644, "learning_rate": 9.3048128342246e-06, "loss": 1.1405, "step": 87 }, { "epoch": 0.01, "grad_norm": 3.3336139242966705, "learning_rate": 9.411764705882354e-06, "loss": 1.1207, "step": 88 }, { "epoch": 0.01, "grad_norm": 4.558471573448441, "learning_rate": 9.518716577540108e-06, "loss": 1.1251, "step": 89 }, { "epoch": 0.01, "grad_norm": 3.986500849853027, "learning_rate": 9.625668449197861e-06, "loss": 1.0962, "step": 90 }, { "epoch": 0.01, "grad_norm": 3.6213128641904944, "learning_rate": 9.732620320855617e-06, "loss": 1.1198, "step": 91 }, { "epoch": 0.01, "grad_norm": 4.127989408242883, "learning_rate": 9.83957219251337e-06, "loss": 1.1249, "step": 92 }, { "epoch": 0.01, "grad_norm": 3.6941513592206183, "learning_rate": 9.946524064171124e-06, "loss": 1.143, "step": 93 }, { "epoch": 0.02, "grad_norm": 4.02597576184469, "learning_rate": 1.0053475935828878e-05, "loss": 1.1641, "step": 94 }, { "epoch": 0.02, "grad_norm": 1.9520570810914715, "learning_rate": 1.0160427807486633e-05, "loss": 1.1835, "step": 95 }, { "epoch": 0.02, "grad_norm": 2.3315114219888047, "learning_rate": 1.0267379679144387e-05, "loss": 1.1465, "step": 96 }, { "epoch": 0.02, "grad_norm": 2.7551548934013312, "learning_rate": 1.0374331550802139e-05, "loss": 1.2029, "step": 97 }, { "epoch": 0.02, "grad_norm": 3.8026878538259297, "learning_rate": 1.0481283422459894e-05, "loss": 1.1506, "step": 98 }, { "epoch": 0.02, "grad_norm": 1.4776218388598408, "learning_rate": 1.0588235294117648e-05, "loss": 0.3035, "step": 99 }, { "epoch": 0.02, "grad_norm": 2.192019235756278, "learning_rate": 1.0695187165775403e-05, "loss": 1.1457, "step": 100 }, { "epoch": 0.02, "grad_norm": 5.1049838080228795, "learning_rate": 1.0802139037433157e-05, "loss": 1.2099, "step": 101 }, { "epoch": 0.02, "grad_norm": 3.6395551558511805, "learning_rate": 1.0909090909090909e-05, "loss": 1.1281, "step": 102 }, { "epoch": 0.02, "grad_norm": 3.025030933568755, "learning_rate": 1.1016042780748664e-05, "loss": 1.1878, "step": 103 }, { "epoch": 0.02, "grad_norm": 0.993296236528529, "learning_rate": 1.1122994652406418e-05, "loss": 0.2876, "step": 104 }, { "epoch": 0.02, "grad_norm": 3.0080588499967793, "learning_rate": 1.1229946524064172e-05, "loss": 1.1651, "step": 105 }, { "epoch": 0.02, "grad_norm": 5.819118546267775, "learning_rate": 1.1336898395721927e-05, "loss": 1.0435, "step": 106 }, { "epoch": 0.02, "grad_norm": 3.1986050605210576, "learning_rate": 1.1443850267379679e-05, "loss": 1.1411, "step": 107 }, { "epoch": 0.02, "grad_norm": 6.09927446396835, "learning_rate": 1.1550802139037434e-05, "loss": 1.1903, "step": 108 }, { "epoch": 0.02, "grad_norm": 3.9400379539249544, "learning_rate": 1.1657754010695188e-05, "loss": 1.1381, "step": 109 }, { "epoch": 0.02, "grad_norm": 4.553340339673872, "learning_rate": 1.1764705882352942e-05, "loss": 1.1399, "step": 110 }, { "epoch": 0.02, "grad_norm": 3.959159376720755, "learning_rate": 1.1871657754010697e-05, "loss": 1.1779, "step": 111 }, { "epoch": 0.02, "grad_norm": 4.379100730585923, "learning_rate": 1.197860962566845e-05, "loss": 1.0423, "step": 112 }, { "epoch": 0.02, "grad_norm": 3.48508394838034, "learning_rate": 1.2085561497326203e-05, "loss": 1.1648, "step": 113 }, { "epoch": 0.02, "grad_norm": 2.953125161468663, "learning_rate": 1.2192513368983958e-05, "loss": 1.0112, "step": 114 }, { "epoch": 0.02, "grad_norm": 3.8289059212246084, "learning_rate": 1.2299465240641712e-05, "loss": 1.1163, "step": 115 }, { "epoch": 0.02, "grad_norm": 4.498769591920276, "learning_rate": 1.2406417112299467e-05, "loss": 1.0837, "step": 116 }, { "epoch": 0.02, "grad_norm": 2.410796588925799, "learning_rate": 1.251336898395722e-05, "loss": 1.226, "step": 117 }, { "epoch": 0.02, "grad_norm": 5.005238268627421, "learning_rate": 1.2620320855614973e-05, "loss": 1.0762, "step": 118 }, { "epoch": 0.02, "grad_norm": 3.4394454999294384, "learning_rate": 1.2727272727272728e-05, "loss": 1.0955, "step": 119 }, { "epoch": 0.02, "grad_norm": 2.87001434588419, "learning_rate": 1.2834224598930482e-05, "loss": 1.054, "step": 120 }, { "epoch": 0.02, "grad_norm": 4.915377151439507, "learning_rate": 1.2941176470588238e-05, "loss": 1.1133, "step": 121 }, { "epoch": 0.02, "grad_norm": 3.89918367205376, "learning_rate": 1.3048128342245991e-05, "loss": 1.1305, "step": 122 }, { "epoch": 0.02, "grad_norm": 3.772317597749237, "learning_rate": 1.3155080213903743e-05, "loss": 1.0654, "step": 123 }, { "epoch": 0.02, "grad_norm": 3.050619944119069, "learning_rate": 1.3262032085561499e-05, "loss": 1.1007, "step": 124 }, { "epoch": 0.02, "grad_norm": 1.6119808839985004, "learning_rate": 1.3368983957219252e-05, "loss": 1.2982, "step": 125 }, { "epoch": 0.02, "grad_norm": 3.1414138031104795, "learning_rate": 1.3475935828877008e-05, "loss": 1.1385, "step": 126 }, { "epoch": 0.02, "grad_norm": 5.181972454214259, "learning_rate": 1.3582887700534761e-05, "loss": 1.0735, "step": 127 }, { "epoch": 0.02, "grad_norm": 2.867819568284049, "learning_rate": 1.3689839572192513e-05, "loss": 1.117, "step": 128 }, { "epoch": 0.02, "grad_norm": 4.414633276841253, "learning_rate": 1.3796791443850269e-05, "loss": 1.132, "step": 129 }, { "epoch": 0.02, "grad_norm": 4.002953869201478, "learning_rate": 1.3903743315508022e-05, "loss": 1.1215, "step": 130 }, { "epoch": 0.02, "grad_norm": 4.698949436705594, "learning_rate": 1.4010695187165778e-05, "loss": 1.1504, "step": 131 }, { "epoch": 0.02, "grad_norm": 2.1252324594245677, "learning_rate": 1.4117647058823532e-05, "loss": 0.2829, "step": 132 }, { "epoch": 0.02, "grad_norm": 3.286448692795306, "learning_rate": 1.4224598930481284e-05, "loss": 1.1724, "step": 133 }, { "epoch": 0.02, "grad_norm": 3.9539891681821855, "learning_rate": 1.4331550802139039e-05, "loss": 1.1439, "step": 134 }, { "epoch": 0.02, "grad_norm": 2.586842358446137, "learning_rate": 1.4438502673796793e-05, "loss": 1.0103, "step": 135 }, { "epoch": 0.02, "grad_norm": 3.23223053979603, "learning_rate": 1.4545454545454546e-05, "loss": 1.0873, "step": 136 }, { "epoch": 0.02, "grad_norm": 3.0062170776340964, "learning_rate": 1.4652406417112302e-05, "loss": 1.0575, "step": 137 }, { "epoch": 0.02, "grad_norm": 4.389017374167273, "learning_rate": 1.4759358288770054e-05, "loss": 1.0298, "step": 138 }, { "epoch": 0.02, "grad_norm": 3.538584561107161, "learning_rate": 1.4866310160427807e-05, "loss": 1.1226, "step": 139 }, { "epoch": 0.02, "grad_norm": 3.3243927422609625, "learning_rate": 1.4973262032085563e-05, "loss": 1.0977, "step": 140 }, { "epoch": 0.02, "grad_norm": 2.640616783716184, "learning_rate": 1.5080213903743316e-05, "loss": 1.1768, "step": 141 }, { "epoch": 0.02, "grad_norm": 3.299906417213331, "learning_rate": 1.5187165775401072e-05, "loss": 1.0503, "step": 142 }, { "epoch": 0.02, "grad_norm": 4.08688011018603, "learning_rate": 1.5294117647058822e-05, "loss": 1.1132, "step": 143 }, { "epoch": 0.02, "grad_norm": 2.1042198199618345, "learning_rate": 1.540106951871658e-05, "loss": 1.134, "step": 144 }, { "epoch": 0.02, "grad_norm": 4.454007031274598, "learning_rate": 1.5508021390374333e-05, "loss": 0.9956, "step": 145 }, { "epoch": 0.02, "grad_norm": 4.659073146608585, "learning_rate": 1.5614973262032087e-05, "loss": 1.1384, "step": 146 }, { "epoch": 0.02, "grad_norm": 4.613062324465107, "learning_rate": 1.572192513368984e-05, "loss": 1.1259, "step": 147 }, { "epoch": 0.02, "grad_norm": 3.7366814932300754, "learning_rate": 1.5828877005347597e-05, "loss": 1.1549, "step": 148 }, { "epoch": 0.02, "grad_norm": 3.8637232204608827, "learning_rate": 1.5935828877005348e-05, "loss": 1.0361, "step": 149 }, { "epoch": 0.02, "grad_norm": 2.6781386634362385, "learning_rate": 1.60427807486631e-05, "loss": 1.0989, "step": 150 }, { "epoch": 0.02, "grad_norm": 1.2605859732083367, "learning_rate": 1.614973262032086e-05, "loss": 1.0131, "step": 151 }, { "epoch": 0.02, "grad_norm": 4.282382564682639, "learning_rate": 1.6256684491978612e-05, "loss": 1.0431, "step": 152 }, { "epoch": 0.02, "grad_norm": 1.645145111408581, "learning_rate": 1.6363636363636366e-05, "loss": 0.3041, "step": 153 }, { "epoch": 0.02, "grad_norm": 3.531275386212676, "learning_rate": 1.647058823529412e-05, "loss": 1.113, "step": 154 }, { "epoch": 0.02, "grad_norm": 2.9859551046462323, "learning_rate": 1.6577540106951873e-05, "loss": 1.1458, "step": 155 }, { "epoch": 0.03, "grad_norm": 3.5548054099177313, "learning_rate": 1.6684491978609627e-05, "loss": 1.0592, "step": 156 }, { "epoch": 0.03, "grad_norm": 3.9203388503623198, "learning_rate": 1.679144385026738e-05, "loss": 1.1476, "step": 157 }, { "epoch": 0.03, "grad_norm": 3.607594765246559, "learning_rate": 1.6898395721925134e-05, "loss": 1.0805, "step": 158 }, { "epoch": 0.03, "grad_norm": 5.186909285773443, "learning_rate": 1.7005347593582888e-05, "loss": 1.1162, "step": 159 }, { "epoch": 0.03, "grad_norm": 1.625133949040952, "learning_rate": 1.7112299465240642e-05, "loss": 0.3011, "step": 160 }, { "epoch": 0.03, "grad_norm": 4.5609876660642525, "learning_rate": 1.7219251336898395e-05, "loss": 1.1284, "step": 161 }, { "epoch": 0.03, "grad_norm": 3.5211294527446504, "learning_rate": 1.7326203208556153e-05, "loss": 0.9977, "step": 162 }, { "epoch": 0.03, "grad_norm": 2.740132662352993, "learning_rate": 1.7433155080213906e-05, "loss": 1.1346, "step": 163 }, { "epoch": 0.03, "grad_norm": 3.7801585237878346, "learning_rate": 1.754010695187166e-05, "loss": 1.1239, "step": 164 }, { "epoch": 0.03, "grad_norm": 4.551966382698663, "learning_rate": 1.7647058823529414e-05, "loss": 1.0934, "step": 165 }, { "epoch": 0.03, "grad_norm": 2.3808776003583603, "learning_rate": 1.7754010695187167e-05, "loss": 1.0991, "step": 166 }, { "epoch": 0.03, "grad_norm": 3.474153451239422, "learning_rate": 1.786096256684492e-05, "loss": 1.0915, "step": 167 }, { "epoch": 0.03, "grad_norm": 1.9786141946232345, "learning_rate": 1.7967914438502675e-05, "loss": 1.0472, "step": 168 }, { "epoch": 0.03, "grad_norm": 1.4752475595433048, "learning_rate": 1.807486631016043e-05, "loss": 1.076, "step": 169 }, { "epoch": 0.03, "grad_norm": 4.100860942604742, "learning_rate": 1.8181818181818182e-05, "loss": 1.0482, "step": 170 }, { "epoch": 0.03, "grad_norm": 2.8358746145963356, "learning_rate": 1.8288770053475936e-05, "loss": 1.0332, "step": 171 }, { "epoch": 0.03, "grad_norm": 2.9559920990855098, "learning_rate": 1.8395721925133693e-05, "loss": 1.0734, "step": 172 }, { "epoch": 0.03, "grad_norm": 4.79569550467241, "learning_rate": 1.8502673796791447e-05, "loss": 1.1261, "step": 173 }, { "epoch": 0.03, "grad_norm": 4.165380101256001, "learning_rate": 1.86096256684492e-05, "loss": 1.0725, "step": 174 }, { "epoch": 0.03, "grad_norm": 3.5645502448241597, "learning_rate": 1.8716577540106954e-05, "loss": 1.0731, "step": 175 }, { "epoch": 0.03, "grad_norm": 2.7809697556481683, "learning_rate": 1.8823529411764708e-05, "loss": 1.1384, "step": 176 }, { "epoch": 0.03, "grad_norm": 3.591741780309166, "learning_rate": 1.893048128342246e-05, "loss": 1.1162, "step": 177 }, { "epoch": 0.03, "grad_norm": 3.3557636100346975, "learning_rate": 1.9037433155080215e-05, "loss": 1.0289, "step": 178 }, { "epoch": 0.03, "grad_norm": 4.199645009252015, "learning_rate": 1.9144385026737972e-05, "loss": 1.0517, "step": 179 }, { "epoch": 0.03, "grad_norm": 3.973490609591486, "learning_rate": 1.9251336898395722e-05, "loss": 1.1863, "step": 180 }, { "epoch": 0.03, "grad_norm": 4.359931062625831, "learning_rate": 1.9358288770053476e-05, "loss": 1.1293, "step": 181 }, { "epoch": 0.03, "grad_norm": 1.8141138849858254, "learning_rate": 1.9465240641711233e-05, "loss": 0.2871, "step": 182 }, { "epoch": 0.03, "grad_norm": 3.842646494686947, "learning_rate": 1.9572192513368987e-05, "loss": 1.0309, "step": 183 }, { "epoch": 0.03, "grad_norm": 5.0835172692879125, "learning_rate": 1.967914438502674e-05, "loss": 1.1584, "step": 184 }, { "epoch": 0.03, "grad_norm": 1.6672865032913862, "learning_rate": 1.9786096256684494e-05, "loss": 0.2867, "step": 185 }, { "epoch": 0.03, "grad_norm": 2.241202427135258, "learning_rate": 1.9893048128342248e-05, "loss": 1.1752, "step": 186 }, { "epoch": 0.03, "grad_norm": 3.5061789874333207, "learning_rate": 2e-05, "loss": 1.0874, "step": 187 }, { "epoch": 0.03, "grad_norm": 2.569976042550484, "learning_rate": 1.9999998637862175e-05, "loss": 1.0155, "step": 188 }, { "epoch": 0.03, "grad_norm": 3.5533871462566022, "learning_rate": 1.999999455144907e-05, "loss": 1.1303, "step": 189 }, { "epoch": 0.03, "grad_norm": 5.1081000087569635, "learning_rate": 1.9999987740761794e-05, "loss": 1.0998, "step": 190 }, { "epoch": 0.03, "grad_norm": 4.170861053120624, "learning_rate": 1.999997820580221e-05, "loss": 1.0311, "step": 191 }, { "epoch": 0.03, "grad_norm": 5.175164873380829, "learning_rate": 1.999996594657291e-05, "loss": 1.046, "step": 192 }, { "epoch": 0.03, "grad_norm": 3.8106149484565335, "learning_rate": 1.9999950963077235e-05, "loss": 1.0873, "step": 193 }, { "epoch": 0.03, "grad_norm": 4.107151604874103, "learning_rate": 1.999993325531927e-05, "loss": 1.0914, "step": 194 }, { "epoch": 0.03, "grad_norm": 1.6952122249920365, "learning_rate": 1.9999912823303832e-05, "loss": 1.0086, "step": 195 }, { "epoch": 0.03, "grad_norm": 3.7636617394129224, "learning_rate": 1.9999889667036496e-05, "loss": 1.0907, "step": 196 }, { "epoch": 0.03, "grad_norm": 4.336205826438395, "learning_rate": 1.9999863786523567e-05, "loss": 1.1719, "step": 197 }, { "epoch": 0.03, "grad_norm": 3.4170431224322475, "learning_rate": 1.999983518177209e-05, "loss": 1.0934, "step": 198 }, { "epoch": 0.03, "grad_norm": 5.386561888692349, "learning_rate": 1.9999803852789864e-05, "loss": 1.102, "step": 199 }, { "epoch": 0.03, "grad_norm": 3.713869755373159, "learning_rate": 1.999976979958542e-05, "loss": 1.0665, "step": 200 }, { "epoch": 0.03, "grad_norm": 4.968536564303448, "learning_rate": 1.9999733022168043e-05, "loss": 1.0938, "step": 201 }, { "epoch": 0.03, "grad_norm": 3.8635737633351126, "learning_rate": 1.9999693520547745e-05, "loss": 1.0827, "step": 202 }, { "epoch": 0.03, "grad_norm": 4.070953728656745, "learning_rate": 1.9999651294735285e-05, "loss": 1.0696, "step": 203 }, { "epoch": 0.03, "grad_norm": 3.943809419356627, "learning_rate": 1.9999606344742176e-05, "loss": 1.1081, "step": 204 }, { "epoch": 0.03, "grad_norm": 4.935695849737721, "learning_rate": 1.9999558670580656e-05, "loss": 1.1193, "step": 205 }, { "epoch": 0.03, "grad_norm": 1.298136453812181, "learning_rate": 1.999950827226371e-05, "loss": 0.3235, "step": 206 }, { "epoch": 0.03, "grad_norm": 3.4506032720350515, "learning_rate": 1.9999455149805076e-05, "loss": 1.0994, "step": 207 }, { "epoch": 0.03, "grad_norm": 1.7650033312109614, "learning_rate": 1.9999399303219222e-05, "loss": 0.988, "step": 208 }, { "epoch": 0.03, "grad_norm": 3.263739958988378, "learning_rate": 1.9999340732521363e-05, "loss": 1.073, "step": 209 }, { "epoch": 0.03, "grad_norm": 4.145010529471392, "learning_rate": 1.9999279437727456e-05, "loss": 1.0739, "step": 210 }, { "epoch": 0.03, "grad_norm": 3.3481527558985498, "learning_rate": 1.99992154188542e-05, "loss": 1.0093, "step": 211 }, { "epoch": 0.03, "grad_norm": 2.3841545953037233, "learning_rate": 1.999914867591903e-05, "loss": 1.0673, "step": 212 }, { "epoch": 0.03, "grad_norm": 4.439012726313702, "learning_rate": 1.9999079208940137e-05, "loss": 1.1529, "step": 213 }, { "epoch": 0.03, "grad_norm": 2.913422296883936, "learning_rate": 1.9999007017936436e-05, "loss": 1.0376, "step": 214 }, { "epoch": 0.03, "grad_norm": 3.809663812017499, "learning_rate": 1.99989321029276e-05, "loss": 1.0014, "step": 215 }, { "epoch": 0.03, "grad_norm": 4.180615131436673, "learning_rate": 1.999885446393404e-05, "loss": 1.1109, "step": 216 }, { "epoch": 0.03, "grad_norm": 4.760738281248398, "learning_rate": 1.9998774100976903e-05, "loss": 1.0083, "step": 217 }, { "epoch": 0.04, "grad_norm": 2.293150704490095, "learning_rate": 1.999869101407808e-05, "loss": 1.0446, "step": 218 }, { "epoch": 0.04, "grad_norm": 3.282029268281879, "learning_rate": 1.999860520326021e-05, "loss": 1.0652, "step": 219 }, { "epoch": 0.04, "grad_norm": 3.025047957615552, "learning_rate": 1.9998516668546675e-05, "loss": 0.9968, "step": 220 }, { "epoch": 0.04, "grad_norm": 2.8004100771747336, "learning_rate": 1.9998425409961585e-05, "loss": 0.3122, "step": 221 }, { "epoch": 0.04, "grad_norm": 3.3814780618468405, "learning_rate": 1.9998331427529803e-05, "loss": 1.036, "step": 222 }, { "epoch": 0.04, "grad_norm": 4.41483071993078, "learning_rate": 1.9998234721276938e-05, "loss": 0.9827, "step": 223 }, { "epoch": 0.04, "grad_norm": 3.5040571675710366, "learning_rate": 1.999813529122933e-05, "loss": 1.1587, "step": 224 }, { "epoch": 0.04, "grad_norm": 3.6151380891925364, "learning_rate": 1.999803313741407e-05, "loss": 1.0639, "step": 225 }, { "epoch": 0.04, "grad_norm": 1.8104232202666626, "learning_rate": 1.9997928259858985e-05, "loss": 0.3139, "step": 226 }, { "epoch": 0.04, "grad_norm": 1.7222172530249549, "learning_rate": 1.9997820658592645e-05, "loss": 1.1178, "step": 227 }, { "epoch": 0.04, "grad_norm": 3.0938703725497474, "learning_rate": 1.999771033364437e-05, "loss": 1.0455, "step": 228 }, { "epoch": 0.04, "grad_norm": 3.418749073872275, "learning_rate": 1.999759728504421e-05, "loss": 1.0551, "step": 229 }, { "epoch": 0.04, "grad_norm": 2.7277682461111294, "learning_rate": 1.9997481512822966e-05, "loss": 1.1177, "step": 230 }, { "epoch": 0.04, "grad_norm": 3.101123846056707, "learning_rate": 1.9997363017012174e-05, "loss": 1.0091, "step": 231 }, { "epoch": 0.04, "grad_norm": 2.5607634452057937, "learning_rate": 1.9997241797644117e-05, "loss": 1.004, "step": 232 }, { "epoch": 0.04, "grad_norm": 5.0702474204943195, "learning_rate": 1.9997117854751818e-05, "loss": 1.0946, "step": 233 }, { "epoch": 0.04, "grad_norm": 4.222104469687374, "learning_rate": 1.9996991188369045e-05, "loss": 0.9981, "step": 234 }, { "epoch": 0.04, "grad_norm": 4.7799130234725915, "learning_rate": 1.9996861798530304e-05, "loss": 1.0069, "step": 235 }, { "epoch": 0.04, "grad_norm": 4.271307098658991, "learning_rate": 1.9996729685270844e-05, "loss": 1.0343, "step": 236 }, { "epoch": 0.04, "grad_norm": 1.9079624676321771, "learning_rate": 1.9996594848626655e-05, "loss": 1.0429, "step": 237 }, { "epoch": 0.04, "grad_norm": 2.957735529571673, "learning_rate": 1.999645728863447e-05, "loss": 1.098, "step": 238 }, { "epoch": 0.04, "grad_norm": 2.5607403551877357, "learning_rate": 1.9996317005331768e-05, "loss": 1.0731, "step": 239 }, { "epoch": 0.04, "grad_norm": 2.111048282000672, "learning_rate": 1.999617399875676e-05, "loss": 1.0383, "step": 240 }, { "epoch": 0.04, "grad_norm": 3.4585368697851244, "learning_rate": 1.9996028268948414e-05, "loss": 1.0922, "step": 241 }, { "epoch": 0.04, "grad_norm": 4.9508823648715286, "learning_rate": 1.9995879815946423e-05, "loss": 1.0897, "step": 242 }, { "epoch": 0.04, "grad_norm": 2.334136268429585, "learning_rate": 1.999572863979123e-05, "loss": 1.0838, "step": 243 }, { "epoch": 0.04, "grad_norm": 4.081922611816925, "learning_rate": 1.9995574740524024e-05, "loss": 0.2976, "step": 244 }, { "epoch": 0.04, "grad_norm": 3.680849964834623, "learning_rate": 1.9995418118186728e-05, "loss": 0.2656, "step": 245 }, { "epoch": 0.04, "grad_norm": 2.390143152682224, "learning_rate": 1.9995258772822012e-05, "loss": 1.0317, "step": 246 }, { "epoch": 0.04, "grad_norm": 2.3097701585048735, "learning_rate": 1.9995096704473287e-05, "loss": 1.0307, "step": 247 }, { "epoch": 0.04, "grad_norm": 2.1701439195362697, "learning_rate": 1.99949319131847e-05, "loss": 1.0382, "step": 248 }, { "epoch": 0.04, "grad_norm": 2.582994336271556, "learning_rate": 1.9994764399001153e-05, "loss": 1.1301, "step": 249 }, { "epoch": 0.04, "grad_norm": 3.524810861694252, "learning_rate": 1.999459416196827e-05, "loss": 1.0718, "step": 250 }, { "epoch": 0.04, "grad_norm": 3.5916102134239036, "learning_rate": 1.9994421202132436e-05, "loss": 1.0191, "step": 251 }, { "epoch": 0.04, "grad_norm": 2.2669956665064954, "learning_rate": 1.9994245519540772e-05, "loss": 1.0679, "step": 252 }, { "epoch": 0.04, "grad_norm": 1.8356507422331019, "learning_rate": 1.9994067114241135e-05, "loss": 1.0872, "step": 253 }, { "epoch": 0.04, "grad_norm": 4.70069594404897, "learning_rate": 1.9993885986282125e-05, "loss": 1.0382, "step": 254 }, { "epoch": 0.04, "grad_norm": 3.1299435994974663, "learning_rate": 1.9993702135713093e-05, "loss": 0.3399, "step": 255 }, { "epoch": 0.04, "grad_norm": 2.2389354981250364, "learning_rate": 1.9993515562584117e-05, "loss": 1.0331, "step": 256 }, { "epoch": 0.04, "grad_norm": 4.439254628756448, "learning_rate": 1.9993326266946033e-05, "loss": 1.013, "step": 257 }, { "epoch": 0.04, "grad_norm": 3.578566344915368, "learning_rate": 1.9993134248850402e-05, "loss": 1.0298, "step": 258 }, { "epoch": 0.04, "grad_norm": 3.0232901919257977, "learning_rate": 1.9992939508349544e-05, "loss": 1.0504, "step": 259 }, { "epoch": 0.04, "grad_norm": 3.8502519401256596, "learning_rate": 1.9992742045496502e-05, "loss": 1.0141, "step": 260 }, { "epoch": 0.04, "grad_norm": 3.3888932125571207, "learning_rate": 1.999254186034508e-05, "loss": 1.0134, "step": 261 }, { "epoch": 0.04, "grad_norm": 3.331826028172365, "learning_rate": 1.9992338952949805e-05, "loss": 1.0918, "step": 262 }, { "epoch": 0.04, "grad_norm": 3.030564692788744, "learning_rate": 1.9992133323365963e-05, "loss": 1.0609, "step": 263 }, { "epoch": 0.04, "grad_norm": 4.015015077099454, "learning_rate": 1.9991924971649566e-05, "loss": 1.0951, "step": 264 }, { "epoch": 0.04, "grad_norm": 3.6759107123023727, "learning_rate": 1.9991713897857376e-05, "loss": 1.0112, "step": 265 }, { "epoch": 0.04, "grad_norm": 3.313144549154001, "learning_rate": 1.99915001020469e-05, "loss": 1.0219, "step": 266 }, { "epoch": 0.04, "grad_norm": 3.2049168482808614, "learning_rate": 1.999128358427638e-05, "loss": 1.042, "step": 267 }, { "epoch": 0.04, "grad_norm": 5.224070868859944, "learning_rate": 1.9991064344604798e-05, "loss": 1.0386, "step": 268 }, { "epoch": 0.04, "grad_norm": 4.053985828994049, "learning_rate": 1.9990842383091884e-05, "loss": 1.0201, "step": 269 }, { "epoch": 0.04, "grad_norm": 3.73368286358524, "learning_rate": 1.9990617699798104e-05, "loss": 1.1931, "step": 270 }, { "epoch": 0.04, "grad_norm": 2.7946802830594923, "learning_rate": 1.999039029478467e-05, "loss": 1.0501, "step": 271 }, { "epoch": 0.04, "grad_norm": 3.672770938733888, "learning_rate": 1.9990160168113534e-05, "loss": 1.1261, "step": 272 }, { "epoch": 0.04, "grad_norm": 4.440149080677649, "learning_rate": 1.998992731984739e-05, "loss": 0.9421, "step": 273 }, { "epoch": 0.04, "grad_norm": 2.4172906618106063, "learning_rate": 1.998969175004967e-05, "loss": 1.0316, "step": 274 }, { "epoch": 0.04, "grad_norm": 2.6872221337322504, "learning_rate": 1.9989453458784544e-05, "loss": 1.058, "step": 275 }, { "epoch": 0.04, "grad_norm": 2.4264451744666276, "learning_rate": 1.998921244611694e-05, "loss": 0.3148, "step": 276 }, { "epoch": 0.04, "grad_norm": 3.0130544191871804, "learning_rate": 1.9988968712112512e-05, "loss": 1.0578, "step": 277 }, { "epoch": 0.04, "grad_norm": 2.6927859641320753, "learning_rate": 1.9988722256837656e-05, "loss": 0.3352, "step": 278 }, { "epoch": 0.04, "grad_norm": 4.055855584622735, "learning_rate": 1.998847308035952e-05, "loss": 1.021, "step": 279 }, { "epoch": 0.05, "grad_norm": 3.9933480742558727, "learning_rate": 1.998822118274598e-05, "loss": 1.1402, "step": 280 }, { "epoch": 0.05, "grad_norm": 4.553008566724533, "learning_rate": 1.9987966564065663e-05, "loss": 1.1009, "step": 281 }, { "epoch": 0.05, "grad_norm": 3.3869351468585696, "learning_rate": 1.9987709224387935e-05, "loss": 1.1061, "step": 282 }, { "epoch": 0.05, "grad_norm": 3.4356702616504906, "learning_rate": 1.9987449163782902e-05, "loss": 1.0734, "step": 283 }, { "epoch": 0.05, "grad_norm": 3.754717847278394, "learning_rate": 1.9987186382321408e-05, "loss": 1.132, "step": 284 }, { "epoch": 0.05, "grad_norm": 2.9460100314299074, "learning_rate": 1.998692088007505e-05, "loss": 1.0666, "step": 285 }, { "epoch": 0.05, "grad_norm": 2.7379181494617137, "learning_rate": 1.998665265711615e-05, "loss": 1.0594, "step": 286 }, { "epoch": 0.05, "grad_norm": 6.356408029277854, "learning_rate": 1.9986381713517783e-05, "loss": 1.0314, "step": 287 }, { "epoch": 0.05, "grad_norm": 3.0459350701147554, "learning_rate": 1.998610804935376e-05, "loss": 1.0638, "step": 288 }, { "epoch": 0.05, "grad_norm": 3.79520194158424, "learning_rate": 1.998583166469864e-05, "loss": 1.0585, "step": 289 }, { "epoch": 0.05, "grad_norm": 3.150749928654294, "learning_rate": 1.9985552559627708e-05, "loss": 1.0441, "step": 290 }, { "epoch": 0.05, "grad_norm": 2.6904383719800853, "learning_rate": 1.998527073421701e-05, "loss": 1.0591, "step": 291 }, { "epoch": 0.05, "grad_norm": 2.354622912011555, "learning_rate": 1.9984986188543314e-05, "loss": 0.9526, "step": 292 }, { "epoch": 0.05, "grad_norm": 4.258713261227788, "learning_rate": 1.9984698922684146e-05, "loss": 1.0602, "step": 293 }, { "epoch": 0.05, "grad_norm": 3.4368485266898445, "learning_rate": 1.9984408936717758e-05, "loss": 1.0235, "step": 294 }, { "epoch": 0.05, "grad_norm": 1.9601219274587898, "learning_rate": 1.9984116230723157e-05, "loss": 1.026, "step": 295 }, { "epoch": 0.05, "grad_norm": 3.8640906061800044, "learning_rate": 1.9983820804780082e-05, "loss": 1.114, "step": 296 }, { "epoch": 0.05, "grad_norm": 4.2854698156838875, "learning_rate": 1.998352265896901e-05, "loss": 1.0039, "step": 297 }, { "epoch": 0.05, "grad_norm": 3.677587472794652, "learning_rate": 1.9983221793371173e-05, "loss": 1.0286, "step": 298 }, { "epoch": 0.05, "grad_norm": 2.2883824715150873, "learning_rate": 1.9982918208068525e-05, "loss": 0.3091, "step": 299 }, { "epoch": 0.05, "grad_norm": 3.364586340998357, "learning_rate": 1.9982611903143782e-05, "loss": 1.1396, "step": 300 }, { "epoch": 0.05, "grad_norm": 3.145824019731962, "learning_rate": 1.998230287868038e-05, "loss": 1.0186, "step": 301 }, { "epoch": 0.05, "grad_norm": 1.4770409348845364, "learning_rate": 1.998199113476251e-05, "loss": 1.0341, "step": 302 }, { "epoch": 0.05, "grad_norm": 4.925138620662291, "learning_rate": 1.9981676671475103e-05, "loss": 1.0617, "step": 303 }, { "epoch": 0.05, "grad_norm": 4.681073271773045, "learning_rate": 1.9981359488903818e-05, "loss": 0.9611, "step": 304 }, { "epoch": 0.05, "grad_norm": 3.034412582589011, "learning_rate": 1.9981039587135078e-05, "loss": 1.0466, "step": 305 }, { "epoch": 0.05, "grad_norm": 3.392329758692605, "learning_rate": 1.9980716966256023e-05, "loss": 1.0832, "step": 306 }, { "epoch": 0.05, "grad_norm": 5.215972343933179, "learning_rate": 1.9980391626354543e-05, "loss": 1.0298, "step": 307 }, { "epoch": 0.05, "grad_norm": 3.2881849844470974, "learning_rate": 1.998006356751928e-05, "loss": 1.11, "step": 308 }, { "epoch": 0.05, "grad_norm": 2.5082124290180867, "learning_rate": 1.997973278983959e-05, "loss": 1.0058, "step": 309 }, { "epoch": 0.05, "grad_norm": 3.7201927656046045, "learning_rate": 1.99793992934056e-05, "loss": 1.0268, "step": 310 }, { "epoch": 0.05, "grad_norm": 1.9103127631910042, "learning_rate": 1.997906307830816e-05, "loss": 1.0264, "step": 311 }, { "epoch": 0.05, "grad_norm": 3.962486791739126, "learning_rate": 1.9978724144638863e-05, "loss": 1.0168, "step": 312 }, { "epoch": 0.05, "grad_norm": 3.0592197834731896, "learning_rate": 1.997838249249004e-05, "loss": 1.0824, "step": 313 }, { "epoch": 0.05, "grad_norm": 4.21513109756716, "learning_rate": 1.9978038121954775e-05, "loss": 1.1066, "step": 314 }, { "epoch": 0.05, "grad_norm": 3.09030499011761, "learning_rate": 1.9977691033126875e-05, "loss": 1.0578, "step": 315 }, { "epoch": 0.05, "grad_norm": 2.863210471658884, "learning_rate": 1.9977341226100905e-05, "loss": 1.0618, "step": 316 }, { "epoch": 0.05, "grad_norm": 2.240165090778278, "learning_rate": 1.9976988700972154e-05, "loss": 1.1073, "step": 317 }, { "epoch": 0.05, "grad_norm": 3.943778466848899, "learning_rate": 1.9976633457836664e-05, "loss": 1.0466, "step": 318 }, { "epoch": 0.05, "grad_norm": 3.5502667447736105, "learning_rate": 1.9976275496791216e-05, "loss": 1.0187, "step": 319 }, { "epoch": 0.05, "grad_norm": 4.7050308978923905, "learning_rate": 1.9975914817933325e-05, "loss": 1.1583, "step": 320 }, { "epoch": 0.05, "grad_norm": 2.779489109792715, "learning_rate": 1.9975551421361244e-05, "loss": 1.104, "step": 321 }, { "epoch": 0.05, "grad_norm": 3.8705767335350822, "learning_rate": 1.9975185307173985e-05, "loss": 1.0678, "step": 322 }, { "epoch": 0.05, "grad_norm": 3.0792881660768954, "learning_rate": 1.9974816475471277e-05, "loss": 1.0582, "step": 323 }, { "epoch": 0.05, "grad_norm": 2.2569040761092496, "learning_rate": 1.9974444926353605e-05, "loss": 1.0268, "step": 324 }, { "epoch": 0.05, "grad_norm": 2.5333100401493467, "learning_rate": 1.9974070659922184e-05, "loss": 1.0583, "step": 325 }, { "epoch": 0.05, "grad_norm": 2.0165520243991115, "learning_rate": 1.9973693676278985e-05, "loss": 0.3035, "step": 326 }, { "epoch": 0.05, "grad_norm": 4.439533251380409, "learning_rate": 1.9973313975526696e-05, "loss": 1.0745, "step": 327 }, { "epoch": 0.05, "grad_norm": 3.470164929738557, "learning_rate": 1.9972931557768768e-05, "loss": 1.0928, "step": 328 }, { "epoch": 0.05, "grad_norm": 2.9885127757020995, "learning_rate": 1.9972546423109378e-05, "loss": 1.051, "step": 329 }, { "epoch": 0.05, "grad_norm": 4.016268548717817, "learning_rate": 1.9972158571653442e-05, "loss": 1.0445, "step": 330 }, { "epoch": 0.05, "grad_norm": 2.737339178787335, "learning_rate": 1.9971768003506635e-05, "loss": 1.0692, "step": 331 }, { "epoch": 0.05, "grad_norm": 4.115312239711862, "learning_rate": 1.9971374718775346e-05, "loss": 1.0007, "step": 332 }, { "epoch": 0.05, "grad_norm": 2.621550518776542, "learning_rate": 1.997097871756672e-05, "loss": 1.0413, "step": 333 }, { "epoch": 0.05, "grad_norm": 4.812099489662811, "learning_rate": 1.9970579999988643e-05, "loss": 0.9342, "step": 334 }, { "epoch": 0.05, "grad_norm": 3.5183680704830373, "learning_rate": 1.9970178566149734e-05, "loss": 1.0273, "step": 335 }, { "epoch": 0.05, "grad_norm": 2.0134262986014417, "learning_rate": 1.996977441615935e-05, "loss": 1.0873, "step": 336 }, { "epoch": 0.05, "grad_norm": 4.576237937329922, "learning_rate": 1.99693675501276e-05, "loss": 1.0406, "step": 337 }, { "epoch": 0.05, "grad_norm": 3.0626258045748354, "learning_rate": 1.996895796816532e-05, "loss": 1.0039, "step": 338 }, { "epoch": 0.05, "grad_norm": 3.613944063996131, "learning_rate": 1.9968545670384094e-05, "loss": 0.985, "step": 339 }, { "epoch": 0.05, "grad_norm": 4.0787147300193665, "learning_rate": 1.9968130656896244e-05, "loss": 1.1332, "step": 340 }, { "epoch": 0.05, "grad_norm": 4.473855256864598, "learning_rate": 1.996771292781483e-05, "loss": 1.0736, "step": 341 }, { "epoch": 0.06, "grad_norm": 3.4554287919578814, "learning_rate": 1.996729248325365e-05, "loss": 1.0947, "step": 342 }, { "epoch": 0.06, "grad_norm": 4.032248203388584, "learning_rate": 1.996686932332725e-05, "loss": 1.0272, "step": 343 }, { "epoch": 0.06, "grad_norm": 2.9876794545691134, "learning_rate": 1.9966443448150906e-05, "loss": 1.0302, "step": 344 }, { "epoch": 0.06, "grad_norm": 5.0714142106465, "learning_rate": 1.9966014857840644e-05, "loss": 1.0046, "step": 345 }, { "epoch": 0.06, "grad_norm": 3.8161930895403176, "learning_rate": 1.9965583552513216e-05, "loss": 1.0095, "step": 346 }, { "epoch": 0.06, "grad_norm": 2.3506985214651324, "learning_rate": 1.9965149532286126e-05, "loss": 1.0541, "step": 347 }, { "epoch": 0.06, "grad_norm": 4.0792506042094745, "learning_rate": 1.9964712797277614e-05, "loss": 1.0922, "step": 348 }, { "epoch": 0.06, "grad_norm": 3.471284507439832, "learning_rate": 1.9964273347606656e-05, "loss": 1.0989, "step": 349 }, { "epoch": 0.06, "grad_norm": 3.23849033835688, "learning_rate": 1.996383118339297e-05, "loss": 0.9968, "step": 350 }, { "epoch": 0.06, "grad_norm": 3.0461232358332615, "learning_rate": 1.9963386304757018e-05, "loss": 1.0389, "step": 351 }, { "epoch": 0.06, "grad_norm": 1.9561114252996596, "learning_rate": 1.9962938711819993e-05, "loss": 1.0916, "step": 352 }, { "epoch": 0.06, "grad_norm": 3.180665561674501, "learning_rate": 1.9962488404703832e-05, "loss": 1.1246, "step": 353 }, { "epoch": 0.06, "grad_norm": 4.135699412227138, "learning_rate": 1.996203538353121e-05, "loss": 1.0704, "step": 354 }, { "epoch": 0.06, "grad_norm": 3.525489902947693, "learning_rate": 1.9961579648425552e-05, "loss": 0.9729, "step": 355 }, { "epoch": 0.06, "grad_norm": 2.304421309276625, "learning_rate": 1.9961121199510996e-05, "loss": 1.0821, "step": 356 }, { "epoch": 0.06, "grad_norm": 3.1805393287063364, "learning_rate": 1.9960660036912453e-05, "loss": 1.0275, "step": 357 }, { "epoch": 0.06, "grad_norm": 4.031075880445101, "learning_rate": 1.9960196160755542e-05, "loss": 1.1032, "step": 358 }, { "epoch": 0.06, "grad_norm": 3.605014561638278, "learning_rate": 1.995972957116665e-05, "loss": 0.9767, "step": 359 }, { "epoch": 0.06, "grad_norm": 3.752571622913441, "learning_rate": 1.9959260268272876e-05, "loss": 1.0188, "step": 360 }, { "epoch": 0.06, "grad_norm": 2.936862632918637, "learning_rate": 1.9958788252202078e-05, "loss": 1.1367, "step": 361 }, { "epoch": 0.06, "grad_norm": 2.1273205372373614, "learning_rate": 1.9958313523082842e-05, "loss": 1.117, "step": 362 }, { "epoch": 0.06, "grad_norm": 3.0331614651739063, "learning_rate": 1.9957836081044498e-05, "loss": 1.0008, "step": 363 }, { "epoch": 0.06, "grad_norm": 2.539510927949283, "learning_rate": 1.995735592621712e-05, "loss": 1.0453, "step": 364 }, { "epoch": 0.06, "grad_norm": 2.8456298986267514, "learning_rate": 1.9956873058731514e-05, "loss": 1.1708, "step": 365 }, { "epoch": 0.06, "grad_norm": 3.614819272537413, "learning_rate": 1.9956387478719222e-05, "loss": 1.036, "step": 366 }, { "epoch": 0.06, "grad_norm": 4.482015061877032, "learning_rate": 1.9955899186312527e-05, "loss": 1.0395, "step": 367 }, { "epoch": 0.06, "grad_norm": 3.273292993528745, "learning_rate": 1.9955408181644464e-05, "loss": 1.099, "step": 368 }, { "epoch": 0.06, "grad_norm": 2.687597849639246, "learning_rate": 1.9954914464848787e-05, "loss": 1.038, "step": 369 }, { "epoch": 0.06, "grad_norm": 3.9104209909362675, "learning_rate": 1.995441803606e-05, "loss": 1.0359, "step": 370 }, { "epoch": 0.06, "grad_norm": 4.513088900109113, "learning_rate": 1.9953918895413346e-05, "loss": 1.0615, "step": 371 }, { "epoch": 0.06, "grad_norm": 3.0211946117925303, "learning_rate": 1.9953417043044806e-05, "loss": 1.0484, "step": 372 }, { "epoch": 0.06, "grad_norm": 3.566844232245026, "learning_rate": 1.9952912479091094e-05, "loss": 1.0291, "step": 373 }, { "epoch": 0.06, "grad_norm": 4.064727407326846, "learning_rate": 1.9952405203689668e-05, "loss": 1.0326, "step": 374 }, { "epoch": 0.06, "grad_norm": 4.9872927362116934, "learning_rate": 1.9951895216978725e-05, "loss": 1.0252, "step": 375 }, { "epoch": 0.06, "grad_norm": 3.060015390644139, "learning_rate": 1.9951382519097197e-05, "loss": 1.046, "step": 376 }, { "epoch": 0.06, "grad_norm": 3.2392476552919587, "learning_rate": 1.9950867110184765e-05, "loss": 1.0698, "step": 377 }, { "epoch": 0.06, "grad_norm": 2.63385143213254, "learning_rate": 1.995034899038183e-05, "loss": 1.0213, "step": 378 }, { "epoch": 0.06, "grad_norm": 2.9630124514032015, "learning_rate": 1.994982815982955e-05, "loss": 0.9705, "step": 379 }, { "epoch": 0.06, "grad_norm": 2.0925585644911338, "learning_rate": 1.994930461866981e-05, "loss": 1.0641, "step": 380 }, { "epoch": 0.06, "grad_norm": 2.7776268494187293, "learning_rate": 1.9948778367045235e-05, "loss": 0.9872, "step": 381 }, { "epoch": 0.06, "grad_norm": 2.7840933677648327, "learning_rate": 1.99482494050992e-05, "loss": 1.0306, "step": 382 }, { "epoch": 0.06, "grad_norm": 3.9643195947404597, "learning_rate": 1.9947717732975795e-05, "loss": 1.0725, "step": 383 }, { "epoch": 0.06, "grad_norm": 4.079111498783385, "learning_rate": 1.994718335081987e-05, "loss": 1.0535, "step": 384 }, { "epoch": 0.06, "grad_norm": 1.9597484737662323, "learning_rate": 1.9946646258777008e-05, "loss": 1.0306, "step": 385 }, { "epoch": 0.06, "grad_norm": 3.5208875813715976, "learning_rate": 1.994610645699352e-05, "loss": 0.9799, "step": 386 }, { "epoch": 0.06, "grad_norm": 2.533458264516243, "learning_rate": 1.994556394561647e-05, "loss": 0.9793, "step": 387 }, { "epoch": 0.06, "grad_norm": 4.097699073298636, "learning_rate": 1.994501872479365e-05, "loss": 1.0295, "step": 388 }, { "epoch": 0.06, "grad_norm": 4.855956810308253, "learning_rate": 1.9944470794673592e-05, "loss": 0.9777, "step": 389 }, { "epoch": 0.06, "grad_norm": 4.010849305963934, "learning_rate": 1.994392015540557e-05, "loss": 1.0109, "step": 390 }, { "epoch": 0.06, "grad_norm": 3.272736613183227, "learning_rate": 1.994336680713959e-05, "loss": 1.0862, "step": 391 }, { "epoch": 0.06, "grad_norm": 2.9448403706057835, "learning_rate": 1.9942810750026403e-05, "loss": 1.0175, "step": 392 }, { "epoch": 0.06, "grad_norm": 4.1041133251047945, "learning_rate": 1.9942251984217492e-05, "loss": 1.0088, "step": 393 }, { "epoch": 0.06, "grad_norm": 5.089226896859173, "learning_rate": 1.994169050986508e-05, "loss": 1.0467, "step": 394 }, { "epoch": 0.06, "grad_norm": 2.8035040353549023, "learning_rate": 1.9941126327122128e-05, "loss": 1.0038, "step": 395 }, { "epoch": 0.06, "grad_norm": 3.5327009072915327, "learning_rate": 1.9940559436142338e-05, "loss": 1.0498, "step": 396 }, { "epoch": 0.06, "grad_norm": 3.268523467695239, "learning_rate": 1.9939989837080143e-05, "loss": 0.9903, "step": 397 }, { "epoch": 0.06, "grad_norm": 3.152234383993884, "learning_rate": 1.9939417530090722e-05, "loss": 1.0475, "step": 398 }, { "epoch": 0.06, "grad_norm": 1.9653250557911925, "learning_rate": 1.993884251532998e-05, "loss": 1.0644, "step": 399 }, { "epoch": 0.06, "grad_norm": 2.57539519082927, "learning_rate": 1.9938264792954573e-05, "loss": 1.0546, "step": 400 }, { "epoch": 0.06, "grad_norm": 2.724674653395871, "learning_rate": 1.9937684363121886e-05, "loss": 0.9831, "step": 401 }, { "epoch": 0.06, "grad_norm": 2.0465567537961875, "learning_rate": 1.9937101225990046e-05, "loss": 1.0153, "step": 402 }, { "epoch": 0.06, "grad_norm": 5.176045099406858, "learning_rate": 1.993651538171791e-05, "loss": 1.064, "step": 403 }, { "epoch": 0.07, "grad_norm": 2.912793084980154, "learning_rate": 1.993592683046509e-05, "loss": 1.05, "step": 404 }, { "epoch": 0.07, "grad_norm": 3.5253827800293602, "learning_rate": 1.993533557239191e-05, "loss": 0.9856, "step": 405 }, { "epoch": 0.07, "grad_norm": 2.232105730020474, "learning_rate": 1.993474160765945e-05, "loss": 0.9621, "step": 406 }, { "epoch": 0.07, "grad_norm": 3.182971055618596, "learning_rate": 1.9934144936429526e-05, "loss": 1.0295, "step": 407 }, { "epoch": 0.07, "grad_norm": 1.4949481452445816, "learning_rate": 1.9933545558864686e-05, "loss": 0.338, "step": 408 }, { "epoch": 0.07, "grad_norm": 4.017124237547624, "learning_rate": 1.9932943475128215e-05, "loss": 0.9388, "step": 409 }, { "epoch": 0.07, "grad_norm": 2.7686574227245884, "learning_rate": 1.993233868538414e-05, "loss": 1.0336, "step": 410 }, { "epoch": 0.07, "grad_norm": 4.2163746115724, "learning_rate": 1.9931731189797216e-05, "loss": 1.006, "step": 411 }, { "epoch": 0.07, "grad_norm": 1.4347508721303275, "learning_rate": 1.993112098853295e-05, "loss": 1.0655, "step": 412 }, { "epoch": 0.07, "grad_norm": 4.498298747290252, "learning_rate": 1.9930508081757572e-05, "loss": 1.0191, "step": 413 }, { "epoch": 0.07, "grad_norm": 2.4471326397614197, "learning_rate": 1.9929892469638056e-05, "loss": 1.0888, "step": 414 }, { "epoch": 0.07, "grad_norm": 3.7646277915695223, "learning_rate": 1.9929274152342113e-05, "loss": 1.0169, "step": 415 }, { "epoch": 0.07, "grad_norm": 2.2949271733648944, "learning_rate": 1.9928653130038188e-05, "loss": 1.0706, "step": 416 }, { "epoch": 0.07, "grad_norm": 2.7611901021447047, "learning_rate": 1.9928029402895466e-05, "loss": 1.0495, "step": 417 }, { "epoch": 0.07, "grad_norm": 2.0638812670674644, "learning_rate": 1.992740297108387e-05, "loss": 0.3245, "step": 418 }, { "epoch": 0.07, "grad_norm": 3.58230959144686, "learning_rate": 1.992677383477405e-05, "loss": 1.0581, "step": 419 }, { "epoch": 0.07, "grad_norm": 2.378778764287219, "learning_rate": 1.9926141994137404e-05, "loss": 1.0348, "step": 420 }, { "epoch": 0.07, "grad_norm": 3.7062833170173293, "learning_rate": 1.9925507449346066e-05, "loss": 1.0495, "step": 421 }, { "epoch": 0.07, "grad_norm": 4.402779152298695, "learning_rate": 1.99248702005729e-05, "loss": 1.0662, "step": 422 }, { "epoch": 0.07, "grad_norm": 4.0936165125391755, "learning_rate": 1.9924230247991508e-05, "loss": 1.0842, "step": 423 }, { "epoch": 0.07, "grad_norm": 4.212984114994767, "learning_rate": 1.9923587591776236e-05, "loss": 1.0057, "step": 424 }, { "epoch": 0.07, "grad_norm": 2.3155022798418665, "learning_rate": 1.992294223210216e-05, "loss": 0.9849, "step": 425 }, { "epoch": 0.07, "grad_norm": 2.966761354656933, "learning_rate": 1.9922294169145088e-05, "loss": 1.0287, "step": 426 }, { "epoch": 0.07, "grad_norm": 1.7730459041386897, "learning_rate": 1.992164340308158e-05, "loss": 1.0671, "step": 427 }, { "epoch": 0.07, "grad_norm": 2.4906877650988073, "learning_rate": 1.9920989934088914e-05, "loss": 0.9781, "step": 428 }, { "epoch": 0.07, "grad_norm": 4.121714352708381, "learning_rate": 1.9920333762345116e-05, "loss": 0.9305, "step": 429 }, { "epoch": 0.07, "grad_norm": 2.330317444194364, "learning_rate": 1.9919674888028946e-05, "loss": 1.0166, "step": 430 }, { "epoch": 0.07, "grad_norm": 2.5911876477009814, "learning_rate": 1.99190133113199e-05, "loss": 1.0899, "step": 431 }, { "epoch": 0.07, "grad_norm": 3.62402580588039, "learning_rate": 1.991834903239821e-05, "loss": 0.9891, "step": 432 }, { "epoch": 0.07, "grad_norm": 3.2672668418207933, "learning_rate": 1.991768205144484e-05, "loss": 1.0552, "step": 433 }, { "epoch": 0.07, "grad_norm": 2.66764183890039, "learning_rate": 1.9917012368641497e-05, "loss": 0.9861, "step": 434 }, { "epoch": 0.07, "grad_norm": 4.118091230718606, "learning_rate": 1.991633998417062e-05, "loss": 1.0077, "step": 435 }, { "epoch": 0.07, "grad_norm": 2.486165487119814, "learning_rate": 1.991566489821539e-05, "loss": 0.3261, "step": 436 }, { "epoch": 0.07, "grad_norm": 4.328790606606507, "learning_rate": 1.9914987110959713e-05, "loss": 0.9804, "step": 437 }, { "epoch": 0.07, "grad_norm": 4.445346610905483, "learning_rate": 1.9914306622588237e-05, "loss": 1.0262, "step": 438 }, { "epoch": 0.07, "grad_norm": 3.735067263486503, "learning_rate": 1.9913623433286346e-05, "loss": 1.0832, "step": 439 }, { "epoch": 0.07, "grad_norm": 1.8048337406504735, "learning_rate": 1.9912937543240164e-05, "loss": 0.3319, "step": 440 }, { "epoch": 0.07, "grad_norm": 3.24350691455982, "learning_rate": 1.9912248952636543e-05, "loss": 1.0434, "step": 441 }, { "epoch": 0.07, "grad_norm": 4.496778818762721, "learning_rate": 1.9911557661663073e-05, "loss": 1.0062, "step": 442 }, { "epoch": 0.07, "grad_norm": 2.204911610049229, "learning_rate": 1.9910863670508088e-05, "loss": 1.1262, "step": 443 }, { "epoch": 0.07, "grad_norm": 3.2299522346767695, "learning_rate": 1.991016697936064e-05, "loss": 1.0487, "step": 444 }, { "epoch": 0.07, "grad_norm": 4.029265395765909, "learning_rate": 1.990946758841053e-05, "loss": 1.0111, "step": 445 }, { "epoch": 0.07, "grad_norm": 2.0427363830642906, "learning_rate": 1.9908765497848296e-05, "loss": 0.9231, "step": 446 }, { "epoch": 0.07, "grad_norm": 4.8396279792625725, "learning_rate": 1.99080607078652e-05, "loss": 1.0711, "step": 447 }, { "epoch": 0.07, "grad_norm": 3.9113403112872556, "learning_rate": 1.9907353218653254e-05, "loss": 1.0374, "step": 448 }, { "epoch": 0.07, "grad_norm": 3.725555347802007, "learning_rate": 1.9906643030405194e-05, "loss": 1.0418, "step": 449 }, { "epoch": 0.07, "grad_norm": 2.156081151570929, "learning_rate": 1.990593014331449e-05, "loss": 1.0904, "step": 450 }, { "epoch": 0.07, "grad_norm": 4.398372453171333, "learning_rate": 1.9905214557575357e-05, "loss": 1.0527, "step": 451 }, { "epoch": 0.07, "grad_norm": 3.9744988803360455, "learning_rate": 1.990449627338274e-05, "loss": 0.9587, "step": 452 }, { "epoch": 0.07, "grad_norm": 3.014395033081482, "learning_rate": 1.990377529093232e-05, "loss": 1.0371, "step": 453 }, { "epoch": 0.07, "grad_norm": 3.6623648348398716, "learning_rate": 1.9903051610420513e-05, "loss": 1.0348, "step": 454 }, { "epoch": 0.07, "grad_norm": 3.353845347934053, "learning_rate": 1.990232523204447e-05, "loss": 1.0683, "step": 455 }, { "epoch": 0.07, "grad_norm": 3.1535725331732682, "learning_rate": 1.9901596156002068e-05, "loss": 1.0542, "step": 456 }, { "epoch": 0.07, "grad_norm": 3.110982508120629, "learning_rate": 1.9900864382491936e-05, "loss": 1.0992, "step": 457 }, { "epoch": 0.07, "grad_norm": 3.350056343174157, "learning_rate": 1.9900129911713432e-05, "loss": 1.0037, "step": 458 }, { "epoch": 0.07, "grad_norm": 3.2380677303513545, "learning_rate": 1.9899392743866638e-05, "loss": 1.0468, "step": 459 }, { "epoch": 0.07, "grad_norm": 2.8899162454579406, "learning_rate": 1.989865287915238e-05, "loss": 1.0219, "step": 460 }, { "epoch": 0.07, "grad_norm": 3.5343800403886196, "learning_rate": 1.9897910317772225e-05, "loss": 0.9964, "step": 461 }, { "epoch": 0.07, "grad_norm": 2.6976153722207417, "learning_rate": 1.989716505992846e-05, "loss": 1.0125, "step": 462 }, { "epoch": 0.07, "grad_norm": 3.386098063437449, "learning_rate": 1.9896417105824113e-05, "loss": 0.9987, "step": 463 }, { "epoch": 0.07, "grad_norm": 2.500282653088829, "learning_rate": 1.9895666455662953e-05, "loss": 1.0021, "step": 464 }, { "epoch": 0.07, "grad_norm": 2.125968936596337, "learning_rate": 1.9894913109649473e-05, "loss": 1.011, "step": 465 }, { "epoch": 0.08, "grad_norm": 4.340896876961158, "learning_rate": 1.9894157067988908e-05, "loss": 0.9719, "step": 466 }, { "epoch": 0.08, "grad_norm": 3.297011964673868, "learning_rate": 1.989339833088722e-05, "loss": 1.0313, "step": 467 }, { "epoch": 0.08, "grad_norm": 3.5324634728834576, "learning_rate": 1.989263689855112e-05, "loss": 1.0035, "step": 468 }, { "epoch": 0.08, "grad_norm": 2.259044271615358, "learning_rate": 1.9891872771188033e-05, "loss": 0.9243, "step": 469 }, { "epoch": 0.08, "grad_norm": 2.1116074792786206, "learning_rate": 1.989110594900613e-05, "loss": 1.1036, "step": 470 }, { "epoch": 0.08, "grad_norm": 2.2157704939102674, "learning_rate": 1.989033643221432e-05, "loss": 1.0843, "step": 471 }, { "epoch": 0.08, "grad_norm": 3.881955457082985, "learning_rate": 1.9889564221022238e-05, "loss": 0.9621, "step": 472 }, { "epoch": 0.08, "grad_norm": 3.5050876968883986, "learning_rate": 1.9888789315640253e-05, "loss": 1.0188, "step": 473 }, { "epoch": 0.08, "grad_norm": 3.5070220804381407, "learning_rate": 1.9888011716279473e-05, "loss": 0.972, "step": 474 }, { "epoch": 0.08, "grad_norm": 1.4395646733989813, "learning_rate": 1.9887231423151734e-05, "loss": 0.9938, "step": 475 }, { "epoch": 0.08, "grad_norm": 2.458247382606518, "learning_rate": 1.9886448436469618e-05, "loss": 0.9817, "step": 476 }, { "epoch": 0.08, "grad_norm": 2.3243305596136716, "learning_rate": 1.988566275644642e-05, "loss": 0.3161, "step": 477 }, { "epoch": 0.08, "grad_norm": 2.9756263201846243, "learning_rate": 1.988487438329619e-05, "loss": 1.0069, "step": 478 }, { "epoch": 0.08, "grad_norm": 4.165906658795288, "learning_rate": 1.98840833172337e-05, "loss": 0.9468, "step": 479 }, { "epoch": 0.08, "grad_norm": 3.5842030970384915, "learning_rate": 1.988328955847446e-05, "loss": 0.9585, "step": 480 }, { "epoch": 0.08, "grad_norm": 4.925454621221071, "learning_rate": 1.9882493107234706e-05, "loss": 0.9145, "step": 481 }, { "epoch": 0.08, "grad_norm": 2.604091571996699, "learning_rate": 1.9881693963731418e-05, "loss": 1.0323, "step": 482 }, { "epoch": 0.08, "grad_norm": 2.3394926950952537, "learning_rate": 1.9880892128182302e-05, "loss": 1.0482, "step": 483 }, { "epoch": 0.08, "grad_norm": 2.5649503761830355, "learning_rate": 1.9880087600805807e-05, "loss": 1.0269, "step": 484 }, { "epoch": 0.08, "grad_norm": 3.1939274927077346, "learning_rate": 1.9879280381821104e-05, "loss": 1.0035, "step": 485 }, { "epoch": 0.08, "grad_norm": 1.8635552482756863, "learning_rate": 1.9878470471448094e-05, "loss": 1.0567, "step": 486 }, { "epoch": 0.08, "grad_norm": 4.036442923567695, "learning_rate": 1.987765786990743e-05, "loss": 1.0273, "step": 487 }, { "epoch": 0.08, "grad_norm": 3.1092409651348127, "learning_rate": 1.9876842577420484e-05, "loss": 1.1017, "step": 488 }, { "epoch": 0.08, "grad_norm": 2.450716914170647, "learning_rate": 1.987602459420936e-05, "loss": 1.0268, "step": 489 }, { "epoch": 0.08, "grad_norm": 1.875063322905332, "learning_rate": 1.9875203920496905e-05, "loss": 0.3274, "step": 490 }, { "epoch": 0.08, "grad_norm": 3.805918406306986, "learning_rate": 1.987438055650669e-05, "loss": 1.04, "step": 491 }, { "epoch": 0.08, "grad_norm": 1.4274406913822866, "learning_rate": 1.987355450246302e-05, "loss": 1.0726, "step": 492 }, { "epoch": 0.08, "grad_norm": 3.2332670368856826, "learning_rate": 1.9872725758590943e-05, "loss": 1.0297, "step": 493 }, { "epoch": 0.08, "grad_norm": 2.6316884525002098, "learning_rate": 1.987189432511622e-05, "loss": 1.0167, "step": 494 }, { "epoch": 0.08, "grad_norm": 3.071765817776123, "learning_rate": 1.9871060202265367e-05, "loss": 1.0219, "step": 495 }, { "epoch": 0.08, "grad_norm": 4.524109574695076, "learning_rate": 1.9870223390265614e-05, "loss": 1.042, "step": 496 }, { "epoch": 0.08, "grad_norm": 2.61317012533491, "learning_rate": 1.9869383889344937e-05, "loss": 1.0753, "step": 497 }, { "epoch": 0.08, "grad_norm": 3.111772236135698, "learning_rate": 1.9868541699732037e-05, "loss": 0.9779, "step": 498 }, { "epoch": 0.08, "grad_norm": 4.33654714886353, "learning_rate": 1.986769682165635e-05, "loss": 1.0074, "step": 499 }, { "epoch": 0.08, "grad_norm": 2.1096421814295883, "learning_rate": 1.9866849255348045e-05, "loss": 1.1031, "step": 500 }, { "epoch": 0.08, "grad_norm": 3.3723246955978396, "learning_rate": 1.9865999001038022e-05, "loss": 0.9789, "step": 501 }, { "epoch": 0.08, "grad_norm": 2.9335745697816726, "learning_rate": 1.986514605895791e-05, "loss": 1.1027, "step": 502 }, { "epoch": 0.08, "grad_norm": 3.3751073749557166, "learning_rate": 1.9864290429340084e-05, "loss": 1.0391, "step": 503 }, { "epoch": 0.08, "grad_norm": 2.514665313960359, "learning_rate": 1.9863432112417628e-05, "loss": 1.0072, "step": 504 }, { "epoch": 0.08, "grad_norm": 2.6550686172555324, "learning_rate": 1.9862571108424377e-05, "loss": 0.9792, "step": 505 }, { "epoch": 0.08, "grad_norm": 3.503470063038213, "learning_rate": 1.9861707417594896e-05, "loss": 1.0509, "step": 506 }, { "epoch": 0.08, "grad_norm": 4.319758181338309, "learning_rate": 1.9860841040164476e-05, "loss": 1.0285, "step": 507 }, { "epoch": 0.08, "grad_norm": 3.7549747530193844, "learning_rate": 1.9859971976369136e-05, "loss": 1.0475, "step": 508 }, { "epoch": 0.08, "grad_norm": 3.626182100330299, "learning_rate": 1.9859100226445643e-05, "loss": 1.1195, "step": 509 }, { "epoch": 0.08, "grad_norm": 3.887616568531848, "learning_rate": 1.9858225790631477e-05, "loss": 1.0285, "step": 510 }, { "epoch": 0.08, "grad_norm": 3.8077634561704294, "learning_rate": 1.9857348669164863e-05, "loss": 1.1182, "step": 511 }, { "epoch": 0.08, "grad_norm": 3.263375853851495, "learning_rate": 1.9856468862284752e-05, "loss": 1.0018, "step": 512 }, { "epoch": 0.08, "grad_norm": 3.513636319042584, "learning_rate": 1.9855586370230832e-05, "loss": 1.0246, "step": 513 }, { "epoch": 0.08, "grad_norm": 2.2758197690805595, "learning_rate": 1.9854701193243507e-05, "loss": 0.3337, "step": 514 }, { "epoch": 0.08, "grad_norm": 3.6322685244608586, "learning_rate": 1.9853813331563934e-05, "loss": 1.054, "step": 515 }, { "epoch": 0.08, "grad_norm": 5.047966900978454, "learning_rate": 1.9852922785433985e-05, "loss": 1.055, "step": 516 }, { "epoch": 0.08, "grad_norm": 4.851739999961012, "learning_rate": 1.9852029555096278e-05, "loss": 1.0366, "step": 517 }, { "epoch": 0.08, "grad_norm": 4.368917542220573, "learning_rate": 1.985113364079414e-05, "loss": 1.0253, "step": 518 }, { "epoch": 0.08, "grad_norm": 3.5096892707853526, "learning_rate": 1.9850235042771655e-05, "loss": 0.9856, "step": 519 }, { "epoch": 0.08, "grad_norm": 3.7430876284428156, "learning_rate": 1.984933376127362e-05, "loss": 1.0267, "step": 520 }, { "epoch": 0.08, "grad_norm": 1.764416711620266, "learning_rate": 1.9848429796545566e-05, "loss": 0.3285, "step": 521 }, { "epoch": 0.08, "grad_norm": 2.8362218120547786, "learning_rate": 1.9847523148833767e-05, "loss": 1.0888, "step": 522 }, { "epoch": 0.08, "grad_norm": 2.9649277023069596, "learning_rate": 1.9846613818385215e-05, "loss": 1.013, "step": 523 }, { "epoch": 0.08, "grad_norm": 1.0717951397298189, "learning_rate": 1.984570180544763e-05, "loss": 0.3353, "step": 524 }, { "epoch": 0.08, "grad_norm": 3.735762079157712, "learning_rate": 1.9844787110269478e-05, "loss": 0.9621, "step": 525 }, { "epoch": 0.08, "grad_norm": 3.430842802417491, "learning_rate": 1.984386973309994e-05, "loss": 1.021, "step": 526 }, { "epoch": 0.08, "grad_norm": 3.260260597531792, "learning_rate": 1.9842949674188946e-05, "loss": 1.0383, "step": 527 }, { "epoch": 0.09, "grad_norm": 3.78371557133729, "learning_rate": 1.9842026933787134e-05, "loss": 1.031, "step": 528 }, { "epoch": 0.09, "grad_norm": 3.8129519835659473, "learning_rate": 1.984110151214589e-05, "loss": 0.9729, "step": 529 }, { "epoch": 0.09, "grad_norm": 3.317057515282541, "learning_rate": 1.984017340951732e-05, "loss": 1.02, "step": 530 }, { "epoch": 0.09, "grad_norm": 2.364968547168325, "learning_rate": 1.983924262615427e-05, "loss": 1.0267, "step": 531 }, { "epoch": 0.09, "grad_norm": 2.828992004696068, "learning_rate": 1.9838309162310304e-05, "loss": 0.9811, "step": 532 }, { "epoch": 0.09, "grad_norm": 4.414532391678944, "learning_rate": 1.9837373018239733e-05, "loss": 1.0459, "step": 533 }, { "epoch": 0.09, "grad_norm": 3.5308066570854937, "learning_rate": 1.983643419419758e-05, "loss": 1.0369, "step": 534 }, { "epoch": 0.09, "grad_norm": 3.85796487970024, "learning_rate": 1.983549269043961e-05, "loss": 1.0848, "step": 535 }, { "epoch": 0.09, "grad_norm": 2.3724229785246944, "learning_rate": 1.9834548507222312e-05, "loss": 1.0053, "step": 536 }, { "epoch": 0.09, "grad_norm": 3.968099826210768, "learning_rate": 1.9833601644802915e-05, "loss": 1.0106, "step": 537 }, { "epoch": 0.09, "grad_norm": 4.619152806207511, "learning_rate": 1.983265210343936e-05, "loss": 1.0444, "step": 538 }, { "epoch": 0.09, "grad_norm": 4.618536685875128, "learning_rate": 1.9831699883390335e-05, "loss": 0.9533, "step": 539 }, { "epoch": 0.09, "grad_norm": 2.329036047951747, "learning_rate": 1.9830744984915247e-05, "loss": 0.9608, "step": 540 }, { "epoch": 0.09, "grad_norm": 3.7266391450369563, "learning_rate": 1.9829787408274247e-05, "loss": 0.9551, "step": 541 }, { "epoch": 0.09, "grad_norm": 4.7515027530103815, "learning_rate": 1.982882715372819e-05, "loss": 0.9852, "step": 542 }, { "epoch": 0.09, "grad_norm": 4.054593888159502, "learning_rate": 1.9827864221538684e-05, "loss": 1.0557, "step": 543 }, { "epoch": 0.09, "grad_norm": 3.9764611731549557, "learning_rate": 1.982689861196806e-05, "loss": 0.9803, "step": 544 }, { "epoch": 0.09, "grad_norm": 2.240836982861213, "learning_rate": 1.9825930325279373e-05, "loss": 1.0006, "step": 545 }, { "epoch": 0.09, "grad_norm": 1.9685328530342951, "learning_rate": 1.982495936173641e-05, "loss": 1.0642, "step": 546 }, { "epoch": 0.09, "grad_norm": 4.1545183977286015, "learning_rate": 1.9823985721603693e-05, "loss": 0.9714, "step": 547 }, { "epoch": 0.09, "grad_norm": 2.0211390103528064, "learning_rate": 1.9823009405146465e-05, "loss": 1.0666, "step": 548 }, { "epoch": 0.09, "grad_norm": 3.252385217868221, "learning_rate": 1.98220304126307e-05, "loss": 0.9133, "step": 549 }, { "epoch": 0.09, "grad_norm": 4.2490682141472575, "learning_rate": 1.9821048744323108e-05, "loss": 0.9718, "step": 550 }, { "epoch": 0.09, "grad_norm": 2.474161903973787, "learning_rate": 1.9820064400491118e-05, "loss": 1.0101, "step": 551 }, { "epoch": 0.09, "grad_norm": 2.3480138014496625, "learning_rate": 1.9819077381402895e-05, "loss": 1.0692, "step": 552 }, { "epoch": 0.09, "grad_norm": 2.66359404531175, "learning_rate": 1.9818087687327328e-05, "loss": 1.0673, "step": 553 }, { "epoch": 0.09, "grad_norm": 3.883813986004075, "learning_rate": 1.9817095318534038e-05, "loss": 0.9784, "step": 554 }, { "epoch": 0.09, "grad_norm": 3.07236154143424, "learning_rate": 1.981610027529337e-05, "loss": 1.0592, "step": 555 }, { "epoch": 0.09, "grad_norm": 3.3475495614107156, "learning_rate": 1.9815102557876406e-05, "loss": 0.9855, "step": 556 }, { "epoch": 0.09, "grad_norm": 1.5149529923803955, "learning_rate": 1.9814102166554954e-05, "loss": 0.9914, "step": 557 }, { "epoch": 0.09, "grad_norm": 3.686952323975728, "learning_rate": 1.981309910160154e-05, "loss": 1.0194, "step": 558 }, { "epoch": 0.09, "grad_norm": 3.180036895825814, "learning_rate": 1.9812093363289433e-05, "loss": 1.0496, "step": 559 }, { "epoch": 0.09, "grad_norm": 3.472622196472515, "learning_rate": 1.9811084951892625e-05, "loss": 0.9457, "step": 560 }, { "epoch": 0.09, "grad_norm": 2.44520154219856, "learning_rate": 1.9810073867685828e-05, "loss": 0.9884, "step": 561 }, { "epoch": 0.09, "grad_norm": 4.7834503496236325, "learning_rate": 1.980906011094449e-05, "loss": 1.0815, "step": 562 }, { "epoch": 0.09, "grad_norm": 3.917598870447178, "learning_rate": 1.9808043681944794e-05, "loss": 1.0741, "step": 563 }, { "epoch": 0.09, "grad_norm": 2.4966994432743377, "learning_rate": 1.980702458096364e-05, "loss": 1.0927, "step": 564 }, { "epoch": 0.09, "grad_norm": 2.2658261966160342, "learning_rate": 1.9806002808278658e-05, "loss": 1.0155, "step": 565 }, { "epoch": 0.09, "grad_norm": 2.6798759371951015, "learning_rate": 1.9804978364168203e-05, "loss": 1.1184, "step": 566 }, { "epoch": 0.09, "grad_norm": 2.892844837572949, "learning_rate": 1.980395124891137e-05, "loss": 1.0381, "step": 567 }, { "epoch": 0.09, "grad_norm": 2.9345184778595983, "learning_rate": 1.9802921462787968e-05, "loss": 0.9786, "step": 568 }, { "epoch": 0.09, "grad_norm": 3.2482201030549986, "learning_rate": 1.980188900607854e-05, "loss": 1.0387, "step": 569 }, { "epoch": 0.09, "grad_norm": 4.15271235612578, "learning_rate": 1.9800853879064356e-05, "loss": 0.9617, "step": 570 }, { "epoch": 0.09, "grad_norm": 3.907881983307452, "learning_rate": 1.9799816082027413e-05, "loss": 1.0616, "step": 571 }, { "epoch": 0.09, "grad_norm": 3.138178389565566, "learning_rate": 1.9798775615250434e-05, "loss": 1.0079, "step": 572 }, { "epoch": 0.09, "grad_norm": 4.87616129028081, "learning_rate": 1.9797732479016874e-05, "loss": 1.0676, "step": 573 }, { "epoch": 0.09, "grad_norm": 3.9252997411486077, "learning_rate": 1.979668667361091e-05, "loss": 1.0632, "step": 574 }, { "epoch": 0.09, "grad_norm": 4.039907695357868, "learning_rate": 1.9795638199317452e-05, "loss": 0.9244, "step": 575 }, { "epoch": 0.09, "grad_norm": 2.71027535478949, "learning_rate": 1.9794587056422125e-05, "loss": 0.9769, "step": 576 }, { "epoch": 0.09, "grad_norm": 2.6293917656815773, "learning_rate": 1.9793533245211298e-05, "loss": 0.9748, "step": 577 }, { "epoch": 0.09, "grad_norm": 2.5755099819303178, "learning_rate": 1.9792476765972055e-05, "loss": 1.0745, "step": 578 }, { "epoch": 0.09, "grad_norm": 1.5870908855813706, "learning_rate": 1.979141761899221e-05, "loss": 0.9851, "step": 579 }, { "epoch": 0.09, "grad_norm": 2.0686997375836262, "learning_rate": 1.9790355804560303e-05, "loss": 0.3327, "step": 580 }, { "epoch": 0.09, "grad_norm": 2.918151604618345, "learning_rate": 1.97892913229656e-05, "loss": 1.0479, "step": 581 }, { "epoch": 0.09, "grad_norm": 2.300113185917116, "learning_rate": 1.9788224174498098e-05, "loss": 1.0692, "step": 582 }, { "epoch": 0.09, "grad_norm": 3.123420010737679, "learning_rate": 1.9787154359448518e-05, "loss": 0.9057, "step": 583 }, { "epoch": 0.09, "grad_norm": 2.6380583800304676, "learning_rate": 1.9786081878108304e-05, "loss": 0.9845, "step": 584 }, { "epoch": 0.09, "grad_norm": 4.19525112341626, "learning_rate": 1.9785006730769636e-05, "loss": 0.9932, "step": 585 }, { "epoch": 0.09, "grad_norm": 4.781860910917663, "learning_rate": 1.9783928917725404e-05, "loss": 1.0513, "step": 586 }, { "epoch": 0.09, "grad_norm": 3.8754836980353553, "learning_rate": 1.9782848439269244e-05, "loss": 0.9593, "step": 587 }, { "epoch": 0.09, "grad_norm": 4.879897567424411, "learning_rate": 1.97817652956955e-05, "loss": 0.9566, "step": 588 }, { "epoch": 0.09, "grad_norm": 1.3461868900393505, "learning_rate": 1.9780679487299255e-05, "loss": 0.3318, "step": 589 }, { "epoch": 0.1, "grad_norm": 4.3237747750147335, "learning_rate": 1.9779591014376312e-05, "loss": 1.0253, "step": 590 }, { "epoch": 0.1, "grad_norm": 3.6931533150806786, "learning_rate": 1.9778499877223198e-05, "loss": 1.0097, "step": 591 }, { "epoch": 0.1, "grad_norm": 1.519111320309085, "learning_rate": 1.9777406076137174e-05, "loss": 0.3055, "step": 592 }, { "epoch": 0.1, "grad_norm": 2.2490447453737143, "learning_rate": 1.977630961141622e-05, "loss": 0.9604, "step": 593 }, { "epoch": 0.1, "grad_norm": 3.516528746686074, "learning_rate": 1.9775210483359037e-05, "loss": 1.0242, "step": 594 }, { "epoch": 0.1, "grad_norm": 2.453536175327635, "learning_rate": 1.977410869226507e-05, "loss": 1.0515, "step": 595 }, { "epoch": 0.1, "grad_norm": 2.981722626719808, "learning_rate": 1.9773004238434465e-05, "loss": 0.9631, "step": 596 }, { "epoch": 0.1, "grad_norm": 3.986164842893997, "learning_rate": 1.9771897122168112e-05, "loss": 0.9879, "step": 597 }, { "epoch": 0.1, "grad_norm": 3.3357448120214075, "learning_rate": 1.9770787343767622e-05, "loss": 0.9522, "step": 598 }, { "epoch": 0.1, "grad_norm": 3.373049136869537, "learning_rate": 1.9769674903535324e-05, "loss": 1.0227, "step": 599 }, { "epoch": 0.1, "grad_norm": 2.744795816745311, "learning_rate": 1.976855980177428e-05, "loss": 1.0083, "step": 600 }, { "epoch": 0.1, "grad_norm": 2.39619166693729, "learning_rate": 1.9767442038788273e-05, "loss": 0.3348, "step": 601 }, { "epoch": 0.1, "grad_norm": 3.34781164745618, "learning_rate": 1.9766321614881814e-05, "loss": 0.9609, "step": 602 }, { "epoch": 0.1, "grad_norm": 4.361878691317933, "learning_rate": 1.976519853036014e-05, "loss": 0.9818, "step": 603 }, { "epoch": 0.1, "grad_norm": 4.294797082799358, "learning_rate": 1.9764072785529203e-05, "loss": 1.0181, "step": 604 }, { "epoch": 0.1, "grad_norm": 3.924970765217558, "learning_rate": 1.9762944380695692e-05, "loss": 1.0581, "step": 605 }, { "epoch": 0.1, "grad_norm": 3.5819047845212157, "learning_rate": 1.9761813316167014e-05, "loss": 1.0301, "step": 606 }, { "epoch": 0.1, "grad_norm": 3.2595783817553214, "learning_rate": 1.9760679592251306e-05, "loss": 0.9817, "step": 607 }, { "epoch": 0.1, "grad_norm": 4.293096705326944, "learning_rate": 1.975954320925742e-05, "loss": 0.9711, "step": 608 }, { "epoch": 0.1, "grad_norm": 3.842679378613799, "learning_rate": 1.975840416749494e-05, "loss": 1.0433, "step": 609 }, { "epoch": 0.1, "grad_norm": 4.235933474514525, "learning_rate": 1.9757262467274173e-05, "loss": 0.9669, "step": 610 }, { "epoch": 0.1, "grad_norm": 3.06477084922778, "learning_rate": 1.975611810890615e-05, "loss": 0.904, "step": 611 }, { "epoch": 0.1, "grad_norm": 3.454972545332519, "learning_rate": 1.9754971092702623e-05, "loss": 0.9686, "step": 612 }, { "epoch": 0.1, "grad_norm": 4.4187311110032095, "learning_rate": 1.9753821418976077e-05, "loss": 0.9738, "step": 613 }, { "epoch": 0.1, "grad_norm": 4.0146967783592284, "learning_rate": 1.975266908803971e-05, "loss": 1.0148, "step": 614 }, { "epoch": 0.1, "grad_norm": 3.718278614589403, "learning_rate": 1.9751514100207444e-05, "loss": 1.0276, "step": 615 }, { "epoch": 0.1, "grad_norm": 2.796086887586752, "learning_rate": 1.975035645579394e-05, "loss": 0.9808, "step": 616 }, { "epoch": 0.1, "grad_norm": 3.7055681756176595, "learning_rate": 1.9749196155114568e-05, "loss": 0.9694, "step": 617 }, { "epoch": 0.1, "grad_norm": 2.640106330810992, "learning_rate": 1.9748033198485422e-05, "loss": 0.9296, "step": 618 }, { "epoch": 0.1, "grad_norm": 4.400199469466555, "learning_rate": 1.974686758622333e-05, "loss": 0.9132, "step": 619 }, { "epoch": 0.1, "grad_norm": 3.7053095172703743, "learning_rate": 1.9745699318645833e-05, "loss": 0.9624, "step": 620 }, { "epoch": 0.1, "grad_norm": 2.389024784845779, "learning_rate": 1.9744528396071197e-05, "loss": 1.0259, "step": 621 }, { "epoch": 0.1, "grad_norm": 1.9501344903392603, "learning_rate": 1.9743354818818418e-05, "loss": 1.1089, "step": 622 }, { "epoch": 0.1, "grad_norm": 3.2187631847518947, "learning_rate": 1.974217858720721e-05, "loss": 1.0336, "step": 623 }, { "epoch": 0.1, "grad_norm": 4.303239693148576, "learning_rate": 1.9740999701558007e-05, "loss": 0.9864, "step": 624 }, { "epoch": 0.1, "grad_norm": 2.966728084085622, "learning_rate": 1.9739818162191976e-05, "loss": 0.9627, "step": 625 }, { "epoch": 0.1, "grad_norm": 4.132533274918733, "learning_rate": 1.9738633969431e-05, "loss": 0.9843, "step": 626 }, { "epoch": 0.1, "grad_norm": 3.4969190933952063, "learning_rate": 1.973744712359768e-05, "loss": 1.049, "step": 627 }, { "epoch": 0.1, "grad_norm": 4.136297303543446, "learning_rate": 1.973625762501535e-05, "loss": 1.0062, "step": 628 }, { "epoch": 0.1, "grad_norm": 3.017329233515987, "learning_rate": 1.973506547400806e-05, "loss": 0.8748, "step": 629 }, { "epoch": 0.1, "grad_norm": 4.0753880228293715, "learning_rate": 1.9733870670900586e-05, "loss": 0.998, "step": 630 }, { "epoch": 0.1, "grad_norm": 5.636442456575263, "learning_rate": 1.973267321601843e-05, "loss": 0.9217, "step": 631 }, { "epoch": 0.1, "grad_norm": 3.4844796887737846, "learning_rate": 1.9731473109687807e-05, "loss": 1.0595, "step": 632 }, { "epoch": 0.1, "grad_norm": 2.6564998733575047, "learning_rate": 1.9730270352235657e-05, "loss": 1.0413, "step": 633 }, { "epoch": 0.1, "grad_norm": 3.8173901823045386, "learning_rate": 1.9729064943989646e-05, "loss": 0.9594, "step": 634 }, { "epoch": 0.1, "grad_norm": 2.825360796684064, "learning_rate": 1.972785688527816e-05, "loss": 0.9994, "step": 635 }, { "epoch": 0.1, "grad_norm": 2.016572478225497, "learning_rate": 1.972664617643031e-05, "loss": 1.0072, "step": 636 }, { "epoch": 0.1, "grad_norm": 1.7260004570077443, "learning_rate": 1.9725432817775925e-05, "loss": 0.9884, "step": 637 }, { "epoch": 0.1, "grad_norm": 3.165457260342212, "learning_rate": 1.9724216809645557e-05, "loss": 0.9536, "step": 638 }, { "epoch": 0.1, "grad_norm": 2.9118856936436703, "learning_rate": 1.9722998152370482e-05, "loss": 0.9698, "step": 639 }, { "epoch": 0.1, "grad_norm": 3.5128552185941566, "learning_rate": 1.9721776846282692e-05, "loss": 1.0123, "step": 640 }, { "epoch": 0.1, "grad_norm": 3.394710066357854, "learning_rate": 1.9720552891714912e-05, "loss": 1.0375, "step": 641 }, { "epoch": 0.1, "grad_norm": 2.4314197118212557, "learning_rate": 1.9719326289000568e-05, "loss": 1.0517, "step": 642 }, { "epoch": 0.1, "grad_norm": 4.751531253750847, "learning_rate": 1.971809703847383e-05, "loss": 1.0058, "step": 643 }, { "epoch": 0.1, "grad_norm": 3.855126888497448, "learning_rate": 1.971686514046958e-05, "loss": 0.9718, "step": 644 }, { "epoch": 0.1, "grad_norm": 4.400393659147866, "learning_rate": 1.971563059532342e-05, "loss": 1.0216, "step": 645 }, { "epoch": 0.1, "grad_norm": 2.4686115081185083, "learning_rate": 1.971439340337167e-05, "loss": 1.0665, "step": 646 }, { "epoch": 0.1, "grad_norm": 2.7220248963943323, "learning_rate": 1.971315356495138e-05, "loss": 0.9875, "step": 647 }, { "epoch": 0.1, "grad_norm": 3.7954533431082926, "learning_rate": 1.9711911080400313e-05, "loss": 1.0292, "step": 648 }, { "epoch": 0.1, "grad_norm": 2.7881244182436418, "learning_rate": 1.971066595005696e-05, "loss": 1.0685, "step": 649 }, { "epoch": 0.1, "grad_norm": 1.9149245208924899, "learning_rate": 1.9709418174260523e-05, "loss": 1.0589, "step": 650 }, { "epoch": 0.1, "grad_norm": 2.996996965999296, "learning_rate": 1.9708167753350932e-05, "loss": 0.9789, "step": 651 }, { "epoch": 0.11, "grad_norm": 2.765900075562533, "learning_rate": 1.9706914687668842e-05, "loss": 1.0684, "step": 652 }, { "epoch": 0.11, "grad_norm": 2.4161215090543133, "learning_rate": 1.9705658977555617e-05, "loss": 0.2885, "step": 653 }, { "epoch": 0.11, "grad_norm": 2.1437450976079377, "learning_rate": 1.970440062335335e-05, "loss": 1.0169, "step": 654 }, { "epoch": 0.11, "grad_norm": 2.719012894471134, "learning_rate": 1.9703139625404847e-05, "loss": 0.9901, "step": 655 }, { "epoch": 0.11, "grad_norm": 3.1737584126959324, "learning_rate": 1.9701875984053642e-05, "loss": 0.9434, "step": 656 }, { "epoch": 0.11, "grad_norm": 5.032916817683915, "learning_rate": 1.9700609699643984e-05, "loss": 1.0722, "step": 657 }, { "epoch": 0.11, "grad_norm": 2.2293476019422465, "learning_rate": 1.9699340772520847e-05, "loss": 1.032, "step": 658 }, { "epoch": 0.11, "grad_norm": 3.14512456189364, "learning_rate": 1.969806920302992e-05, "loss": 1.0259, "step": 659 }, { "epoch": 0.11, "grad_norm": 4.433356486286329, "learning_rate": 1.9696794991517613e-05, "loss": 0.9567, "step": 660 }, { "epoch": 0.11, "grad_norm": 4.412910351570275, "learning_rate": 1.9695518138331055e-05, "loss": 0.9943, "step": 661 }, { "epoch": 0.11, "grad_norm": 3.7101312343696304, "learning_rate": 1.9694238643818097e-05, "loss": 1.0127, "step": 662 }, { "epoch": 0.11, "grad_norm": 3.54521012408483, "learning_rate": 1.9692956508327313e-05, "loss": 1.0039, "step": 663 }, { "epoch": 0.11, "grad_norm": 2.9693288138460394, "learning_rate": 1.9691671732207986e-05, "loss": 1.0075, "step": 664 }, { "epoch": 0.11, "grad_norm": 4.268204911422782, "learning_rate": 1.9690384315810126e-05, "loss": 1.0144, "step": 665 }, { "epoch": 0.11, "grad_norm": 2.896690315030063, "learning_rate": 1.9689094259484462e-05, "loss": 0.9618, "step": 666 }, { "epoch": 0.11, "grad_norm": 4.155818142360545, "learning_rate": 1.968780156358244e-05, "loss": 1.0675, "step": 667 }, { "epoch": 0.11, "grad_norm": 3.3431722551858196, "learning_rate": 1.9686506228456226e-05, "loss": 1.0065, "step": 668 }, { "epoch": 0.11, "grad_norm": 3.8193155871916935, "learning_rate": 1.9685208254458707e-05, "loss": 1.0199, "step": 669 }, { "epoch": 0.11, "grad_norm": 3.884357106788283, "learning_rate": 1.9683907641943484e-05, "loss": 0.992, "step": 670 }, { "epoch": 0.11, "grad_norm": 2.9683773409197722, "learning_rate": 1.9682604391264882e-05, "loss": 1.05, "step": 671 }, { "epoch": 0.11, "grad_norm": 4.177731179142069, "learning_rate": 1.968129850277794e-05, "loss": 1.0206, "step": 672 }, { "epoch": 0.11, "grad_norm": 1.6823079804218997, "learning_rate": 1.9679989976838417e-05, "loss": 0.9798, "step": 673 }, { "epoch": 0.11, "grad_norm": 4.741646297435665, "learning_rate": 1.9678678813802796e-05, "loss": 0.9596, "step": 674 }, { "epoch": 0.11, "grad_norm": 3.3219964996539924, "learning_rate": 1.9677365014028275e-05, "loss": 1.0765, "step": 675 }, { "epoch": 0.11, "grad_norm": 2.358469606198438, "learning_rate": 1.9676048577872762e-05, "loss": 0.3169, "step": 676 }, { "epoch": 0.11, "grad_norm": 3.5898865208449062, "learning_rate": 1.9674729505694894e-05, "loss": 1.0321, "step": 677 }, { "epoch": 0.11, "grad_norm": 2.8094700068447698, "learning_rate": 1.9673407797854024e-05, "loss": 1.0139, "step": 678 }, { "epoch": 0.11, "grad_norm": 3.9285987877822, "learning_rate": 1.967208345471022e-05, "loss": 1.0734, "step": 679 }, { "epoch": 0.11, "grad_norm": 3.2867670090571024, "learning_rate": 1.967075647662427e-05, "loss": 0.9708, "step": 680 }, { "epoch": 0.11, "grad_norm": 3.1875528069404107, "learning_rate": 1.9669426863957685e-05, "loss": 0.9852, "step": 681 }, { "epoch": 0.11, "grad_norm": 2.0186290976397547, "learning_rate": 1.9668094617072683e-05, "loss": 0.334, "step": 682 }, { "epoch": 0.11, "grad_norm": 2.0812732683776956, "learning_rate": 1.96667597363322e-05, "loss": 1.0247, "step": 683 }, { "epoch": 0.11, "grad_norm": 3.7361417924329454, "learning_rate": 1.96654222220999e-05, "loss": 1.0185, "step": 684 }, { "epoch": 0.11, "grad_norm": 3.6468911661739347, "learning_rate": 1.966408207474016e-05, "loss": 1.0102, "step": 685 }, { "epoch": 0.11, "grad_norm": 2.677518361489019, "learning_rate": 1.9662739294618067e-05, "loss": 1.0272, "step": 686 }, { "epoch": 0.11, "grad_norm": 3.559349540907427, "learning_rate": 1.9661393882099436e-05, "loss": 0.9982, "step": 687 }, { "epoch": 0.11, "grad_norm": 3.624673236720525, "learning_rate": 1.9660045837550798e-05, "loss": 0.9802, "step": 688 }, { "epoch": 0.11, "grad_norm": 3.590198254437288, "learning_rate": 1.965869516133939e-05, "loss": 1.0041, "step": 689 }, { "epoch": 0.11, "grad_norm": 3.8806428738522025, "learning_rate": 1.9657341853833176e-05, "loss": 1.106, "step": 690 }, { "epoch": 0.11, "grad_norm": 2.199335830309328, "learning_rate": 1.9655985915400834e-05, "loss": 0.9623, "step": 691 }, { "epoch": 0.11, "grad_norm": 3.9637022583250836, "learning_rate": 1.965462734641176e-05, "loss": 0.9618, "step": 692 }, { "epoch": 0.11, "grad_norm": 2.5363724293871788, "learning_rate": 1.965326614723607e-05, "loss": 1.0578, "step": 693 }, { "epoch": 0.11, "grad_norm": 3.2731053586716152, "learning_rate": 1.9651902318244582e-05, "loss": 0.9603, "step": 694 }, { "epoch": 0.11, "grad_norm": 1.988741718325344, "learning_rate": 1.9650535859808847e-05, "loss": 1.038, "step": 695 }, { "epoch": 0.11, "grad_norm": 3.2716110341619307, "learning_rate": 1.9649166772301126e-05, "loss": 1.0315, "step": 696 }, { "epoch": 0.11, "grad_norm": 2.330441749510974, "learning_rate": 1.96477950560944e-05, "loss": 0.9625, "step": 697 }, { "epoch": 0.11, "grad_norm": 2.1021021040301116, "learning_rate": 1.9646420711562354e-05, "loss": 0.9465, "step": 698 }, { "epoch": 0.11, "grad_norm": 2.962993783481448, "learning_rate": 1.9645043739079398e-05, "loss": 1.079, "step": 699 }, { "epoch": 0.11, "grad_norm": 3.036199873867375, "learning_rate": 1.9643664139020666e-05, "loss": 1.0004, "step": 700 }, { "epoch": 0.11, "grad_norm": 5.464875161855756, "learning_rate": 1.964228191176199e-05, "loss": 0.9452, "step": 701 }, { "epoch": 0.11, "grad_norm": 2.874090963199685, "learning_rate": 1.964089705767993e-05, "loss": 1.0547, "step": 702 }, { "epoch": 0.11, "grad_norm": 3.811054893897079, "learning_rate": 1.963950957715176e-05, "loss": 0.9318, "step": 703 }, { "epoch": 0.11, "grad_norm": 3.6733783647392855, "learning_rate": 1.9638119470555462e-05, "loss": 0.9645, "step": 704 }, { "epoch": 0.11, "grad_norm": 3.1383295733050653, "learning_rate": 1.9636726738269752e-05, "loss": 0.9648, "step": 705 }, { "epoch": 0.11, "grad_norm": 2.749583559583005, "learning_rate": 1.9635331380674035e-05, "loss": 1.0216, "step": 706 }, { "epoch": 0.11, "grad_norm": 2.3419636340099097, "learning_rate": 1.9633933398148452e-05, "loss": 1.0817, "step": 707 }, { "epoch": 0.11, "grad_norm": 2.5514172262123527, "learning_rate": 1.9632532791073847e-05, "loss": 0.9287, "step": 708 }, { "epoch": 0.11, "grad_norm": 2.823482517713193, "learning_rate": 1.963112955983179e-05, "loss": 1.0019, "step": 709 }, { "epoch": 0.11, "grad_norm": 4.131591385718104, "learning_rate": 1.962972370480456e-05, "loss": 1.0131, "step": 710 }, { "epoch": 0.11, "grad_norm": 1.6645671813461298, "learning_rate": 1.9628315226375146e-05, "loss": 1.0044, "step": 711 }, { "epoch": 0.11, "grad_norm": 3.7738158231834222, "learning_rate": 1.962690412492726e-05, "loss": 1.0451, "step": 712 }, { "epoch": 0.11, "grad_norm": 3.0393439704366934, "learning_rate": 1.9625490400845318e-05, "loss": 0.9782, "step": 713 }, { "epoch": 0.12, "grad_norm": 3.1836141199501697, "learning_rate": 1.9624074054514467e-05, "loss": 1.0218, "step": 714 }, { "epoch": 0.12, "grad_norm": 2.146892088887307, "learning_rate": 1.9622655086320553e-05, "loss": 1.0589, "step": 715 }, { "epoch": 0.12, "grad_norm": 3.337443599152929, "learning_rate": 1.9621233496650143e-05, "loss": 0.994, "step": 716 }, { "epoch": 0.12, "grad_norm": 3.0450170867891515, "learning_rate": 1.9619809285890522e-05, "loss": 0.9852, "step": 717 }, { "epoch": 0.12, "grad_norm": 3.7804014656778477, "learning_rate": 1.9618382454429675e-05, "loss": 1.0156, "step": 718 }, { "epoch": 0.12, "grad_norm": 2.937363641698831, "learning_rate": 1.9616953002656316e-05, "loss": 0.9978, "step": 719 }, { "epoch": 0.12, "grad_norm": 2.352068773691432, "learning_rate": 1.961552093095987e-05, "loss": 0.2921, "step": 720 }, { "epoch": 0.12, "grad_norm": 3.9801007251358054, "learning_rate": 1.9614086239730464e-05, "loss": 1.039, "step": 721 }, { "epoch": 0.12, "grad_norm": 3.2292898995460377, "learning_rate": 1.9612648929358953e-05, "loss": 1.0093, "step": 722 }, { "epoch": 0.12, "grad_norm": 3.2130348799302566, "learning_rate": 1.96112090002369e-05, "loss": 1.0664, "step": 723 }, { "epoch": 0.12, "grad_norm": 4.381279281104788, "learning_rate": 1.960976645275658e-05, "loss": 0.9274, "step": 724 }, { "epoch": 0.12, "grad_norm": 2.710205594952686, "learning_rate": 1.9608321287310988e-05, "loss": 1.038, "step": 725 }, { "epoch": 0.12, "grad_norm": 3.3316000405602755, "learning_rate": 1.9606873504293818e-05, "loss": 0.9762, "step": 726 }, { "epoch": 0.12, "grad_norm": 3.6285915340943613, "learning_rate": 1.9605423104099492e-05, "loss": 1.0173, "step": 727 }, { "epoch": 0.12, "grad_norm": 2.677289150851216, "learning_rate": 1.9603970087123138e-05, "loss": 0.9835, "step": 728 }, { "epoch": 0.12, "grad_norm": 3.2957246771326236, "learning_rate": 1.9602514453760594e-05, "loss": 1.0104, "step": 729 }, { "epoch": 0.12, "grad_norm": 3.5877240951673572, "learning_rate": 1.960105620440842e-05, "loss": 0.9578, "step": 730 }, { "epoch": 0.12, "grad_norm": 1.8941063473259543, "learning_rate": 1.959959533946388e-05, "loss": 1.0566, "step": 731 }, { "epoch": 0.12, "grad_norm": 2.7717741800614144, "learning_rate": 1.9598131859324957e-05, "loss": 1.0076, "step": 732 }, { "epoch": 0.12, "grad_norm": 4.681483361981099, "learning_rate": 1.9596665764390344e-05, "loss": 0.9852, "step": 733 }, { "epoch": 0.12, "grad_norm": 2.3051436667860696, "learning_rate": 1.959519705505944e-05, "loss": 1.014, "step": 734 }, { "epoch": 0.12, "grad_norm": 2.465534868998324, "learning_rate": 1.9593725731732366e-05, "loss": 0.9834, "step": 735 }, { "epoch": 0.12, "grad_norm": 3.506880672533869, "learning_rate": 1.9592251794809945e-05, "loss": 0.9135, "step": 736 }, { "epoch": 0.12, "grad_norm": 3.2678896703597635, "learning_rate": 1.959077524469373e-05, "loss": 0.9281, "step": 737 }, { "epoch": 0.12, "grad_norm": 3.3425564641184917, "learning_rate": 1.9589296081785966e-05, "loss": 0.9694, "step": 738 }, { "epoch": 0.12, "grad_norm": 2.7128266449888976, "learning_rate": 1.9587814306489616e-05, "loss": 1.0395, "step": 739 }, { "epoch": 0.12, "grad_norm": 4.603591860049717, "learning_rate": 1.958632991920836e-05, "loss": 0.9338, "step": 740 }, { "epoch": 0.12, "grad_norm": 3.47197745075023, "learning_rate": 1.9584842920346587e-05, "loss": 1.0218, "step": 741 }, { "epoch": 0.12, "grad_norm": 3.9438470214903694, "learning_rate": 1.9583353310309393e-05, "loss": 0.9985, "step": 742 }, { "epoch": 0.12, "grad_norm": 2.1847418699400265, "learning_rate": 1.9581861089502593e-05, "loss": 0.3278, "step": 743 }, { "epoch": 0.12, "grad_norm": 3.349724539064349, "learning_rate": 1.9580366258332703e-05, "loss": 0.9253, "step": 744 }, { "epoch": 0.12, "grad_norm": 2.736543242598154, "learning_rate": 1.957886881720696e-05, "loss": 1.0665, "step": 745 }, { "epoch": 0.12, "grad_norm": 3.0106051868866155, "learning_rate": 1.957736876653331e-05, "loss": 1.0213, "step": 746 }, { "epoch": 0.12, "grad_norm": 2.3490304082962274, "learning_rate": 1.9575866106720405e-05, "loss": 0.9758, "step": 747 }, { "epoch": 0.12, "grad_norm": 2.789349196629003, "learning_rate": 1.957436083817762e-05, "loss": 0.9568, "step": 748 }, { "epoch": 0.12, "grad_norm": 3.7208212846231263, "learning_rate": 1.9572852961315014e-05, "loss": 1.0128, "step": 749 }, { "epoch": 0.12, "grad_norm": 4.290999400785783, "learning_rate": 1.9571342476543387e-05, "loss": 0.9354, "step": 750 }, { "epoch": 0.12, "grad_norm": 3.5872680589084562, "learning_rate": 1.9569829384274236e-05, "loss": 1.0404, "step": 751 }, { "epoch": 0.12, "grad_norm": 3.981118460837199, "learning_rate": 1.9568313684919765e-05, "loss": 1.0461, "step": 752 }, { "epoch": 0.12, "grad_norm": 3.418907097758438, "learning_rate": 1.9566795378892894e-05, "loss": 1.0588, "step": 753 }, { "epoch": 0.12, "grad_norm": 3.5703429506702, "learning_rate": 1.9565274466607248e-05, "loss": 1.0628, "step": 754 }, { "epoch": 0.12, "grad_norm": 2.335327885415121, "learning_rate": 1.9563750948477174e-05, "loss": 0.9812, "step": 755 }, { "epoch": 0.12, "grad_norm": 2.975089281810079, "learning_rate": 1.9562224824917716e-05, "loss": 1.0439, "step": 756 }, { "epoch": 0.12, "grad_norm": 2.4330305546897084, "learning_rate": 1.956069609634463e-05, "loss": 0.9783, "step": 757 }, { "epoch": 0.12, "grad_norm": 2.5933810971363487, "learning_rate": 1.9559164763174384e-05, "loss": 0.9468, "step": 758 }, { "epoch": 0.12, "grad_norm": 2.250229399960704, "learning_rate": 1.9557630825824156e-05, "loss": 1.0433, "step": 759 }, { "epoch": 0.12, "grad_norm": 4.214374222645751, "learning_rate": 1.9556094284711834e-05, "loss": 0.9698, "step": 760 }, { "epoch": 0.12, "grad_norm": 3.061689308455185, "learning_rate": 1.9554555140256016e-05, "loss": 0.9712, "step": 761 }, { "epoch": 0.12, "grad_norm": 3.091616578908822, "learning_rate": 1.9553013392876005e-05, "loss": 1.0739, "step": 762 }, { "epoch": 0.12, "grad_norm": 4.480955514848346, "learning_rate": 1.955146904299181e-05, "loss": 0.9984, "step": 763 }, { "epoch": 0.12, "grad_norm": 4.168782091374675, "learning_rate": 1.9549922091024164e-05, "loss": 1.0487, "step": 764 }, { "epoch": 0.12, "grad_norm": 3.7509352471445685, "learning_rate": 1.9548372537394494e-05, "loss": 0.9455, "step": 765 }, { "epoch": 0.12, "grad_norm": 1.68187496665861, "learning_rate": 1.9546820382524943e-05, "loss": 0.9725, "step": 766 }, { "epoch": 0.12, "grad_norm": 1.7874851519628208, "learning_rate": 1.9545265626838362e-05, "loss": 0.9795, "step": 767 }, { "epoch": 0.12, "grad_norm": 2.7054356748413637, "learning_rate": 1.9543708270758306e-05, "loss": 1.0391, "step": 768 }, { "epoch": 0.12, "grad_norm": 2.9317351242784917, "learning_rate": 1.9542148314709042e-05, "loss": 0.9683, "step": 769 }, { "epoch": 0.12, "grad_norm": 2.7859152766789244, "learning_rate": 1.9540585759115548e-05, "loss": 0.949, "step": 770 }, { "epoch": 0.12, "grad_norm": 2.104213928094762, "learning_rate": 1.9539020604403502e-05, "loss": 0.9813, "step": 771 }, { "epoch": 0.12, "grad_norm": 4.646176905679248, "learning_rate": 1.95374528509993e-05, "loss": 1.0765, "step": 772 }, { "epoch": 0.12, "grad_norm": 4.174342193862985, "learning_rate": 1.9535882499330044e-05, "loss": 1.0489, "step": 773 }, { "epoch": 0.12, "grad_norm": 3.8023904058048457, "learning_rate": 1.9534309549823535e-05, "loss": 0.8756, "step": 774 }, { "epoch": 0.12, "grad_norm": 3.0900601047238005, "learning_rate": 1.9532734002908287e-05, "loss": 0.9631, "step": 775 }, { "epoch": 0.13, "grad_norm": 3.176021027281599, "learning_rate": 1.9531155859013527e-05, "loss": 0.9977, "step": 776 }, { "epoch": 0.13, "grad_norm": 3.5195695915208303, "learning_rate": 1.9529575118569185e-05, "loss": 1.0035, "step": 777 }, { "epoch": 0.13, "grad_norm": 2.6117979210559397, "learning_rate": 1.952799178200589e-05, "loss": 0.9808, "step": 778 }, { "epoch": 0.13, "grad_norm": 2.6037676289328346, "learning_rate": 1.9526405849755003e-05, "loss": 0.9923, "step": 779 }, { "epoch": 0.13, "grad_norm": 2.8661909736314954, "learning_rate": 1.9524817322248558e-05, "loss": 0.9598, "step": 780 }, { "epoch": 0.13, "grad_norm": 3.521036687692859, "learning_rate": 1.9523226199919326e-05, "loss": 1.0347, "step": 781 }, { "epoch": 0.13, "grad_norm": 2.5114162138280403, "learning_rate": 1.9521632483200767e-05, "loss": 0.9677, "step": 782 }, { "epoch": 0.13, "grad_norm": 2.378203991494147, "learning_rate": 1.952003617252705e-05, "loss": 1.0077, "step": 783 }, { "epoch": 0.13, "grad_norm": 3.7952662697964663, "learning_rate": 1.9518437268333062e-05, "loss": 1.0121, "step": 784 }, { "epoch": 0.13, "grad_norm": 3.235617725997096, "learning_rate": 1.951683577105438e-05, "loss": 1.0102, "step": 785 }, { "epoch": 0.13, "grad_norm": 2.8017479413384843, "learning_rate": 1.9515231681127308e-05, "loss": 1.0323, "step": 786 }, { "epoch": 0.13, "grad_norm": 3.434565036781561, "learning_rate": 1.9513624998988833e-05, "loss": 0.9909, "step": 787 }, { "epoch": 0.13, "grad_norm": 2.1735900035587568, "learning_rate": 1.9512015725076666e-05, "loss": 0.9845, "step": 788 }, { "epoch": 0.13, "grad_norm": 2.489462100685755, "learning_rate": 1.9510403859829213e-05, "loss": 1.0269, "step": 789 }, { "epoch": 0.13, "grad_norm": 3.951987483595514, "learning_rate": 1.9508789403685594e-05, "loss": 1.0166, "step": 790 }, { "epoch": 0.13, "grad_norm": 3.4342837459530458, "learning_rate": 1.950717235708563e-05, "loss": 0.9114, "step": 791 }, { "epoch": 0.13, "grad_norm": 3.2968334900471077, "learning_rate": 1.950555272046985e-05, "loss": 1.0843, "step": 792 }, { "epoch": 0.13, "grad_norm": 4.6352238418534775, "learning_rate": 1.9503930494279487e-05, "loss": 1.0032, "step": 793 }, { "epoch": 0.13, "grad_norm": 3.651283869235285, "learning_rate": 1.9502305678956476e-05, "loss": 1.0126, "step": 794 }, { "epoch": 0.13, "grad_norm": 3.897175250076595, "learning_rate": 1.9500678274943467e-05, "loss": 0.9985, "step": 795 }, { "epoch": 0.13, "grad_norm": 3.1526099639760288, "learning_rate": 1.949904828268381e-05, "loss": 1.0128, "step": 796 }, { "epoch": 0.13, "grad_norm": 3.9183224093405125, "learning_rate": 1.949741570262156e-05, "loss": 1.0055, "step": 797 }, { "epoch": 0.13, "grad_norm": 2.0965749557200173, "learning_rate": 1.949578053520147e-05, "loss": 1.0316, "step": 798 }, { "epoch": 0.13, "grad_norm": 1.9369592835022202, "learning_rate": 1.9494142780869013e-05, "loss": 1.0275, "step": 799 }, { "epoch": 0.13, "grad_norm": 2.310468245109141, "learning_rate": 1.9492502440070353e-05, "loss": 1.0194, "step": 800 }, { "epoch": 0.13, "grad_norm": 3.489731712413088, "learning_rate": 1.949085951325237e-05, "loss": 1.0067, "step": 801 }, { "epoch": 0.13, "grad_norm": 4.387249174854971, "learning_rate": 1.9489214000862633e-05, "loss": 0.9687, "step": 802 }, { "epoch": 0.13, "grad_norm": 4.834341448301881, "learning_rate": 1.948756590334943e-05, "loss": 1.0348, "step": 803 }, { "epoch": 0.13, "grad_norm": 2.601988886352778, "learning_rate": 1.9485915221161753e-05, "loss": 0.9853, "step": 804 }, { "epoch": 0.13, "grad_norm": 3.984102007919636, "learning_rate": 1.9484261954749286e-05, "loss": 1.0116, "step": 805 }, { "epoch": 0.13, "grad_norm": 3.0159605165725254, "learning_rate": 1.9482606104562428e-05, "loss": 0.9578, "step": 806 }, { "epoch": 0.13, "grad_norm": 3.1878953576330358, "learning_rate": 1.9480947671052278e-05, "loss": 1.0037, "step": 807 }, { "epoch": 0.13, "grad_norm": 2.5923049762174357, "learning_rate": 1.947928665467064e-05, "loss": 1.0316, "step": 808 }, { "epoch": 0.13, "grad_norm": 3.3960842359719794, "learning_rate": 1.9477623055870016e-05, "loss": 0.9367, "step": 809 }, { "epoch": 0.13, "grad_norm": 2.0530218182652598, "learning_rate": 1.9475956875103623e-05, "loss": 0.9687, "step": 810 }, { "epoch": 0.13, "grad_norm": 4.896149364038101, "learning_rate": 1.947428811282537e-05, "loss": 1.0074, "step": 811 }, { "epoch": 0.13, "grad_norm": 2.9467221233563627, "learning_rate": 1.947261676948987e-05, "loss": 1.004, "step": 812 }, { "epoch": 0.13, "grad_norm": 2.212986727695855, "learning_rate": 1.947094284555245e-05, "loss": 0.9366, "step": 813 }, { "epoch": 0.13, "grad_norm": 3.5319072820605393, "learning_rate": 1.9469266341469136e-05, "loss": 1.0041, "step": 814 }, { "epoch": 0.13, "grad_norm": 2.963940066803729, "learning_rate": 1.9467587257696646e-05, "loss": 0.992, "step": 815 }, { "epoch": 0.13, "grad_norm": 3.5221486240028868, "learning_rate": 1.946590559469241e-05, "loss": 0.9565, "step": 816 }, { "epoch": 0.13, "grad_norm": 4.014019715766784, "learning_rate": 1.946422135291456e-05, "loss": 0.979, "step": 817 }, { "epoch": 0.13, "grad_norm": 2.522264426415572, "learning_rate": 1.9462534532821934e-05, "loss": 0.9707, "step": 818 }, { "epoch": 0.13, "grad_norm": 2.603939219396833, "learning_rate": 1.946084513487406e-05, "loss": 0.9206, "step": 819 }, { "epoch": 0.13, "grad_norm": 3.088230025129338, "learning_rate": 1.9459153159531188e-05, "loss": 0.9692, "step": 820 }, { "epoch": 0.13, "grad_norm": 3.0594552922842064, "learning_rate": 1.9457458607254247e-05, "loss": 0.9396, "step": 821 }, { "epoch": 0.13, "grad_norm": 3.609955183686171, "learning_rate": 1.9455761478504888e-05, "loss": 1.0008, "step": 822 }, { "epoch": 0.13, "grad_norm": 3.2596752229638004, "learning_rate": 1.9454061773745448e-05, "loss": 0.9996, "step": 823 }, { "epoch": 0.13, "grad_norm": 2.649781552793901, "learning_rate": 1.9452359493438984e-05, "loss": 1.0631, "step": 824 }, { "epoch": 0.13, "grad_norm": 2.5402032255738076, "learning_rate": 1.9450654638049235e-05, "loss": 1.026, "step": 825 }, { "epoch": 0.13, "grad_norm": 2.980693043759817, "learning_rate": 1.9448947208040655e-05, "loss": 0.9818, "step": 826 }, { "epoch": 0.13, "grad_norm": 3.6266275895575277, "learning_rate": 1.9447237203878395e-05, "loss": 0.988, "step": 827 }, { "epoch": 0.13, "grad_norm": 2.759161686671479, "learning_rate": 1.9445524626028303e-05, "loss": 0.9336, "step": 828 }, { "epoch": 0.13, "grad_norm": 4.453456719073087, "learning_rate": 1.9443809474956937e-05, "loss": 1.0192, "step": 829 }, { "epoch": 0.13, "grad_norm": 1.9511448702695176, "learning_rate": 1.944209175113155e-05, "loss": 1.0082, "step": 830 }, { "epoch": 0.13, "grad_norm": 2.270129336735106, "learning_rate": 1.9440371455020094e-05, "loss": 0.9606, "step": 831 }, { "epoch": 0.13, "grad_norm": 2.6884442821799803, "learning_rate": 1.943864858709123e-05, "loss": 1.0456, "step": 832 }, { "epoch": 0.13, "grad_norm": 3.977321709665656, "learning_rate": 1.9436923147814317e-05, "loss": 0.9989, "step": 833 }, { "epoch": 0.13, "grad_norm": 2.8817742191480726, "learning_rate": 1.9435195137659404e-05, "loss": 0.9788, "step": 834 }, { "epoch": 0.13, "grad_norm": 2.8744867944843717, "learning_rate": 1.9433464557097257e-05, "loss": 0.9739, "step": 835 }, { "epoch": 0.13, "grad_norm": 3.6771332446521137, "learning_rate": 1.9431731406599325e-05, "loss": 1.0036, "step": 836 }, { "epoch": 0.13, "grad_norm": 3.030004973202895, "learning_rate": 1.9429995686637774e-05, "loss": 1.0529, "step": 837 }, { "epoch": 0.14, "grad_norm": 4.142094880244565, "learning_rate": 1.9428257397685456e-05, "loss": 0.9741, "step": 838 }, { "epoch": 0.14, "grad_norm": 2.4343066694392603, "learning_rate": 1.942651654021593e-05, "loss": 1.0062, "step": 839 }, { "epoch": 0.14, "grad_norm": 3.8175419471499605, "learning_rate": 1.942477311470346e-05, "loss": 0.9205, "step": 840 }, { "epoch": 0.14, "grad_norm": 2.6871392651135046, "learning_rate": 1.9423027121622995e-05, "loss": 1.0676, "step": 841 }, { "epoch": 0.14, "grad_norm": 1.4314149519971577, "learning_rate": 1.9421278561450195e-05, "loss": 0.9559, "step": 842 }, { "epoch": 0.14, "grad_norm": 3.449403578578853, "learning_rate": 1.9419527434661418e-05, "loss": 0.9601, "step": 843 }, { "epoch": 0.14, "grad_norm": 4.0089999516316555, "learning_rate": 1.9417773741733713e-05, "loss": 0.9911, "step": 844 }, { "epoch": 0.14, "grad_norm": 1.4734471471476749, "learning_rate": 1.9416017483144844e-05, "loss": 0.9737, "step": 845 }, { "epoch": 0.14, "grad_norm": 2.895587682024496, "learning_rate": 1.941425865937325e-05, "loss": 1.0172, "step": 846 }, { "epoch": 0.14, "grad_norm": 4.065198031736148, "learning_rate": 1.9412497270898097e-05, "loss": 1.0143, "step": 847 }, { "epoch": 0.14, "grad_norm": 3.011637368692142, "learning_rate": 1.9410733318199233e-05, "loss": 0.9412, "step": 848 }, { "epoch": 0.14, "grad_norm": 2.1651028222385835, "learning_rate": 1.9408966801757204e-05, "loss": 0.9926, "step": 849 }, { "epoch": 0.14, "grad_norm": 3.2065646428025865, "learning_rate": 1.9407197722053254e-05, "loss": 0.9634, "step": 850 }, { "epoch": 0.14, "grad_norm": 2.8560995717576, "learning_rate": 1.9405426079569337e-05, "loss": 1.003, "step": 851 }, { "epoch": 0.14, "grad_norm": 3.1308057641230342, "learning_rate": 1.940365187478809e-05, "loss": 1.0249, "step": 852 }, { "epoch": 0.14, "grad_norm": 2.6173787061392777, "learning_rate": 1.9401875108192863e-05, "loss": 0.3212, "step": 853 }, { "epoch": 0.14, "grad_norm": 2.2599691366940866, "learning_rate": 1.940009578026769e-05, "loss": 0.3176, "step": 854 }, { "epoch": 0.14, "grad_norm": 2.884982153243956, "learning_rate": 1.9398313891497314e-05, "loss": 0.9914, "step": 855 }, { "epoch": 0.14, "grad_norm": 2.5131206486039814, "learning_rate": 1.9396529442367167e-05, "loss": 0.9672, "step": 856 }, { "epoch": 0.14, "grad_norm": 1.3541542737344827, "learning_rate": 1.939474243336338e-05, "loss": 0.3309, "step": 857 }, { "epoch": 0.14, "grad_norm": 2.9848562122263136, "learning_rate": 1.939295286497279e-05, "loss": 0.9748, "step": 858 }, { "epoch": 0.14, "grad_norm": 3.291406951187099, "learning_rate": 1.9391160737682924e-05, "loss": 0.9662, "step": 859 }, { "epoch": 0.14, "grad_norm": 2.908459879427471, "learning_rate": 1.9389366051982e-05, "loss": 0.9804, "step": 860 }, { "epoch": 0.14, "grad_norm": 2.8752681772957507, "learning_rate": 1.9387568808358944e-05, "loss": 1.0343, "step": 861 }, { "epoch": 0.14, "grad_norm": 2.777184418777125, "learning_rate": 1.9385769007303374e-05, "loss": 1.0184, "step": 862 }, { "epoch": 0.14, "grad_norm": 3.572991966291909, "learning_rate": 1.9383966649305608e-05, "loss": 0.9072, "step": 863 }, { "epoch": 0.14, "grad_norm": 2.5118630280296, "learning_rate": 1.9382161734856656e-05, "loss": 0.9789, "step": 864 }, { "epoch": 0.14, "grad_norm": 3.5291502794938423, "learning_rate": 1.9380354264448225e-05, "loss": 1.062, "step": 865 }, { "epoch": 0.14, "grad_norm": 4.160240380237938, "learning_rate": 1.9378544238572723e-05, "loss": 0.9735, "step": 866 }, { "epoch": 0.14, "grad_norm": 3.4149809765041526, "learning_rate": 1.937673165772325e-05, "loss": 1.063, "step": 867 }, { "epoch": 0.14, "grad_norm": 4.9978739986954395, "learning_rate": 1.93749165223936e-05, "loss": 1.0058, "step": 868 }, { "epoch": 0.14, "grad_norm": 3.24679847898806, "learning_rate": 1.937309883307827e-05, "loss": 1.0168, "step": 869 }, { "epoch": 0.14, "grad_norm": 2.097420165914897, "learning_rate": 1.9371278590272448e-05, "loss": 0.3291, "step": 870 }, { "epoch": 0.14, "grad_norm": 2.9913669985101112, "learning_rate": 1.9369455794472017e-05, "loss": 0.9711, "step": 871 }, { "epoch": 0.14, "grad_norm": 2.4121721390809467, "learning_rate": 1.9367630446173554e-05, "loss": 1.0672, "step": 872 }, { "epoch": 0.14, "grad_norm": 2.1714369517358247, "learning_rate": 1.936580254587434e-05, "loss": 0.3247, "step": 873 }, { "epoch": 0.14, "grad_norm": 3.5128711002112243, "learning_rate": 1.936397209407234e-05, "loss": 1.0107, "step": 874 }, { "epoch": 0.14, "grad_norm": 1.3935540886966566, "learning_rate": 1.9362139091266227e-05, "loss": 0.3457, "step": 875 }, { "epoch": 0.14, "grad_norm": 3.329752492785013, "learning_rate": 1.936030353795535e-05, "loss": 0.9542, "step": 876 }, { "epoch": 0.14, "grad_norm": 2.52070833847332, "learning_rate": 1.9358465434639773e-05, "loss": 1.0045, "step": 877 }, { "epoch": 0.14, "grad_norm": 1.8470132688862564, "learning_rate": 1.9356624781820246e-05, "loss": 1.0032, "step": 878 }, { "epoch": 0.14, "grad_norm": 1.732599810522924, "learning_rate": 1.9354781579998212e-05, "loss": 1.0314, "step": 879 }, { "epoch": 0.14, "grad_norm": 3.1447676480919604, "learning_rate": 1.9352935829675807e-05, "loss": 0.9274, "step": 880 }, { "epoch": 0.14, "grad_norm": 1.946577342686121, "learning_rate": 1.935108753135587e-05, "loss": 0.322, "step": 881 }, { "epoch": 0.14, "grad_norm": 3.582804189839564, "learning_rate": 1.9349236685541924e-05, "loss": 1.0186, "step": 882 }, { "epoch": 0.14, "grad_norm": 2.6104903892423468, "learning_rate": 1.934738329273819e-05, "loss": 0.973, "step": 883 }, { "epoch": 0.14, "grad_norm": 2.0039352801564387, "learning_rate": 1.9345527353449583e-05, "loss": 1.0412, "step": 884 }, { "epoch": 0.14, "grad_norm": 1.8770356889521567, "learning_rate": 1.9343668868181722e-05, "loss": 1.064, "step": 885 }, { "epoch": 0.14, "grad_norm": 3.6636469428445153, "learning_rate": 1.9341807837440893e-05, "loss": 0.9503, "step": 886 }, { "epoch": 0.14, "grad_norm": 3.375391054456922, "learning_rate": 1.9339944261734107e-05, "loss": 0.9945, "step": 887 }, { "epoch": 0.14, "grad_norm": 3.3406543127331534, "learning_rate": 1.933807814156904e-05, "loss": 0.889, "step": 888 }, { "epoch": 0.14, "grad_norm": 1.8633438835573275, "learning_rate": 1.933620947745409e-05, "loss": 1.0413, "step": 889 }, { "epoch": 0.14, "grad_norm": 3.5385359147079085, "learning_rate": 1.9334338269898317e-05, "loss": 0.9945, "step": 890 }, { "epoch": 0.14, "grad_norm": 3.0611042812645843, "learning_rate": 1.9332464519411503e-05, "loss": 0.9396, "step": 891 }, { "epoch": 0.14, "grad_norm": 2.167032565496674, "learning_rate": 1.93305882265041e-05, "loss": 1.074, "step": 892 }, { "epoch": 0.14, "grad_norm": 4.0195580602999215, "learning_rate": 1.9328709391687264e-05, "loss": 0.9037, "step": 893 }, { "epoch": 0.14, "grad_norm": 3.2681051810811375, "learning_rate": 1.932682801547284e-05, "loss": 0.875, "step": 894 }, { "epoch": 0.14, "grad_norm": 3.7388120969910275, "learning_rate": 1.9324944098373375e-05, "loss": 1.0216, "step": 895 }, { "epoch": 0.14, "grad_norm": 2.851956274984136, "learning_rate": 1.932305764090209e-05, "loss": 0.9953, "step": 896 }, { "epoch": 0.14, "grad_norm": 2.341717461773808, "learning_rate": 1.9321168643572912e-05, "loss": 0.979, "step": 897 }, { "epoch": 0.14, "grad_norm": 2.76506804122123, "learning_rate": 1.931927710690046e-05, "loss": 0.9736, "step": 898 }, { "epoch": 0.14, "grad_norm": 3.553932864394946, "learning_rate": 1.9317383031400032e-05, "loss": 0.9977, "step": 899 }, { "epoch": 0.15, "grad_norm": 4.309793922471793, "learning_rate": 1.9315486417587636e-05, "loss": 0.9573, "step": 900 }, { "epoch": 0.15, "grad_norm": 3.2520066448767833, "learning_rate": 1.9313587265979952e-05, "loss": 1.0103, "step": 901 }, { "epoch": 0.15, "grad_norm": 2.706659024451731, "learning_rate": 1.9311685577094368e-05, "loss": 0.8935, "step": 902 }, { "epoch": 0.15, "grad_norm": 3.5849937909230793, "learning_rate": 1.9309781351448956e-05, "loss": 1.0021, "step": 903 }, { "epoch": 0.15, "grad_norm": 4.134063242996499, "learning_rate": 1.9307874589562474e-05, "loss": 0.9807, "step": 904 }, { "epoch": 0.15, "grad_norm": 2.7843913650459906, "learning_rate": 1.9305965291954387e-05, "loss": 1.0304, "step": 905 }, { "epoch": 0.15, "grad_norm": 3.466601419446163, "learning_rate": 1.930405345914483e-05, "loss": 1.0071, "step": 906 }, { "epoch": 0.15, "grad_norm": 2.377260236874727, "learning_rate": 1.9302139091654643e-05, "loss": 0.9782, "step": 907 }, { "epoch": 0.15, "grad_norm": 2.5315995151670596, "learning_rate": 1.9300222190005352e-05, "loss": 0.9274, "step": 908 }, { "epoch": 0.15, "grad_norm": 3.168709062842306, "learning_rate": 1.9298302754719177e-05, "loss": 0.9858, "step": 909 }, { "epoch": 0.15, "grad_norm": 2.7710626454700784, "learning_rate": 1.929638078631902e-05, "loss": 0.9865, "step": 910 }, { "epoch": 0.15, "grad_norm": 2.8369140625, "learning_rate": 1.9294456285328482e-05, "loss": 1.0202, "step": 911 }, { "epoch": 0.15, "grad_norm": 1.929346216578376, "learning_rate": 1.9292529252271848e-05, "loss": 0.9976, "step": 912 }, { "epoch": 0.15, "grad_norm": 2.7591760306591646, "learning_rate": 1.9290599687674095e-05, "loss": 0.9922, "step": 913 }, { "epoch": 0.15, "grad_norm": 3.5090297294742294, "learning_rate": 1.928866759206089e-05, "loss": 0.9636, "step": 914 }, { "epoch": 0.15, "grad_norm": 3.0107401285773854, "learning_rate": 1.9286732965958593e-05, "loss": 0.9549, "step": 915 }, { "epoch": 0.15, "grad_norm": 3.4059539106277996, "learning_rate": 1.928479580989424e-05, "loss": 0.9616, "step": 916 }, { "epoch": 0.15, "grad_norm": 1.789156115885332, "learning_rate": 1.9282856124395578e-05, "loss": 0.9889, "step": 917 }, { "epoch": 0.15, "grad_norm": 4.254022601929789, "learning_rate": 1.9280913909991018e-05, "loss": 0.9894, "step": 918 }, { "epoch": 0.15, "grad_norm": 3.0909855369049315, "learning_rate": 1.9278969167209682e-05, "loss": 1.0745, "step": 919 }, { "epoch": 0.15, "grad_norm": 3.2193259307255904, "learning_rate": 1.927702189658137e-05, "loss": 0.9706, "step": 920 }, { "epoch": 0.15, "grad_norm": 2.005718757432666, "learning_rate": 1.9275072098636568e-05, "loss": 0.9833, "step": 921 }, { "epoch": 0.15, "grad_norm": 2.7932568381524634, "learning_rate": 1.927311977390646e-05, "loss": 1.0653, "step": 922 }, { "epoch": 0.15, "grad_norm": 3.23639221680464, "learning_rate": 1.927116492292291e-05, "loss": 0.9988, "step": 923 }, { "epoch": 0.15, "grad_norm": 4.135377257549287, "learning_rate": 1.926920754621847e-05, "loss": 1.043, "step": 924 }, { "epoch": 0.15, "grad_norm": 2.1418190848691143, "learning_rate": 1.9267247644326392e-05, "loss": 1.0629, "step": 925 }, { "epoch": 0.15, "grad_norm": 2.5924743824707486, "learning_rate": 1.92652852177806e-05, "loss": 0.9626, "step": 926 }, { "epoch": 0.15, "grad_norm": 2.328378241162126, "learning_rate": 1.9263320267115716e-05, "loss": 0.9461, "step": 927 }, { "epoch": 0.15, "grad_norm": 2.807040531813036, "learning_rate": 1.9261352792867047e-05, "loss": 0.9671, "step": 928 }, { "epoch": 0.15, "grad_norm": 3.528874501306568, "learning_rate": 1.9259382795570588e-05, "loss": 1.021, "step": 929 }, { "epoch": 0.15, "grad_norm": 3.8949473932555274, "learning_rate": 1.9257410275763018e-05, "loss": 0.983, "step": 930 }, { "epoch": 0.15, "grad_norm": 3.880940099067656, "learning_rate": 1.9255435233981706e-05, "loss": 1.0077, "step": 931 }, { "epoch": 0.15, "grad_norm": 3.5794248926686336, "learning_rate": 1.9253457670764707e-05, "loss": 1.0595, "step": 932 }, { "epoch": 0.15, "grad_norm": 2.655955129974018, "learning_rate": 1.9251477586650768e-05, "loss": 1.0039, "step": 933 }, { "epoch": 0.15, "grad_norm": 3.6679413198272135, "learning_rate": 1.9249494982179313e-05, "loss": 1.031, "step": 934 }, { "epoch": 0.15, "grad_norm": 4.836632008514915, "learning_rate": 1.9247509857890466e-05, "loss": 1.0041, "step": 935 }, { "epoch": 0.15, "grad_norm": 2.5009794224041633, "learning_rate": 1.9245522214325022e-05, "loss": 0.8827, "step": 936 }, { "epoch": 0.15, "grad_norm": 2.5504761494409602, "learning_rate": 1.9243532052024473e-05, "loss": 0.954, "step": 937 }, { "epoch": 0.15, "grad_norm": 3.553805399009051, "learning_rate": 1.9241539371530994e-05, "loss": 1.0, "step": 938 }, { "epoch": 0.15, "grad_norm": 2.3885048833633022, "learning_rate": 1.9239544173387442e-05, "loss": 0.9799, "step": 939 }, { "epoch": 0.15, "grad_norm": 2.782852654057623, "learning_rate": 1.923754645813737e-05, "loss": 0.9679, "step": 940 }, { "epoch": 0.15, "grad_norm": 3.262634221966234, "learning_rate": 1.923554622632501e-05, "loss": 0.9701, "step": 941 }, { "epoch": 0.15, "grad_norm": 2.190811484503476, "learning_rate": 1.9233543478495277e-05, "loss": 0.3372, "step": 942 }, { "epoch": 0.15, "grad_norm": 3.4057053996799516, "learning_rate": 1.9231538215193776e-05, "loss": 0.9997, "step": 943 }, { "epoch": 0.15, "grad_norm": 3.5555834636652595, "learning_rate": 1.9229530436966796e-05, "loss": 0.9864, "step": 944 }, { "epoch": 0.15, "grad_norm": 2.9611088443991562, "learning_rate": 1.9227520144361312e-05, "loss": 1.0195, "step": 945 }, { "epoch": 0.15, "grad_norm": 2.989326403982733, "learning_rate": 1.9225507337924984e-05, "loss": 0.9601, "step": 946 }, { "epoch": 0.15, "grad_norm": 3.5902922872073475, "learning_rate": 1.9223492018206155e-05, "loss": 0.9452, "step": 947 }, { "epoch": 0.15, "grad_norm": 2.7138797503026515, "learning_rate": 1.922147418575385e-05, "loss": 1.0396, "step": 948 }, { "epoch": 0.15, "grad_norm": 3.426254326790851, "learning_rate": 1.9219453841117787e-05, "loss": 0.9087, "step": 949 }, { "epoch": 0.15, "grad_norm": 2.8502756219733945, "learning_rate": 1.921743098484836e-05, "loss": 0.9391, "step": 950 }, { "epoch": 0.15, "grad_norm": 3.3162790011199363, "learning_rate": 1.9215405617496657e-05, "loss": 1.0192, "step": 951 }, { "epoch": 0.15, "grad_norm": 2.1521492262652835, "learning_rate": 1.921337773961444e-05, "loss": 0.9029, "step": 952 }, { "epoch": 0.15, "grad_norm": 3.454368540007402, "learning_rate": 1.9211347351754154e-05, "loss": 0.998, "step": 953 }, { "epoch": 0.15, "grad_norm": 3.024733627823632, "learning_rate": 1.9209314454468935e-05, "loss": 1.0058, "step": 954 }, { "epoch": 0.15, "grad_norm": 4.319444896920124, "learning_rate": 1.9207279048312612e-05, "loss": 0.9697, "step": 955 }, { "epoch": 0.15, "grad_norm": 2.857780470546062, "learning_rate": 1.920524113383967e-05, "loss": 0.9453, "step": 956 }, { "epoch": 0.15, "grad_norm": 3.240930814669019, "learning_rate": 1.9203200711605297e-05, "loss": 0.9631, "step": 957 }, { "epoch": 0.15, "grad_norm": 3.7279538929916347, "learning_rate": 1.9201157782165366e-05, "loss": 0.9733, "step": 958 }, { "epoch": 0.15, "grad_norm": 3.174988219472101, "learning_rate": 1.9199112346076422e-05, "loss": 0.9443, "step": 959 }, { "epoch": 0.15, "grad_norm": 3.9225357532512906, "learning_rate": 1.91970644038957e-05, "loss": 0.9652, "step": 960 }, { "epoch": 0.15, "grad_norm": 3.381998787995341, "learning_rate": 1.9195013956181115e-05, "loss": 0.8465, "step": 961 }, { "epoch": 0.15, "grad_norm": 2.2215389936563557, "learning_rate": 1.919296100349127e-05, "loss": 0.9918, "step": 962 }, { "epoch": 0.16, "grad_norm": 4.150577890995859, "learning_rate": 1.919090554638544e-05, "loss": 0.947, "step": 963 }, { "epoch": 0.16, "grad_norm": 2.2745533724845863, "learning_rate": 1.9188847585423593e-05, "loss": 1.0577, "step": 964 }, { "epoch": 0.16, "grad_norm": 3.557590782953413, "learning_rate": 1.9186787121166367e-05, "loss": 1.025, "step": 965 }, { "epoch": 0.16, "grad_norm": 3.182883266435693, "learning_rate": 1.9184724154175096e-05, "loss": 1.0163, "step": 966 }, { "epoch": 0.16, "grad_norm": 3.885338681063073, "learning_rate": 1.9182658685011787e-05, "loss": 0.9362, "step": 967 }, { "epoch": 0.16, "grad_norm": 4.177763365885268, "learning_rate": 1.918059071423913e-05, "loss": 0.8955, "step": 968 }, { "epoch": 0.16, "grad_norm": 2.9830210545379003, "learning_rate": 1.9178520242420498e-05, "loss": 1.0434, "step": 969 }, { "epoch": 0.16, "grad_norm": 3.3137994772470516, "learning_rate": 1.9176447270119945e-05, "loss": 0.9304, "step": 970 }, { "epoch": 0.16, "grad_norm": 4.33357947212922, "learning_rate": 1.917437179790221e-05, "loss": 0.9769, "step": 971 }, { "epoch": 0.16, "grad_norm": 2.679468796594091, "learning_rate": 1.91722938263327e-05, "loss": 0.991, "step": 972 }, { "epoch": 0.16, "grad_norm": 2.500582245735578, "learning_rate": 1.9170213355977513e-05, "loss": 0.8923, "step": 973 }, { "epoch": 0.16, "grad_norm": 4.1149935875955075, "learning_rate": 1.9168130387403433e-05, "loss": 0.9195, "step": 974 }, { "epoch": 0.16, "grad_norm": 2.771366345376967, "learning_rate": 1.916604492117791e-05, "loss": 1.0412, "step": 975 }, { "epoch": 0.16, "grad_norm": 3.326358523370711, "learning_rate": 1.9163956957869093e-05, "loss": 0.9033, "step": 976 }, { "epoch": 0.16, "grad_norm": 3.2551356599960664, "learning_rate": 1.9161866498045792e-05, "loss": 0.9731, "step": 977 }, { "epoch": 0.16, "grad_norm": 2.832966406755329, "learning_rate": 1.9159773542277508e-05, "loss": 0.9674, "step": 978 }, { "epoch": 0.16, "grad_norm": 3.5371263609993857, "learning_rate": 1.9157678091134424e-05, "loss": 0.9827, "step": 979 }, { "epoch": 0.16, "grad_norm": 1.9284136192909058, "learning_rate": 1.9155580145187392e-05, "loss": 0.9068, "step": 980 }, { "epoch": 0.16, "grad_norm": 3.187507928576613, "learning_rate": 1.9153479705007953e-05, "loss": 0.9692, "step": 981 }, { "epoch": 0.16, "grad_norm": 5.0426243694743125, "learning_rate": 1.9151376771168325e-05, "loss": 0.9742, "step": 982 }, { "epoch": 0.16, "grad_norm": 2.754076796857837, "learning_rate": 1.914927134424141e-05, "loss": 0.9244, "step": 983 }, { "epoch": 0.16, "grad_norm": 3.811804536017132, "learning_rate": 1.9147163424800773e-05, "loss": 0.9363, "step": 984 }, { "epoch": 0.16, "grad_norm": 4.493448362453401, "learning_rate": 1.914505301342068e-05, "loss": 0.9775, "step": 985 }, { "epoch": 0.16, "grad_norm": 4.243172940331622, "learning_rate": 1.914294011067606e-05, "loss": 0.9821, "step": 986 }, { "epoch": 0.16, "grad_norm": 2.8919788386308047, "learning_rate": 1.914082471714253e-05, "loss": 1.0133, "step": 987 }, { "epoch": 0.16, "grad_norm": 2.835272088289981, "learning_rate": 1.9138706833396374e-05, "loss": 1.0223, "step": 988 }, { "epoch": 0.16, "grad_norm": 2.7014727531780385, "learning_rate": 1.913658646001457e-05, "loss": 0.9869, "step": 989 }, { "epoch": 0.16, "grad_norm": 3.6388503566834074, "learning_rate": 1.913446359757476e-05, "loss": 0.9293, "step": 990 }, { "epoch": 0.16, "grad_norm": 2.961024944856098, "learning_rate": 1.9132338246655276e-05, "loss": 0.9816, "step": 991 }, { "epoch": 0.16, "grad_norm": 2.7437553596878708, "learning_rate": 1.9130210407835118e-05, "loss": 1.0056, "step": 992 }, { "epoch": 0.16, "grad_norm": 4.271408910655974, "learning_rate": 1.912808008169397e-05, "loss": 0.9524, "step": 993 }, { "epoch": 0.16, "grad_norm": 1.7128479624951833, "learning_rate": 1.9125947268812192e-05, "loss": 0.9256, "step": 994 }, { "epoch": 0.16, "grad_norm": 3.0555930588087215, "learning_rate": 1.9123811969770815e-05, "loss": 0.8362, "step": 995 }, { "epoch": 0.16, "grad_norm": 3.2002374799346556, "learning_rate": 1.9121674185151562e-05, "loss": 0.9521, "step": 996 }, { "epoch": 0.16, "grad_norm": 1.3213099036084992, "learning_rate": 1.911953391553682e-05, "loss": 0.2988, "step": 997 }, { "epoch": 0.16, "grad_norm": 3.118938824091895, "learning_rate": 1.9117391161509657e-05, "loss": 1.0635, "step": 998 }, { "epoch": 0.16, "grad_norm": 3.0544619269634126, "learning_rate": 1.911524592365382e-05, "loss": 0.9771, "step": 999 }, { "epoch": 0.16, "grad_norm": 2.767522301174825, "learning_rate": 1.911309820255373e-05, "loss": 0.9609, "step": 1000 }, { "epoch": 0.16, "grad_norm": 3.726970274425681, "learning_rate": 1.9110947998794484e-05, "loss": 0.9612, "step": 1001 }, { "epoch": 0.16, "grad_norm": 1.7604577400186234, "learning_rate": 1.910879531296186e-05, "loss": 1.0111, "step": 1002 }, { "epoch": 0.16, "grad_norm": 2.558040739676029, "learning_rate": 1.9106640145642308e-05, "loss": 1.0487, "step": 1003 }, { "epoch": 0.16, "grad_norm": 3.3932276989496217, "learning_rate": 1.9104482497422955e-05, "loss": 0.9639, "step": 1004 }, { "epoch": 0.16, "grad_norm": 3.886905960865068, "learning_rate": 1.9102322368891602e-05, "loss": 0.9394, "step": 1005 }, { "epoch": 0.16, "grad_norm": 3.9951138455589787, "learning_rate": 1.9100159760636727e-05, "loss": 0.9383, "step": 1006 }, { "epoch": 0.16, "grad_norm": 2.294713878539717, "learning_rate": 1.9097994673247488e-05, "loss": 0.3143, "step": 1007 }, { "epoch": 0.16, "grad_norm": 2.6482293601379174, "learning_rate": 1.9095827107313713e-05, "loss": 0.9621, "step": 1008 }, { "epoch": 0.16, "grad_norm": 2.9185221809823942, "learning_rate": 1.90936570634259e-05, "loss": 1.0056, "step": 1009 }, { "epoch": 0.16, "grad_norm": 3.647125596406956, "learning_rate": 1.9091484542175244e-05, "loss": 1.0418, "step": 1010 }, { "epoch": 0.16, "grad_norm": 2.4686942758186396, "learning_rate": 1.908930954415358e-05, "loss": 0.9534, "step": 1011 }, { "epoch": 0.16, "grad_norm": 4.0338887885692785, "learning_rate": 1.9087132069953456e-05, "loss": 0.9253, "step": 1012 }, { "epoch": 0.16, "grad_norm": 4.2300320696685985, "learning_rate": 1.908495212016807e-05, "loss": 0.9644, "step": 1013 }, { "epoch": 0.16, "grad_norm": 2.70227090208574, "learning_rate": 1.908276969539129e-05, "loss": 0.9903, "step": 1014 }, { "epoch": 0.16, "grad_norm": 3.300523230947248, "learning_rate": 1.9080584796217683e-05, "loss": 0.9422, "step": 1015 }, { "epoch": 0.16, "grad_norm": 3.6106646987293614, "learning_rate": 1.9078397423242467e-05, "loss": 0.967, "step": 1016 }, { "epoch": 0.16, "grad_norm": 2.8471377807335685, "learning_rate": 1.9076207577061546e-05, "loss": 0.9569, "step": 1017 }, { "epoch": 0.16, "grad_norm": 3.612705164168597, "learning_rate": 1.907401525827149e-05, "loss": 0.862, "step": 1018 }, { "epoch": 0.16, "grad_norm": 3.6630228027500453, "learning_rate": 1.907182046746956e-05, "loss": 0.9771, "step": 1019 }, { "epoch": 0.16, "grad_norm": 3.873991188894009, "learning_rate": 1.906962320525366e-05, "loss": 0.9376, "step": 1020 }, { "epoch": 0.16, "grad_norm": 3.4932162073267623, "learning_rate": 1.90674234722224e-05, "loss": 1.0773, "step": 1021 }, { "epoch": 0.16, "grad_norm": 2.9718526588667182, "learning_rate": 1.9065221268975037e-05, "loss": 1.0548, "step": 1022 }, { "epoch": 0.16, "grad_norm": 2.3322525813950925, "learning_rate": 1.9063016596111516e-05, "loss": 1.1069, "step": 1023 }, { "epoch": 0.16, "grad_norm": 4.421784848214179, "learning_rate": 1.906080945423245e-05, "loss": 1.0928, "step": 1024 }, { "epoch": 0.17, "grad_norm": 2.5674661105555647, "learning_rate": 1.9058599843939127e-05, "loss": 0.345, "step": 1025 }, { "epoch": 0.17, "grad_norm": 3.487779946525196, "learning_rate": 1.9056387765833506e-05, "loss": 0.9832, "step": 1026 }, { "epoch": 0.17, "grad_norm": 1.9025982758933566, "learning_rate": 1.905417322051822e-05, "loss": 0.959, "step": 1027 }, { "epoch": 0.17, "grad_norm": 2.1479862363434723, "learning_rate": 1.9051956208596564e-05, "loss": 0.9962, "step": 1028 }, { "epoch": 0.17, "grad_norm": 2.524664329651962, "learning_rate": 1.9049736730672518e-05, "loss": 1.0087, "step": 1029 }, { "epoch": 0.17, "grad_norm": 3.8956482107204513, "learning_rate": 1.904751478735073e-05, "loss": 0.9933, "step": 1030 }, { "epoch": 0.17, "grad_norm": 3.5243044714881635, "learning_rate": 1.9045290379236517e-05, "loss": 0.9132, "step": 1031 }, { "epoch": 0.17, "grad_norm": 3.7065012545921214, "learning_rate": 1.904306350693587e-05, "loss": 0.9449, "step": 1032 }, { "epoch": 0.17, "grad_norm": 2.9839397332699447, "learning_rate": 1.904083417105545e-05, "loss": 1.0196, "step": 1033 }, { "epoch": 0.17, "grad_norm": 2.0189038008453672, "learning_rate": 1.9038602372202594e-05, "loss": 0.9445, "step": 1034 }, { "epoch": 0.17, "grad_norm": 3.5131591287043396, "learning_rate": 1.9036368110985296e-05, "loss": 1.0575, "step": 1035 }, { "epoch": 0.17, "grad_norm": 3.279609197269761, "learning_rate": 1.9034131388012237e-05, "loss": 0.972, "step": 1036 }, { "epoch": 0.17, "grad_norm": 4.597025631135888, "learning_rate": 1.903189220389276e-05, "loss": 0.9736, "step": 1037 }, { "epoch": 0.17, "grad_norm": 1.3873762264547023, "learning_rate": 1.9029650559236886e-05, "loss": 0.3432, "step": 1038 }, { "epoch": 0.17, "grad_norm": 2.9135463554081826, "learning_rate": 1.9027406454655292e-05, "loss": 0.9815, "step": 1039 }, { "epoch": 0.17, "grad_norm": 3.0458002787064937, "learning_rate": 1.9025159890759336e-05, "loss": 0.9585, "step": 1040 }, { "epoch": 0.17, "grad_norm": 3.280963412893848, "learning_rate": 1.902291086816105e-05, "loss": 0.9884, "step": 1041 }, { "epoch": 0.17, "grad_norm": 3.7007041776562555, "learning_rate": 1.902065938747312e-05, "loss": 0.9904, "step": 1042 }, { "epoch": 0.17, "grad_norm": 2.009880218352941, "learning_rate": 1.901840544930892e-05, "loss": 0.9308, "step": 1043 }, { "epoch": 0.17, "grad_norm": 1.8869159982377375, "learning_rate": 1.9016149054282486e-05, "loss": 0.9901, "step": 1044 }, { "epoch": 0.17, "grad_norm": 2.8938150485083085, "learning_rate": 1.9013890203008512e-05, "loss": 0.9679, "step": 1045 }, { "epoch": 0.17, "grad_norm": 2.737936871650051, "learning_rate": 1.901162889610238e-05, "loss": 0.997, "step": 1046 }, { "epoch": 0.17, "grad_norm": 2.53099303648438, "learning_rate": 1.9009365134180128e-05, "loss": 0.3124, "step": 1047 }, { "epoch": 0.17, "grad_norm": 3.368292748837327, "learning_rate": 1.9007098917858472e-05, "loss": 0.9817, "step": 1048 }, { "epoch": 0.17, "grad_norm": 3.3297065236672796, "learning_rate": 1.9004830247754786e-05, "loss": 0.9785, "step": 1049 }, { "epoch": 0.17, "grad_norm": 4.72899178112959, "learning_rate": 1.9002559124487122e-05, "loss": 1.0118, "step": 1050 }, { "epoch": 0.17, "grad_norm": 2.315800656800441, "learning_rate": 1.9000285548674195e-05, "loss": 0.9669, "step": 1051 }, { "epoch": 0.17, "grad_norm": 2.1928124361553296, "learning_rate": 1.8998009520935388e-05, "loss": 0.9204, "step": 1052 }, { "epoch": 0.17, "grad_norm": 2.8623120175400856, "learning_rate": 1.899573104189076e-05, "loss": 0.932, "step": 1053 }, { "epoch": 0.17, "grad_norm": 2.018218154056816, "learning_rate": 1.8993450112161023e-05, "loss": 0.9389, "step": 1054 }, { "epoch": 0.17, "grad_norm": 2.130356712738677, "learning_rate": 1.8991166732367575e-05, "loss": 1.0208, "step": 1055 }, { "epoch": 0.17, "grad_norm": 4.243071799378103, "learning_rate": 1.8988880903132464e-05, "loss": 1.0553, "step": 1056 }, { "epoch": 0.17, "grad_norm": 1.911360596974061, "learning_rate": 1.8986592625078413e-05, "loss": 0.9963, "step": 1057 }, { "epoch": 0.17, "grad_norm": 2.23290081343374, "learning_rate": 1.8984301898828815e-05, "loss": 0.9318, "step": 1058 }, { "epoch": 0.17, "grad_norm": 3.7272580055571995, "learning_rate": 1.8982008725007725e-05, "loss": 0.9479, "step": 1059 }, { "epoch": 0.17, "grad_norm": 1.7944765088405699, "learning_rate": 1.897971310423987e-05, "loss": 0.9857, "step": 1060 }, { "epoch": 0.17, "grad_norm": 3.656009633567021, "learning_rate": 1.8977415037150638e-05, "loss": 0.9777, "step": 1061 }, { "epoch": 0.17, "grad_norm": 3.1917951479981714, "learning_rate": 1.8975114524366086e-05, "loss": 0.9248, "step": 1062 }, { "epoch": 0.17, "grad_norm": 3.9117644182194127, "learning_rate": 1.8972811566512934e-05, "loss": 0.9734, "step": 1063 }, { "epoch": 0.17, "grad_norm": 3.0498770767205468, "learning_rate": 1.8970506164218578e-05, "loss": 0.9472, "step": 1064 }, { "epoch": 0.17, "grad_norm": 2.7483122588352673, "learning_rate": 1.8968198318111067e-05, "loss": 0.9688, "step": 1065 }, { "epoch": 0.17, "grad_norm": 3.731296818885166, "learning_rate": 1.8965888028819125e-05, "loss": 0.9627, "step": 1066 }, { "epoch": 0.17, "grad_norm": 2.169593985409122, "learning_rate": 1.8963575296972137e-05, "loss": 0.9526, "step": 1067 }, { "epoch": 0.17, "grad_norm": 3.5569280584705063, "learning_rate": 1.8961260123200158e-05, "loss": 0.9799, "step": 1068 }, { "epoch": 0.17, "grad_norm": 2.517122474555102, "learning_rate": 1.89589425081339e-05, "loss": 1.0395, "step": 1069 }, { "epoch": 0.17, "grad_norm": 2.0386421091801545, "learning_rate": 1.895662245240475e-05, "loss": 0.9675, "step": 1070 }, { "epoch": 0.17, "grad_norm": 3.195102824674301, "learning_rate": 1.8954299956644757e-05, "loss": 0.9685, "step": 1071 }, { "epoch": 0.17, "grad_norm": 3.7810199762163217, "learning_rate": 1.8951975021486623e-05, "loss": 1.0438, "step": 1072 }, { "epoch": 0.17, "grad_norm": 2.9306733367376583, "learning_rate": 1.8949647647563733e-05, "loss": 1.0371, "step": 1073 }, { "epoch": 0.17, "grad_norm": 2.8596266338752088, "learning_rate": 1.8947317835510125e-05, "loss": 0.9273, "step": 1074 }, { "epoch": 0.17, "grad_norm": 2.7106868397152355, "learning_rate": 1.8944985585960504e-05, "loss": 0.9962, "step": 1075 }, { "epoch": 0.17, "grad_norm": 3.5513530140765814, "learning_rate": 1.8942650899550237e-05, "loss": 0.93, "step": 1076 }, { "epoch": 0.17, "grad_norm": 3.280212238237163, "learning_rate": 1.8940313776915365e-05, "loss": 0.9449, "step": 1077 }, { "epoch": 0.17, "grad_norm": 3.022214499242427, "learning_rate": 1.8937974218692577e-05, "loss": 0.9737, "step": 1078 }, { "epoch": 0.17, "grad_norm": 2.808498163413842, "learning_rate": 1.8935632225519232e-05, "loss": 0.9298, "step": 1079 }, { "epoch": 0.17, "grad_norm": 2.390185945372157, "learning_rate": 1.893328779803336e-05, "loss": 0.9268, "step": 1080 }, { "epoch": 0.17, "grad_norm": 4.569285115894708, "learning_rate": 1.8930940936873644e-05, "loss": 0.9874, "step": 1081 }, { "epoch": 0.17, "grad_norm": 2.003153579674666, "learning_rate": 1.892859164267943e-05, "loss": 0.9876, "step": 1082 }, { "epoch": 0.17, "grad_norm": 2.8440772905752096, "learning_rate": 1.892623991609074e-05, "loss": 0.9038, "step": 1083 }, { "epoch": 0.17, "grad_norm": 2.898687223385537, "learning_rate": 1.892388575774824e-05, "loss": 0.9444, "step": 1084 }, { "epoch": 0.17, "grad_norm": 3.080538350653371, "learning_rate": 1.892152916829327e-05, "loss": 0.9505, "step": 1085 }, { "epoch": 0.17, "grad_norm": 2.9752013130355364, "learning_rate": 1.8919170148367837e-05, "loss": 1.0062, "step": 1086 }, { "epoch": 0.18, "grad_norm": 3.276915530212959, "learning_rate": 1.891680869861459e-05, "loss": 0.9465, "step": 1087 }, { "epoch": 0.18, "grad_norm": 1.4619337958441883, "learning_rate": 1.8914444819676866e-05, "loss": 0.3125, "step": 1088 }, { "epoch": 0.18, "grad_norm": 3.4659716502426337, "learning_rate": 1.8912078512198642e-05, "loss": 0.9578, "step": 1089 }, { "epoch": 0.18, "grad_norm": 3.34386693446965, "learning_rate": 1.890970977682457e-05, "loss": 1.007, "step": 1090 }, { "epoch": 0.18, "grad_norm": 2.7387852443027367, "learning_rate": 1.8907338614199956e-05, "loss": 0.9226, "step": 1091 }, { "epoch": 0.18, "grad_norm": 3.3202684107826452, "learning_rate": 1.8904965024970772e-05, "loss": 1.0006, "step": 1092 }, { "epoch": 0.18, "grad_norm": 4.246773111834834, "learning_rate": 1.8902589009783648e-05, "loss": 1.0161, "step": 1093 }, { "epoch": 0.18, "grad_norm": 3.0050489536556952, "learning_rate": 1.8900210569285877e-05, "loss": 0.9652, "step": 1094 }, { "epoch": 0.18, "grad_norm": 1.9123349236946128, "learning_rate": 1.889782970412541e-05, "loss": 0.3406, "step": 1095 }, { "epoch": 0.18, "grad_norm": 1.3482611583981485, "learning_rate": 1.8895446414950864e-05, "loss": 1.0197, "step": 1096 }, { "epoch": 0.18, "grad_norm": 2.335902593345481, "learning_rate": 1.889306070241151e-05, "loss": 0.9996, "step": 1097 }, { "epoch": 0.18, "grad_norm": 3.4533810067537605, "learning_rate": 1.8890672567157278e-05, "loss": 0.9849, "step": 1098 }, { "epoch": 0.18, "grad_norm": 4.133576003508612, "learning_rate": 1.888828200983877e-05, "loss": 0.974, "step": 1099 }, { "epoch": 0.18, "grad_norm": 4.23059070583966, "learning_rate": 1.8885889031107232e-05, "loss": 0.9746, "step": 1100 }, { "epoch": 0.18, "grad_norm": 2.3145270870961645, "learning_rate": 1.8883493631614583e-05, "loss": 1.0484, "step": 1101 }, { "epoch": 0.18, "grad_norm": 2.3682473205403056, "learning_rate": 1.8881095812013394e-05, "loss": 1.0167, "step": 1102 }, { "epoch": 0.18, "grad_norm": 2.4808171545732765, "learning_rate": 1.8878695572956896e-05, "loss": 1.0695, "step": 1103 }, { "epoch": 0.18, "grad_norm": 3.064560800171734, "learning_rate": 1.8876292915098982e-05, "loss": 1.0119, "step": 1104 }, { "epoch": 0.18, "grad_norm": 3.095482851488973, "learning_rate": 1.8873887839094202e-05, "loss": 1.0007, "step": 1105 }, { "epoch": 0.18, "grad_norm": 3.016463405301795, "learning_rate": 1.8871480345597763e-05, "loss": 1.0233, "step": 1106 }, { "epoch": 0.18, "grad_norm": 1.9151777482769365, "learning_rate": 1.8869070435265535e-05, "loss": 0.9485, "step": 1107 }, { "epoch": 0.18, "grad_norm": 2.533436713702639, "learning_rate": 1.8866658108754045e-05, "loss": 0.9921, "step": 1108 }, { "epoch": 0.18, "grad_norm": 3.077004165681307, "learning_rate": 1.886424336672047e-05, "loss": 1.0034, "step": 1109 }, { "epoch": 0.18, "grad_norm": 4.288199249278614, "learning_rate": 1.8861826209822662e-05, "loss": 0.9745, "step": 1110 }, { "epoch": 0.18, "grad_norm": 2.691130824730416, "learning_rate": 1.8859406638719117e-05, "loss": 0.9528, "step": 1111 }, { "epoch": 0.18, "grad_norm": 2.8020298514290602, "learning_rate": 1.885698465406899e-05, "loss": 0.8955, "step": 1112 }, { "epoch": 0.18, "grad_norm": 3.202853754235511, "learning_rate": 1.8854560256532098e-05, "loss": 0.9586, "step": 1113 }, { "epoch": 0.18, "grad_norm": 2.486521627203606, "learning_rate": 1.885213344676892e-05, "loss": 1.0178, "step": 1114 }, { "epoch": 0.18, "grad_norm": 3.680811230558074, "learning_rate": 1.8849704225440578e-05, "loss": 0.942, "step": 1115 }, { "epoch": 0.18, "grad_norm": 3.2673526556158072, "learning_rate": 1.884727259320886e-05, "loss": 0.9949, "step": 1116 }, { "epoch": 0.18, "grad_norm": 2.415701201940775, "learning_rate": 1.8844838550736215e-05, "loss": 0.9578, "step": 1117 }, { "epoch": 0.18, "grad_norm": 4.335159919367365, "learning_rate": 1.8842402098685735e-05, "loss": 0.9957, "step": 1118 }, { "epoch": 0.18, "grad_norm": 3.701245052047045, "learning_rate": 1.883996323772118e-05, "loss": 0.9625, "step": 1119 }, { "epoch": 0.18, "grad_norm": 3.654750288170763, "learning_rate": 1.883752196850697e-05, "loss": 1.0192, "step": 1120 }, { "epoch": 0.18, "grad_norm": 2.751010622376346, "learning_rate": 1.8835078291708164e-05, "loss": 0.9905, "step": 1121 }, { "epoch": 0.18, "grad_norm": 2.5929323309363363, "learning_rate": 1.8832632207990493e-05, "loss": 0.9336, "step": 1122 }, { "epoch": 0.18, "grad_norm": 3.5466650005805134, "learning_rate": 1.883018371802033e-05, "loss": 0.9999, "step": 1123 }, { "epoch": 0.18, "grad_norm": 3.3277283911276756, "learning_rate": 1.8827732822464723e-05, "loss": 1.0139, "step": 1124 }, { "epoch": 0.18, "grad_norm": 3.8716756035029287, "learning_rate": 1.882527952199136e-05, "loss": 0.9076, "step": 1125 }, { "epoch": 0.18, "grad_norm": 3.6353839920164113, "learning_rate": 1.8822823817268577e-05, "loss": 0.9446, "step": 1126 }, { "epoch": 0.18, "grad_norm": 3.133344419608558, "learning_rate": 1.8820365708965385e-05, "loss": 0.9566, "step": 1127 }, { "epoch": 0.18, "grad_norm": 2.085316997919387, "learning_rate": 1.881790519775144e-05, "loss": 0.304, "step": 1128 }, { "epoch": 0.18, "grad_norm": 2.3480780757290076, "learning_rate": 1.8815442284297055e-05, "loss": 0.9688, "step": 1129 }, { "epoch": 0.18, "grad_norm": 2.33831298888155, "learning_rate": 1.881297696927319e-05, "loss": 0.9593, "step": 1130 }, { "epoch": 0.18, "grad_norm": 2.200564689123973, "learning_rate": 1.881050925335147e-05, "loss": 1.0534, "step": 1131 }, { "epoch": 0.18, "grad_norm": 4.963383301499837, "learning_rate": 1.8808039137204162e-05, "loss": 0.9354, "step": 1132 }, { "epoch": 0.18, "grad_norm": 2.3156902885756896, "learning_rate": 1.8805566621504203e-05, "loss": 0.9886, "step": 1133 }, { "epoch": 0.18, "grad_norm": 2.716180573799331, "learning_rate": 1.8803091706925168e-05, "loss": 0.9748, "step": 1134 }, { "epoch": 0.18, "grad_norm": 2.481404381856941, "learning_rate": 1.8800614394141292e-05, "loss": 0.995, "step": 1135 }, { "epoch": 0.18, "grad_norm": 2.9103759682680796, "learning_rate": 1.8798134683827464e-05, "loss": 0.9799, "step": 1136 }, { "epoch": 0.18, "grad_norm": 2.5177478244784597, "learning_rate": 1.879565257665923e-05, "loss": 0.9578, "step": 1137 }, { "epoch": 0.18, "grad_norm": 2.2937667471874574, "learning_rate": 1.8793168073312776e-05, "loss": 0.2872, "step": 1138 }, { "epoch": 0.18, "grad_norm": 3.378587017524538, "learning_rate": 1.8790681174464956e-05, "loss": 0.9606, "step": 1139 }, { "epoch": 0.18, "grad_norm": 3.3648514931091253, "learning_rate": 1.8788191880793264e-05, "loss": 0.9778, "step": 1140 }, { "epoch": 0.18, "grad_norm": 4.354471929091977, "learning_rate": 1.878570019297586e-05, "loss": 0.8959, "step": 1141 }, { "epoch": 0.18, "grad_norm": 4.138021780805757, "learning_rate": 1.8783206111691543e-05, "loss": 1.0184, "step": 1142 }, { "epoch": 0.18, "grad_norm": 2.273853381943454, "learning_rate": 1.878070963761977e-05, "loss": 0.993, "step": 1143 }, { "epoch": 0.18, "grad_norm": 3.731376817040143, "learning_rate": 1.877821077144065e-05, "loss": 0.8854, "step": 1144 }, { "epoch": 0.18, "grad_norm": 3.444449734085561, "learning_rate": 1.8775709513834945e-05, "loss": 0.9602, "step": 1145 }, { "epoch": 0.18, "grad_norm": 2.4996029538529516, "learning_rate": 1.8773205865484065e-05, "loss": 0.9549, "step": 1146 }, { "epoch": 0.18, "grad_norm": 2.966364172897522, "learning_rate": 1.8770699827070068e-05, "loss": 0.9644, "step": 1147 }, { "epoch": 0.18, "grad_norm": 3.6651617487542416, "learning_rate": 1.8768191399275678e-05, "loss": 1.016, "step": 1148 }, { "epoch": 0.19, "grad_norm": 2.1504593646767605, "learning_rate": 1.876568058278425e-05, "loss": 0.3264, "step": 1149 }, { "epoch": 0.19, "grad_norm": 3.234284035698129, "learning_rate": 1.87631673782798e-05, "loss": 0.9601, "step": 1150 }, { "epoch": 0.19, "grad_norm": 3.251053419319054, "learning_rate": 1.8760651786447006e-05, "loss": 0.9689, "step": 1151 }, { "epoch": 0.19, "grad_norm": 1.651797878559347, "learning_rate": 1.8758133807971175e-05, "loss": 0.346, "step": 1152 }, { "epoch": 0.19, "grad_norm": 3.0936249987077287, "learning_rate": 1.8755613443538275e-05, "loss": 0.9307, "step": 1153 }, { "epoch": 0.19, "grad_norm": 2.1682849000549713, "learning_rate": 1.875309069383492e-05, "loss": 1.0559, "step": 1154 }, { "epoch": 0.19, "grad_norm": 3.9853315813254926, "learning_rate": 1.875056555954838e-05, "loss": 0.9718, "step": 1155 }, { "epoch": 0.19, "grad_norm": 2.574220602356124, "learning_rate": 1.8748038041366573e-05, "loss": 0.9598, "step": 1156 }, { "epoch": 0.19, "grad_norm": 2.8856917891643907, "learning_rate": 1.8745508139978063e-05, "loss": 0.9537, "step": 1157 }, { "epoch": 0.19, "grad_norm": 3.0430992793831306, "learning_rate": 1.8742975856072064e-05, "loss": 1.0053, "step": 1158 }, { "epoch": 0.19, "grad_norm": 2.938384389651922, "learning_rate": 1.874044119033844e-05, "loss": 0.909, "step": 1159 }, { "epoch": 0.19, "grad_norm": 1.0665667626166926, "learning_rate": 1.8737904143467703e-05, "loss": 0.965, "step": 1160 }, { "epoch": 0.19, "grad_norm": 1.963097769212274, "learning_rate": 1.8735364716151017e-05, "loss": 0.9983, "step": 1161 }, { "epoch": 0.19, "grad_norm": 3.252375688105104, "learning_rate": 1.873282290908019e-05, "loss": 0.9767, "step": 1162 }, { "epoch": 0.19, "grad_norm": 2.7768330164793693, "learning_rate": 1.873027872294768e-05, "loss": 0.9404, "step": 1163 }, { "epoch": 0.19, "grad_norm": 3.193849261210134, "learning_rate": 1.8727732158446598e-05, "loss": 0.9326, "step": 1164 }, { "epoch": 0.19, "grad_norm": 2.786542935141755, "learning_rate": 1.872518321627069e-05, "loss": 1.0347, "step": 1165 }, { "epoch": 0.19, "grad_norm": 3.276218152813799, "learning_rate": 1.8722631897114364e-05, "loss": 0.9284, "step": 1166 }, { "epoch": 0.19, "grad_norm": 1.5561871987046125, "learning_rate": 1.872007820167267e-05, "loss": 0.9454, "step": 1167 }, { "epoch": 0.19, "grad_norm": 2.691776864676748, "learning_rate": 1.8717522130641305e-05, "loss": 0.9329, "step": 1168 }, { "epoch": 0.19, "grad_norm": 3.6051765893866587, "learning_rate": 1.8714963684716605e-05, "loss": 1.0194, "step": 1169 }, { "epoch": 0.19, "grad_norm": 2.9944043425935916, "learning_rate": 1.8712402864595575e-05, "loss": 0.9228, "step": 1170 }, { "epoch": 0.19, "grad_norm": 2.365898158882915, "learning_rate": 1.8709839670975843e-05, "loss": 0.3071, "step": 1171 }, { "epoch": 0.19, "grad_norm": 3.363616394995949, "learning_rate": 1.8707274104555696e-05, "loss": 0.954, "step": 1172 }, { "epoch": 0.19, "grad_norm": 2.335243759991471, "learning_rate": 1.8704706166034067e-05, "loss": 1.0206, "step": 1173 }, { "epoch": 0.19, "grad_norm": 1.1949656363458425, "learning_rate": 1.870213585611053e-05, "loss": 1.0471, "step": 1174 }, { "epoch": 0.19, "grad_norm": 2.2629573507297756, "learning_rate": 1.869956317548531e-05, "loss": 0.8954, "step": 1175 }, { "epoch": 0.19, "grad_norm": 1.1346668193826748, "learning_rate": 1.8696988124859278e-05, "loss": 1.0041, "step": 1176 }, { "epoch": 0.19, "grad_norm": 2.8120007389713364, "learning_rate": 1.8694410704933944e-05, "loss": 0.9489, "step": 1177 }, { "epoch": 0.19, "grad_norm": 4.333928263001451, "learning_rate": 1.8691830916411473e-05, "loss": 0.97, "step": 1178 }, { "epoch": 0.19, "grad_norm": 2.8880158066884984, "learning_rate": 1.868924875999467e-05, "loss": 0.9132, "step": 1179 }, { "epoch": 0.19, "grad_norm": 3.023479609233165, "learning_rate": 1.8686664236386984e-05, "loss": 1.0147, "step": 1180 }, { "epoch": 0.19, "grad_norm": 2.8741050654182754, "learning_rate": 1.8684077346292507e-05, "loss": 0.9447, "step": 1181 }, { "epoch": 0.19, "grad_norm": 2.714715880531832, "learning_rate": 1.8681488090415984e-05, "loss": 0.985, "step": 1182 }, { "epoch": 0.19, "grad_norm": 2.339896123487831, "learning_rate": 1.86788964694628e-05, "loss": 0.9052, "step": 1183 }, { "epoch": 0.19, "grad_norm": 5.044508624310418, "learning_rate": 1.8676302484138982e-05, "loss": 1.0032, "step": 1184 }, { "epoch": 0.19, "grad_norm": 3.454404015816938, "learning_rate": 1.8673706135151205e-05, "loss": 0.9432, "step": 1185 }, { "epoch": 0.19, "grad_norm": 3.261874294569645, "learning_rate": 1.8671107423206785e-05, "loss": 0.9465, "step": 1186 }, { "epoch": 0.19, "grad_norm": 2.8914411062898338, "learning_rate": 1.866850634901368e-05, "loss": 0.9837, "step": 1187 }, { "epoch": 0.19, "grad_norm": 4.750275252548592, "learning_rate": 1.8665902913280498e-05, "loss": 1.0121, "step": 1188 }, { "epoch": 0.19, "grad_norm": 1.9896225517628403, "learning_rate": 1.8663297116716488e-05, "loss": 0.9542, "step": 1189 }, { "epoch": 0.19, "grad_norm": 2.2965886982092973, "learning_rate": 1.8660688960031533e-05, "loss": 0.9331, "step": 1190 }, { "epoch": 0.19, "grad_norm": 3.5390781292770233, "learning_rate": 1.8658078443936175e-05, "loss": 1.0388, "step": 1191 }, { "epoch": 0.19, "grad_norm": 1.9458602685582072, "learning_rate": 1.865546556914159e-05, "loss": 0.344, "step": 1192 }, { "epoch": 0.19, "grad_norm": 1.970966060704273, "learning_rate": 1.865285033635959e-05, "loss": 0.9032, "step": 1193 }, { "epoch": 0.19, "grad_norm": 2.3454741176396157, "learning_rate": 1.8650232746302645e-05, "loss": 1.0249, "step": 1194 }, { "epoch": 0.19, "grad_norm": 3.4204948812334077, "learning_rate": 1.8647612799683853e-05, "loss": 0.9746, "step": 1195 }, { "epoch": 0.19, "grad_norm": 2.499832243059266, "learning_rate": 1.864499049721696e-05, "loss": 0.963, "step": 1196 }, { "epoch": 0.19, "grad_norm": 4.099406055135647, "learning_rate": 1.8642365839616358e-05, "loss": 0.9406, "step": 1197 }, { "epoch": 0.19, "grad_norm": 2.9928647064311105, "learning_rate": 1.863973882759707e-05, "loss": 0.9759, "step": 1198 }, { "epoch": 0.19, "grad_norm": 4.421600872479638, "learning_rate": 1.863710946187477e-05, "loss": 1.0087, "step": 1199 }, { "epoch": 0.19, "grad_norm": 2.0712227601658406, "learning_rate": 1.8634477743165772e-05, "loss": 0.2916, "step": 1200 }, { "epoch": 0.19, "grad_norm": 1.8521105120294905, "learning_rate": 1.863184367218702e-05, "loss": 0.865, "step": 1201 }, { "epoch": 0.19, "grad_norm": 3.2097645494381033, "learning_rate": 1.862920724965612e-05, "loss": 0.9432, "step": 1202 }, { "epoch": 0.19, "grad_norm": 1.0721544440879491, "learning_rate": 1.8626568476291296e-05, "loss": 0.9456, "step": 1203 }, { "epoch": 0.19, "grad_norm": 3.4853195483959793, "learning_rate": 1.8623927352811432e-05, "loss": 1.0044, "step": 1204 }, { "epoch": 0.19, "grad_norm": 2.530525386373174, "learning_rate": 1.862128387993603e-05, "loss": 0.9449, "step": 1205 }, { "epoch": 0.19, "grad_norm": 3.459506942589888, "learning_rate": 1.8618638058385255e-05, "loss": 0.8698, "step": 1206 }, { "epoch": 0.19, "grad_norm": 3.4906286391578223, "learning_rate": 1.8615989888879898e-05, "loss": 1.0231, "step": 1207 }, { "epoch": 0.19, "grad_norm": 2.793620683036786, "learning_rate": 1.8613339372141395e-05, "loss": 0.9745, "step": 1208 }, { "epoch": 0.19, "grad_norm": 3.445379208980741, "learning_rate": 1.861068650889182e-05, "loss": 0.9902, "step": 1209 }, { "epoch": 0.19, "grad_norm": 1.7131687751863678, "learning_rate": 1.8608031299853882e-05, "loss": 0.3247, "step": 1210 }, { "epoch": 0.2, "grad_norm": 2.1133649601460105, "learning_rate": 1.860537374575094e-05, "loss": 0.9977, "step": 1211 }, { "epoch": 0.2, "grad_norm": 2.857637471574257, "learning_rate": 1.860271384730698e-05, "loss": 0.9996, "step": 1212 }, { "epoch": 0.2, "grad_norm": 3.314216600834365, "learning_rate": 1.860005160524663e-05, "loss": 0.9962, "step": 1213 }, { "epoch": 0.2, "grad_norm": 2.4585356117262114, "learning_rate": 1.859738702029516e-05, "loss": 0.927, "step": 1214 }, { "epoch": 0.2, "grad_norm": 2.5149426215965835, "learning_rate": 1.859472009317848e-05, "loss": 0.3197, "step": 1215 }, { "epoch": 0.2, "grad_norm": 4.083649966880358, "learning_rate": 1.8592050824623133e-05, "loss": 0.983, "step": 1216 }, { "epoch": 0.2, "grad_norm": 2.8891685321964315, "learning_rate": 1.8589379215356302e-05, "loss": 0.9798, "step": 1217 }, { "epoch": 0.2, "grad_norm": 4.43376539797135, "learning_rate": 1.8586705266105802e-05, "loss": 0.939, "step": 1218 }, { "epoch": 0.2, "grad_norm": 2.0153446446104595, "learning_rate": 1.858402897760009e-05, "loss": 1.0201, "step": 1219 }, { "epoch": 0.2, "grad_norm": 1.5339259685124413, "learning_rate": 1.858135035056827e-05, "loss": 0.3381, "step": 1220 }, { "epoch": 0.2, "grad_norm": 1.7521315263734256, "learning_rate": 1.8578669385740065e-05, "loss": 1.0375, "step": 1221 }, { "epoch": 0.2, "grad_norm": 3.728196655505718, "learning_rate": 1.8575986083845848e-05, "loss": 1.0298, "step": 1222 }, { "epoch": 0.2, "grad_norm": 2.9645912354382524, "learning_rate": 1.8573300445616628e-05, "loss": 1.0204, "step": 1223 }, { "epoch": 0.2, "grad_norm": 3.4385418786841675, "learning_rate": 1.8570612471784034e-05, "loss": 0.9639, "step": 1224 }, { "epoch": 0.2, "grad_norm": 3.1212886897428755, "learning_rate": 1.8567922163080357e-05, "loss": 0.9822, "step": 1225 }, { "epoch": 0.2, "grad_norm": 4.413627994723351, "learning_rate": 1.8565229520238504e-05, "loss": 1.0375, "step": 1226 }, { "epoch": 0.2, "grad_norm": 1.6164630243831741, "learning_rate": 1.856253454399203e-05, "loss": 1.076, "step": 1227 }, { "epoch": 0.2, "grad_norm": 3.465430656191743, "learning_rate": 1.855983723507512e-05, "loss": 0.937, "step": 1228 }, { "epoch": 0.2, "grad_norm": 2.486804086537445, "learning_rate": 1.8557137594222588e-05, "loss": 0.9438, "step": 1229 }, { "epoch": 0.2, "grad_norm": 3.2665920742108088, "learning_rate": 1.8554435622169903e-05, "loss": 0.8951, "step": 1230 }, { "epoch": 0.2, "grad_norm": 2.687281754969289, "learning_rate": 1.8551731319653147e-05, "loss": 0.964, "step": 1231 }, { "epoch": 0.2, "grad_norm": 1.86226781639562, "learning_rate": 1.854902468740905e-05, "loss": 0.9332, "step": 1232 }, { "epoch": 0.2, "grad_norm": 3.256197082909605, "learning_rate": 1.8546315726174973e-05, "loss": 0.9683, "step": 1233 }, { "epoch": 0.2, "grad_norm": 3.398237691683689, "learning_rate": 1.854360443668891e-05, "loss": 0.9609, "step": 1234 }, { "epoch": 0.2, "grad_norm": 3.3345784405010614, "learning_rate": 1.8540890819689497e-05, "loss": 0.9662, "step": 1235 }, { "epoch": 0.2, "grad_norm": 2.4845039766049126, "learning_rate": 1.8538174875915992e-05, "loss": 0.9006, "step": 1236 }, { "epoch": 0.2, "grad_norm": 4.144612015810006, "learning_rate": 1.853545660610829e-05, "loss": 1.0109, "step": 1237 }, { "epoch": 0.2, "grad_norm": 3.696890302611899, "learning_rate": 1.853273601100693e-05, "loss": 0.979, "step": 1238 }, { "epoch": 0.2, "grad_norm": 2.3988630184486395, "learning_rate": 1.8530013091353075e-05, "loss": 0.9395, "step": 1239 }, { "epoch": 0.2, "grad_norm": 2.418942848422998, "learning_rate": 1.8527287847888524e-05, "loss": 1.018, "step": 1240 }, { "epoch": 0.2, "grad_norm": 3.080860607001386, "learning_rate": 1.8524560281355705e-05, "loss": 1.0053, "step": 1241 }, { "epoch": 0.2, "grad_norm": 4.043708652479515, "learning_rate": 1.8521830392497685e-05, "loss": 0.8975, "step": 1242 }, { "epoch": 0.2, "grad_norm": 2.9682690682612787, "learning_rate": 1.8519098182058163e-05, "loss": 0.3388, "step": 1243 }, { "epoch": 0.2, "grad_norm": 1.5705866028797602, "learning_rate": 1.8516363650781464e-05, "loss": 0.9856, "step": 1244 }, { "epoch": 0.2, "grad_norm": 3.142678419827848, "learning_rate": 1.851362679941255e-05, "loss": 0.987, "step": 1245 }, { "epoch": 0.2, "grad_norm": 2.595201017499035, "learning_rate": 1.851088762869702e-05, "loss": 1.0207, "step": 1246 }, { "epoch": 0.2, "grad_norm": 2.7119369203663988, "learning_rate": 1.850814613938109e-05, "loss": 0.943, "step": 1247 }, { "epoch": 0.2, "grad_norm": 4.29619179582723, "learning_rate": 1.850540233221163e-05, "loss": 0.9291, "step": 1248 }, { "epoch": 0.2, "grad_norm": 3.4248584300388396, "learning_rate": 1.8502656207936117e-05, "loss": 1.0167, "step": 1249 }, { "epoch": 0.2, "grad_norm": 3.3344350106899876, "learning_rate": 1.849990776730268e-05, "loss": 0.8932, "step": 1250 }, { "epoch": 0.2, "grad_norm": 4.171160347409199, "learning_rate": 1.849715701106006e-05, "loss": 0.9651, "step": 1251 }, { "epoch": 0.2, "grad_norm": 1.929228013658176, "learning_rate": 1.849440393995765e-05, "loss": 0.9936, "step": 1252 }, { "epoch": 0.2, "grad_norm": 3.987078060200212, "learning_rate": 1.8491648554745457e-05, "loss": 0.9648, "step": 1253 }, { "epoch": 0.2, "grad_norm": 1.1596678659539428, "learning_rate": 1.848889085617412e-05, "loss": 0.9894, "step": 1254 }, { "epoch": 0.2, "grad_norm": 3.8675529538495113, "learning_rate": 1.848613084499492e-05, "loss": 0.9617, "step": 1255 }, { "epoch": 0.2, "grad_norm": 1.888872705963723, "learning_rate": 1.8483368521959753e-05, "loss": 0.9843, "step": 1256 }, { "epoch": 0.2, "grad_norm": 2.1293442021934346, "learning_rate": 1.8480603887821157e-05, "loss": 0.9685, "step": 1257 }, { "epoch": 0.2, "grad_norm": 2.7479981158498554, "learning_rate": 1.8477836943332295e-05, "loss": 0.9451, "step": 1258 }, { "epoch": 0.2, "grad_norm": 4.1690495098904705, "learning_rate": 1.8475067689246954e-05, "loss": 0.9512, "step": 1259 }, { "epoch": 0.2, "grad_norm": 3.1931992983578716, "learning_rate": 1.8472296126319557e-05, "loss": 0.978, "step": 1260 }, { "epoch": 0.2, "grad_norm": 4.301600074257547, "learning_rate": 1.8469522255305156e-05, "loss": 0.9342, "step": 1261 }, { "epoch": 0.2, "grad_norm": 2.1510790312864687, "learning_rate": 1.846674607695943e-05, "loss": 0.9632, "step": 1262 }, { "epoch": 0.2, "grad_norm": 2.6027143981157788, "learning_rate": 1.8463967592038684e-05, "loss": 0.9977, "step": 1263 }, { "epoch": 0.2, "grad_norm": 2.969037333185969, "learning_rate": 1.8461186801299857e-05, "loss": 0.9398, "step": 1264 }, { "epoch": 0.2, "grad_norm": 2.4253650744421407, "learning_rate": 1.845840370550051e-05, "loss": 0.9613, "step": 1265 }, { "epoch": 0.2, "grad_norm": 3.042202854362655, "learning_rate": 1.8455618305398836e-05, "loss": 0.9497, "step": 1266 }, { "epoch": 0.2, "grad_norm": 2.022295183861383, "learning_rate": 1.8452830601753655e-05, "loss": 1.0426, "step": 1267 }, { "epoch": 0.2, "grad_norm": 2.6335625641276885, "learning_rate": 1.8450040595324416e-05, "loss": 1.0145, "step": 1268 }, { "epoch": 0.2, "grad_norm": 1.6264218199006, "learning_rate": 1.844724828687119e-05, "loss": 0.3216, "step": 1269 }, { "epoch": 0.2, "grad_norm": 3.632732562754594, "learning_rate": 1.8444453677154683e-05, "loss": 0.9256, "step": 1270 }, { "epoch": 0.2, "grad_norm": 2.1606009992966357, "learning_rate": 1.844165676693622e-05, "loss": 1.07, "step": 1271 }, { "epoch": 0.2, "grad_norm": 3.7371575750458605, "learning_rate": 1.843885755697776e-05, "loss": 0.9538, "step": 1272 }, { "epoch": 0.21, "grad_norm": 2.7528085238778455, "learning_rate": 1.843605604804188e-05, "loss": 0.9741, "step": 1273 }, { "epoch": 0.21, "grad_norm": 2.764835568276076, "learning_rate": 1.8433252240891793e-05, "loss": 0.9315, "step": 1274 }, { "epoch": 0.21, "grad_norm": 3.12146024497736, "learning_rate": 1.8430446136291333e-05, "loss": 0.9825, "step": 1275 }, { "epoch": 0.21, "grad_norm": 1.970363863730038, "learning_rate": 1.8427637735004957e-05, "loss": 1.025, "step": 1276 }, { "epoch": 0.21, "grad_norm": 2.2910614139179573, "learning_rate": 1.8424827037797755e-05, "loss": 0.877, "step": 1277 }, { "epoch": 0.21, "grad_norm": 3.401068570009684, "learning_rate": 1.8422014045435433e-05, "loss": 0.8956, "step": 1278 }, { "epoch": 0.21, "grad_norm": 3.144538831997847, "learning_rate": 1.841919875868433e-05, "loss": 0.9742, "step": 1279 }, { "epoch": 0.21, "grad_norm": 2.3677834745724735, "learning_rate": 1.841638117831141e-05, "loss": 1.0376, "step": 1280 }, { "epoch": 0.21, "grad_norm": 4.333819997798181, "learning_rate": 1.8413561305084262e-05, "loss": 0.9552, "step": 1281 }, { "epoch": 0.21, "grad_norm": 2.861024307942386, "learning_rate": 1.841073913977109e-05, "loss": 0.971, "step": 1282 }, { "epoch": 0.21, "grad_norm": 2.3089152207503307, "learning_rate": 1.8407914683140734e-05, "loss": 0.9328, "step": 1283 }, { "epoch": 0.21, "grad_norm": 2.094084186359117, "learning_rate": 1.840508793596265e-05, "loss": 0.3606, "step": 1284 }, { "epoch": 0.21, "grad_norm": 2.4224033886803897, "learning_rate": 1.8402258899006926e-05, "loss": 1.0573, "step": 1285 }, { "epoch": 0.21, "grad_norm": 3.52940806734602, "learning_rate": 1.839942757304427e-05, "loss": 0.939, "step": 1286 }, { "epoch": 0.21, "grad_norm": 1.6150249820583982, "learning_rate": 1.8396593958846013e-05, "loss": 1.0301, "step": 1287 }, { "epoch": 0.21, "grad_norm": 3.588676254487368, "learning_rate": 1.8393758057184104e-05, "loss": 0.93, "step": 1288 }, { "epoch": 0.21, "grad_norm": 2.632598053210103, "learning_rate": 1.8390919868831126e-05, "loss": 0.97, "step": 1289 }, { "epoch": 0.21, "grad_norm": 2.652229677424318, "learning_rate": 1.838807939456028e-05, "loss": 0.9229, "step": 1290 }, { "epoch": 0.21, "grad_norm": 3.852812094254062, "learning_rate": 1.8385236635145385e-05, "loss": 0.925, "step": 1291 }, { "epoch": 0.21, "grad_norm": 4.854601708210605, "learning_rate": 1.8382391591360895e-05, "loss": 0.9544, "step": 1292 }, { "epoch": 0.21, "grad_norm": 2.3428813087400013, "learning_rate": 1.837954426398187e-05, "loss": 0.9507, "step": 1293 }, { "epoch": 0.21, "grad_norm": 1.6251654907468511, "learning_rate": 1.8376694653784003e-05, "loss": 0.9854, "step": 1294 }, { "epoch": 0.21, "grad_norm": 2.346123371070655, "learning_rate": 1.837384276154361e-05, "loss": 0.9745, "step": 1295 }, { "epoch": 0.21, "grad_norm": 3.82611741343163, "learning_rate": 1.8370988588037622e-05, "loss": 0.9583, "step": 1296 }, { "epoch": 0.21, "grad_norm": 2.6248008107496577, "learning_rate": 1.8368132134043596e-05, "loss": 0.9421, "step": 1297 }, { "epoch": 0.21, "grad_norm": 2.669159786736749, "learning_rate": 1.836527340033971e-05, "loss": 0.9538, "step": 1298 }, { "epoch": 0.21, "grad_norm": 2.200730449616976, "learning_rate": 1.8362412387704752e-05, "loss": 0.3053, "step": 1299 }, { "epoch": 0.21, "grad_norm": 3.1861388348192947, "learning_rate": 1.8359549096918154e-05, "loss": 0.9473, "step": 1300 }, { "epoch": 0.21, "grad_norm": 2.4275027937239533, "learning_rate": 1.8356683528759948e-05, "loss": 0.9839, "step": 1301 }, { "epoch": 0.21, "grad_norm": 2.3975376093582668, "learning_rate": 1.8353815684010796e-05, "loss": 0.9793, "step": 1302 }, { "epoch": 0.21, "grad_norm": 2.425847886501712, "learning_rate": 1.835094556345198e-05, "loss": 0.8879, "step": 1303 }, { "epoch": 0.21, "grad_norm": 3.01171146693753, "learning_rate": 1.834807316786539e-05, "loss": 1.0251, "step": 1304 }, { "epoch": 0.21, "grad_norm": 1.8853448956003278, "learning_rate": 1.834519849803356e-05, "loss": 0.3301, "step": 1305 }, { "epoch": 0.21, "grad_norm": 2.5425496261389657, "learning_rate": 1.834232155473962e-05, "loss": 1.0069, "step": 1306 }, { "epoch": 0.21, "grad_norm": 2.7609162844159587, "learning_rate": 1.833944233876733e-05, "loss": 0.962, "step": 1307 }, { "epoch": 0.21, "grad_norm": 1.8921544847829042, "learning_rate": 1.833656085090107e-05, "loss": 1.0007, "step": 1308 }, { "epoch": 0.21, "grad_norm": 2.2076299614845967, "learning_rate": 1.8333677091925834e-05, "loss": 0.9332, "step": 1309 }, { "epoch": 0.21, "grad_norm": 3.304491835408767, "learning_rate": 1.8330791062627244e-05, "loss": 0.9758, "step": 1310 }, { "epoch": 0.21, "grad_norm": 1.0995660099131157, "learning_rate": 1.8327902763791523e-05, "loss": 1.0271, "step": 1311 }, { "epoch": 0.21, "grad_norm": 3.387730641976704, "learning_rate": 1.832501219620553e-05, "loss": 0.986, "step": 1312 }, { "epoch": 0.21, "grad_norm": 3.798058093951984, "learning_rate": 1.8322119360656736e-05, "loss": 0.9542, "step": 1313 }, { "epoch": 0.21, "grad_norm": 2.4177554626560913, "learning_rate": 1.831922425793323e-05, "loss": 0.9031, "step": 1314 }, { "epoch": 0.21, "grad_norm": 3.2893006879590843, "learning_rate": 1.8316326888823714e-05, "loss": 0.962, "step": 1315 }, { "epoch": 0.21, "grad_norm": 3.786595876820048, "learning_rate": 1.8313427254117508e-05, "loss": 0.8895, "step": 1316 }, { "epoch": 0.21, "grad_norm": 3.3417450785454763, "learning_rate": 1.8310525354604562e-05, "loss": 1.0141, "step": 1317 }, { "epoch": 0.21, "grad_norm": 1.941566813716029, "learning_rate": 1.8307621191075425e-05, "loss": 1.0051, "step": 1318 }, { "epoch": 0.21, "grad_norm": 3.460888864420896, "learning_rate": 1.8304714764321277e-05, "loss": 0.9716, "step": 1319 }, { "epoch": 0.21, "grad_norm": 2.6870359862983966, "learning_rate": 1.8301806075133907e-05, "loss": 0.3039, "step": 1320 }, { "epoch": 0.21, "grad_norm": 4.019426379675446, "learning_rate": 1.829889512430572e-05, "loss": 0.9644, "step": 1321 }, { "epoch": 0.21, "grad_norm": 3.8047606913519583, "learning_rate": 1.829598191262974e-05, "loss": 0.9463, "step": 1322 }, { "epoch": 0.21, "grad_norm": 4.372642753979641, "learning_rate": 1.829306644089961e-05, "loss": 0.9021, "step": 1323 }, { "epoch": 0.21, "grad_norm": 5.840325479190424, "learning_rate": 1.829014870990958e-05, "loss": 1.0308, "step": 1324 }, { "epoch": 0.21, "grad_norm": 1.7111335537554753, "learning_rate": 1.8287228720454522e-05, "loss": 0.9409, "step": 1325 }, { "epoch": 0.21, "grad_norm": 2.9653963119396427, "learning_rate": 1.8284306473329922e-05, "loss": 1.0075, "step": 1326 }, { "epoch": 0.21, "grad_norm": 3.081331857828106, "learning_rate": 1.828138196933188e-05, "loss": 0.9143, "step": 1327 }, { "epoch": 0.21, "grad_norm": 1.847124148118039, "learning_rate": 1.8278455209257113e-05, "loss": 0.3608, "step": 1328 }, { "epoch": 0.21, "grad_norm": 2.2591615230436553, "learning_rate": 1.8275526193902948e-05, "loss": 0.9547, "step": 1329 }, { "epoch": 0.21, "grad_norm": 2.4232434744180815, "learning_rate": 1.8272594924067333e-05, "loss": 1.0097, "step": 1330 }, { "epoch": 0.21, "grad_norm": 3.2447113141458357, "learning_rate": 1.8269661400548825e-05, "loss": 0.9881, "step": 1331 }, { "epoch": 0.21, "grad_norm": 3.4384558995846772, "learning_rate": 1.8266725624146596e-05, "loss": 0.9617, "step": 1332 }, { "epoch": 0.21, "grad_norm": 2.557973818623384, "learning_rate": 1.8263787595660437e-05, "loss": 0.9881, "step": 1333 }, { "epoch": 0.21, "grad_norm": 3.8916308470118364, "learning_rate": 1.8260847315890738e-05, "loss": 0.9326, "step": 1334 }, { "epoch": 0.22, "grad_norm": 3.3179988204926367, "learning_rate": 1.8257904785638523e-05, "loss": 0.9004, "step": 1335 }, { "epoch": 0.22, "grad_norm": 1.4654561708079024, "learning_rate": 1.8254960005705412e-05, "loss": 0.9883, "step": 1336 }, { "epoch": 0.22, "grad_norm": 3.4604475307447027, "learning_rate": 1.825201297689365e-05, "loss": 0.9636, "step": 1337 }, { "epoch": 0.22, "grad_norm": 2.1018461014788303, "learning_rate": 1.824906370000608e-05, "loss": 0.9676, "step": 1338 }, { "epoch": 0.22, "grad_norm": 2.11330212143835, "learning_rate": 1.8246112175846175e-05, "loss": 1.0107, "step": 1339 }, { "epoch": 0.22, "grad_norm": 3.9106968097333614, "learning_rate": 1.8243158405218008e-05, "loss": 0.9948, "step": 1340 }, { "epoch": 0.22, "grad_norm": 1.4827123867442429, "learning_rate": 1.824020238892626e-05, "loss": 0.3555, "step": 1341 }, { "epoch": 0.22, "grad_norm": 1.8179392918395507, "learning_rate": 1.8237244127776247e-05, "loss": 0.3601, "step": 1342 }, { "epoch": 0.22, "grad_norm": 3.3922407809395163, "learning_rate": 1.823428362257387e-05, "loss": 0.9413, "step": 1343 }, { "epoch": 0.22, "grad_norm": 1.8564917384507569, "learning_rate": 1.8231320874125656e-05, "loss": 0.978, "step": 1344 }, { "epoch": 0.22, "grad_norm": 2.987581617445528, "learning_rate": 1.8228355883238737e-05, "loss": 0.97, "step": 1345 }, { "epoch": 0.22, "grad_norm": 3.0341735584404623, "learning_rate": 1.822538865072086e-05, "loss": 0.9601, "step": 1346 }, { "epoch": 0.22, "grad_norm": 3.3603599346470103, "learning_rate": 1.822241917738038e-05, "loss": 1.0099, "step": 1347 }, { "epoch": 0.22, "grad_norm": 2.5421339970769123, "learning_rate": 1.8219447464026264e-05, "loss": 0.3324, "step": 1348 }, { "epoch": 0.22, "grad_norm": 3.1888039109814246, "learning_rate": 1.821647351146809e-05, "loss": 0.9755, "step": 1349 }, { "epoch": 0.22, "grad_norm": 2.042671371610583, "learning_rate": 1.821349732051604e-05, "loss": 0.9692, "step": 1350 }, { "epoch": 0.22, "grad_norm": 2.6995471362629995, "learning_rate": 1.8210518891980914e-05, "loss": 0.9682, "step": 1351 }, { "epoch": 0.22, "grad_norm": 2.8478680650926083, "learning_rate": 1.820753822667412e-05, "loss": 0.91, "step": 1352 }, { "epoch": 0.22, "grad_norm": 3.816091503021912, "learning_rate": 1.8204555325407668e-05, "loss": 0.9674, "step": 1353 }, { "epoch": 0.22, "grad_norm": 3.246434970737297, "learning_rate": 1.8201570188994186e-05, "loss": 0.9341, "step": 1354 }, { "epoch": 0.22, "grad_norm": 2.3773719587855666, "learning_rate": 1.8198582818246908e-05, "loss": 1.0355, "step": 1355 }, { "epoch": 0.22, "grad_norm": 2.2718876015839684, "learning_rate": 1.8195593213979676e-05, "loss": 0.9164, "step": 1356 }, { "epoch": 0.22, "grad_norm": 2.2077119300867944, "learning_rate": 1.819260137700694e-05, "loss": 0.9641, "step": 1357 }, { "epoch": 0.22, "grad_norm": 3.080298106889515, "learning_rate": 1.8189607308143754e-05, "loss": 0.9809, "step": 1358 }, { "epoch": 0.22, "grad_norm": 3.7946523432252426, "learning_rate": 1.8186611008205794e-05, "loss": 0.9452, "step": 1359 }, { "epoch": 0.22, "grad_norm": 2.5612184064414314, "learning_rate": 1.8183612478009328e-05, "loss": 0.9768, "step": 1360 }, { "epoch": 0.22, "grad_norm": 1.67725497802869, "learning_rate": 1.8180611718371238e-05, "loss": 0.3395, "step": 1361 }, { "epoch": 0.22, "grad_norm": 3.1308736913857023, "learning_rate": 1.8177608730109023e-05, "loss": 0.9861, "step": 1362 }, { "epoch": 0.22, "grad_norm": 3.6786806061266164, "learning_rate": 1.8174603514040767e-05, "loss": 0.9755, "step": 1363 }, { "epoch": 0.22, "grad_norm": 2.455879754269667, "learning_rate": 1.817159607098518e-05, "loss": 0.9474, "step": 1364 }, { "epoch": 0.22, "grad_norm": 3.499767840723845, "learning_rate": 1.8168586401761573e-05, "loss": 1.0285, "step": 1365 }, { "epoch": 0.22, "grad_norm": 3.5225068277870863, "learning_rate": 1.8165574507189864e-05, "loss": 0.925, "step": 1366 }, { "epoch": 0.22, "grad_norm": 2.580127654946614, "learning_rate": 1.8162560388090573e-05, "loss": 0.9204, "step": 1367 }, { "epoch": 0.22, "grad_norm": 2.2616080617749508, "learning_rate": 1.8159544045284826e-05, "loss": 1.0496, "step": 1368 }, { "epoch": 0.22, "grad_norm": 3.3436630095102275, "learning_rate": 1.8156525479594368e-05, "loss": 0.927, "step": 1369 }, { "epoch": 0.22, "grad_norm": 2.5593895992100215, "learning_rate": 1.8153504691841528e-05, "loss": 0.929, "step": 1370 }, { "epoch": 0.22, "grad_norm": 2.7233585453461706, "learning_rate": 1.8150481682849258e-05, "loss": 0.9169, "step": 1371 }, { "epoch": 0.22, "grad_norm": 3.0417735111509616, "learning_rate": 1.814745645344111e-05, "loss": 1.023, "step": 1372 }, { "epoch": 0.22, "grad_norm": 3.8832475813755782, "learning_rate": 1.814442900444124e-05, "loss": 0.9984, "step": 1373 }, { "epoch": 0.22, "grad_norm": 3.157716165061286, "learning_rate": 1.8141399336674403e-05, "loss": 0.9178, "step": 1374 }, { "epoch": 0.22, "grad_norm": 4.502047179060903, "learning_rate": 1.8138367450965968e-05, "loss": 0.9422, "step": 1375 }, { "epoch": 0.22, "grad_norm": 3.396493219645974, "learning_rate": 1.8135333348141904e-05, "loss": 0.9457, "step": 1376 }, { "epoch": 0.22, "grad_norm": 2.517109308620786, "learning_rate": 1.813229702902879e-05, "loss": 0.9684, "step": 1377 }, { "epoch": 0.22, "grad_norm": 3.235954304517493, "learning_rate": 1.812925849445379e-05, "loss": 0.9747, "step": 1378 }, { "epoch": 0.22, "grad_norm": 3.618917821705316, "learning_rate": 1.8126217745244696e-05, "loss": 0.9639, "step": 1379 }, { "epoch": 0.22, "grad_norm": 2.1628735302385103, "learning_rate": 1.8123174782229884e-05, "loss": 1.023, "step": 1380 }, { "epoch": 0.22, "grad_norm": 2.9617480776418237, "learning_rate": 1.8120129606238345e-05, "loss": 0.9896, "step": 1381 }, { "epoch": 0.22, "grad_norm": 2.510948240949141, "learning_rate": 1.811708221809967e-05, "loss": 0.9368, "step": 1382 }, { "epoch": 0.22, "grad_norm": 1.692617850208331, "learning_rate": 1.8114032618644053e-05, "loss": 1.0452, "step": 1383 }, { "epoch": 0.22, "grad_norm": 2.2845840128869144, "learning_rate": 1.8110980808702282e-05, "loss": 0.9615, "step": 1384 }, { "epoch": 0.22, "grad_norm": 3.340938348588951, "learning_rate": 1.810792678910576e-05, "loss": 0.9875, "step": 1385 }, { "epoch": 0.22, "grad_norm": 4.5170235233636005, "learning_rate": 1.810487056068648e-05, "loss": 1.0123, "step": 1386 }, { "epoch": 0.22, "grad_norm": 1.840474338550537, "learning_rate": 1.8101812124277053e-05, "loss": 0.9748, "step": 1387 }, { "epoch": 0.22, "grad_norm": 2.0927961083918443, "learning_rate": 1.8098751480710675e-05, "loss": 0.9597, "step": 1388 }, { "epoch": 0.22, "grad_norm": 2.9713415780275803, "learning_rate": 1.8095688630821147e-05, "loss": 0.9415, "step": 1389 }, { "epoch": 0.22, "grad_norm": 4.255862960358747, "learning_rate": 1.809262357544288e-05, "loss": 0.9644, "step": 1390 }, { "epoch": 0.22, "grad_norm": 2.172773106905223, "learning_rate": 1.8089556315410875e-05, "loss": 1.0329, "step": 1391 }, { "epoch": 0.22, "grad_norm": 2.7313830764439513, "learning_rate": 1.808648685156074e-05, "loss": 0.9169, "step": 1392 }, { "epoch": 0.22, "grad_norm": 3.232562013106493, "learning_rate": 1.808341518472868e-05, "loss": 0.9072, "step": 1393 }, { "epoch": 0.22, "grad_norm": 3.4107943677381285, "learning_rate": 1.8080341315751507e-05, "loss": 0.9495, "step": 1394 }, { "epoch": 0.22, "grad_norm": 2.895507647818292, "learning_rate": 1.8077265245466623e-05, "loss": 0.9371, "step": 1395 }, { "epoch": 0.22, "grad_norm": 3.2160983227555184, "learning_rate": 1.8074186974712033e-05, "loss": 1.0271, "step": 1396 }, { "epoch": 0.23, "grad_norm": 2.37313568828146, "learning_rate": 1.8071106504326344e-05, "loss": 0.9963, "step": 1397 }, { "epoch": 0.23, "grad_norm": 2.5904521889102265, "learning_rate": 1.8068023835148763e-05, "loss": 0.9685, "step": 1398 }, { "epoch": 0.23, "grad_norm": 3.245050623047748, "learning_rate": 1.8064938968019096e-05, "loss": 0.9669, "step": 1399 }, { "epoch": 0.23, "grad_norm": 2.850878991552447, "learning_rate": 1.8061851903777738e-05, "loss": 0.9688, "step": 1400 }, { "epoch": 0.23, "grad_norm": 3.1235280957424694, "learning_rate": 1.8058762643265697e-05, "loss": 0.9498, "step": 1401 }, { "epoch": 0.23, "grad_norm": 3.121558315951386, "learning_rate": 1.805567118732457e-05, "loss": 0.942, "step": 1402 }, { "epoch": 0.23, "grad_norm": 3.4136267063109087, "learning_rate": 1.8052577536796557e-05, "loss": 0.9537, "step": 1403 }, { "epoch": 0.23, "grad_norm": 1.6555737248361728, "learning_rate": 1.804948169252445e-05, "loss": 0.8993, "step": 1404 }, { "epoch": 0.23, "grad_norm": 3.715088010954543, "learning_rate": 1.804638365535165e-05, "loss": 0.9673, "step": 1405 }, { "epoch": 0.23, "grad_norm": 4.0204237233509605, "learning_rate": 1.8043283426122137e-05, "loss": 0.9723, "step": 1406 }, { "epoch": 0.23, "grad_norm": 2.6774958331078342, "learning_rate": 1.8040181005680506e-05, "loss": 0.9655, "step": 1407 }, { "epoch": 0.23, "grad_norm": 2.6067394720903745, "learning_rate": 1.803707639487194e-05, "loss": 0.9632, "step": 1408 }, { "epoch": 0.23, "grad_norm": 3.5473996753230295, "learning_rate": 1.8033969594542223e-05, "loss": 0.9788, "step": 1409 }, { "epoch": 0.23, "grad_norm": 3.077672315122658, "learning_rate": 1.803086060553773e-05, "loss": 0.9167, "step": 1410 }, { "epoch": 0.23, "grad_norm": 2.6220744042391257, "learning_rate": 1.8027749428705432e-05, "loss": 1.0129, "step": 1411 }, { "epoch": 0.23, "grad_norm": 2.8683431099949104, "learning_rate": 1.8024636064892907e-05, "loss": 0.9221, "step": 1412 }, { "epoch": 0.23, "grad_norm": 2.320853841292644, "learning_rate": 1.8021520514948315e-05, "loss": 0.9048, "step": 1413 }, { "epoch": 0.23, "grad_norm": 3.5501486841146934, "learning_rate": 1.8018402779720424e-05, "loss": 1.0325, "step": 1414 }, { "epoch": 0.23, "grad_norm": 4.147579010894347, "learning_rate": 1.8015282860058584e-05, "loss": 0.9894, "step": 1415 }, { "epoch": 0.23, "grad_norm": 3.1683913770961176, "learning_rate": 1.8012160756812755e-05, "loss": 0.9379, "step": 1416 }, { "epoch": 0.23, "grad_norm": 3.7065092308128125, "learning_rate": 1.8009036470833474e-05, "loss": 1.0099, "step": 1417 }, { "epoch": 0.23, "grad_norm": 3.026175587372271, "learning_rate": 1.800591000297189e-05, "loss": 0.9068, "step": 1418 }, { "epoch": 0.23, "grad_norm": 3.946322291101443, "learning_rate": 1.8002781354079736e-05, "loss": 1.0058, "step": 1419 }, { "epoch": 0.23, "grad_norm": 2.6493792364613507, "learning_rate": 1.7999650525009343e-05, "loss": 1.0104, "step": 1420 }, { "epoch": 0.23, "grad_norm": 2.9439128919754998, "learning_rate": 1.7996517516613637e-05, "loss": 0.9273, "step": 1421 }, { "epoch": 0.23, "grad_norm": 2.883594437906312, "learning_rate": 1.7993382329746135e-05, "loss": 1.0035, "step": 1422 }, { "epoch": 0.23, "grad_norm": 3.611255502666904, "learning_rate": 1.7990244965260945e-05, "loss": 0.9659, "step": 1423 }, { "epoch": 0.23, "grad_norm": 3.092026818231939, "learning_rate": 1.7987105424012777e-05, "loss": 0.9527, "step": 1424 }, { "epoch": 0.23, "grad_norm": 2.0858824076145797, "learning_rate": 1.798396370685692e-05, "loss": 0.9397, "step": 1425 }, { "epoch": 0.23, "grad_norm": 3.0887029304453195, "learning_rate": 1.7980819814649276e-05, "loss": 0.9286, "step": 1426 }, { "epoch": 0.23, "grad_norm": 3.705295618698762, "learning_rate": 1.797767374824632e-05, "loss": 0.9571, "step": 1427 }, { "epoch": 0.23, "grad_norm": 3.141452509480882, "learning_rate": 1.797452550850513e-05, "loss": 0.9938, "step": 1428 }, { "epoch": 0.23, "grad_norm": 3.4026205342263727, "learning_rate": 1.797137509628337e-05, "loss": 0.9872, "step": 1429 }, { "epoch": 0.23, "grad_norm": 2.939512010155618, "learning_rate": 1.7968222512439302e-05, "loss": 0.9397, "step": 1430 }, { "epoch": 0.23, "grad_norm": 2.971211587403216, "learning_rate": 1.7965067757831777e-05, "loss": 0.9326, "step": 1431 }, { "epoch": 0.23, "grad_norm": 2.5946173998489876, "learning_rate": 1.796191083332024e-05, "loss": 0.959, "step": 1432 }, { "epoch": 0.23, "grad_norm": 3.5346773782186713, "learning_rate": 1.7958751739764716e-05, "loss": 0.9031, "step": 1433 }, { "epoch": 0.23, "grad_norm": 2.9123079883330836, "learning_rate": 1.7955590478025833e-05, "loss": 0.932, "step": 1434 }, { "epoch": 0.23, "grad_norm": 2.7804452824921957, "learning_rate": 1.7952427048964808e-05, "loss": 0.9717, "step": 1435 }, { "epoch": 0.23, "grad_norm": 3.9825611249180466, "learning_rate": 1.7949261453443446e-05, "loss": 1.0509, "step": 1436 }, { "epoch": 0.23, "grad_norm": 3.795340522295171, "learning_rate": 1.794609369232414e-05, "loss": 0.986, "step": 1437 }, { "epoch": 0.23, "grad_norm": 1.931689069890645, "learning_rate": 1.794292376646988e-05, "loss": 0.9487, "step": 1438 }, { "epoch": 0.23, "grad_norm": 3.5153784093206486, "learning_rate": 1.7939751676744234e-05, "loss": 0.9841, "step": 1439 }, { "epoch": 0.23, "grad_norm": 3.5216441528159117, "learning_rate": 1.7936577424011375e-05, "loss": 0.912, "step": 1440 }, { "epoch": 0.23, "grad_norm": 3.246756916267006, "learning_rate": 1.793340100913605e-05, "loss": 0.9839, "step": 1441 }, { "epoch": 0.23, "grad_norm": 4.505118108591969, "learning_rate": 1.7930222432983605e-05, "loss": 0.9672, "step": 1442 }, { "epoch": 0.23, "grad_norm": 2.540088903868698, "learning_rate": 1.792704169641997e-05, "loss": 0.9179, "step": 1443 }, { "epoch": 0.23, "grad_norm": 2.083530594705705, "learning_rate": 1.7923858800311665e-05, "loss": 0.9389, "step": 1444 }, { "epoch": 0.23, "grad_norm": 2.787008859790726, "learning_rate": 1.7920673745525805e-05, "loss": 1.022, "step": 1445 }, { "epoch": 0.23, "grad_norm": 2.97304938344683, "learning_rate": 1.7917486532930082e-05, "loss": 0.9398, "step": 1446 }, { "epoch": 0.23, "grad_norm": 3.493419319021578, "learning_rate": 1.7914297163392778e-05, "loss": 0.8835, "step": 1447 }, { "epoch": 0.23, "grad_norm": 1.711531723160861, "learning_rate": 1.7911105637782767e-05, "loss": 0.9594, "step": 1448 }, { "epoch": 0.23, "grad_norm": 3.2858214005490547, "learning_rate": 1.790791195696951e-05, "loss": 0.9865, "step": 1449 }, { "epoch": 0.23, "grad_norm": 3.764989268982933, "learning_rate": 1.790471612182306e-05, "loss": 0.9223, "step": 1450 }, { "epoch": 0.23, "grad_norm": 2.8488032120358473, "learning_rate": 1.7901518133214034e-05, "loss": 0.9563, "step": 1451 }, { "epoch": 0.23, "grad_norm": 2.344170596212906, "learning_rate": 1.789831799201367e-05, "loss": 0.9854, "step": 1452 }, { "epoch": 0.23, "grad_norm": 3.7314206491677075, "learning_rate": 1.789511569909376e-05, "loss": 1.0075, "step": 1453 }, { "epoch": 0.23, "grad_norm": 3.1028845441301325, "learning_rate": 1.7891911255326705e-05, "loss": 0.8691, "step": 1454 }, { "epoch": 0.23, "grad_norm": 4.380811918072201, "learning_rate": 1.7888704661585483e-05, "loss": 0.907, "step": 1455 }, { "epoch": 0.23, "grad_norm": 1.5709517966341369, "learning_rate": 1.7885495918743658e-05, "loss": 0.9303, "step": 1456 }, { "epoch": 0.23, "grad_norm": 1.079582335102224, "learning_rate": 1.7882285027675377e-05, "loss": 0.9122, "step": 1457 }, { "epoch": 0.23, "grad_norm": 3.03455730798084, "learning_rate": 1.7879071989255383e-05, "loss": 0.9387, "step": 1458 }, { "epoch": 0.24, "grad_norm": 4.351857694136453, "learning_rate": 1.787585680435899e-05, "loss": 1.0695, "step": 1459 }, { "epoch": 0.24, "grad_norm": 4.144305971802159, "learning_rate": 1.7872639473862102e-05, "loss": 0.9085, "step": 1460 }, { "epoch": 0.24, "grad_norm": 2.6788428396138855, "learning_rate": 1.786941999864121e-05, "loss": 0.983, "step": 1461 }, { "epoch": 0.24, "grad_norm": 3.353887574014797, "learning_rate": 1.7866198379573393e-05, "loss": 0.9691, "step": 1462 }, { "epoch": 0.24, "grad_norm": 2.154769721063429, "learning_rate": 1.78629746175363e-05, "loss": 0.9649, "step": 1463 }, { "epoch": 0.24, "grad_norm": 3.619456294415986, "learning_rate": 1.785974871340818e-05, "loss": 0.9672, "step": 1464 }, { "epoch": 0.24, "grad_norm": 1.7640299386059455, "learning_rate": 1.7856520668067854e-05, "loss": 0.9434, "step": 1465 }, { "epoch": 0.24, "grad_norm": 2.897616452632643, "learning_rate": 1.785329048239473e-05, "loss": 0.9605, "step": 1466 }, { "epoch": 0.24, "grad_norm": 3.860332169545584, "learning_rate": 1.7850058157268803e-05, "loss": 0.9721, "step": 1467 }, { "epoch": 0.24, "grad_norm": 3.243158475416747, "learning_rate": 1.7846823693570645e-05, "loss": 0.9229, "step": 1468 }, { "epoch": 0.24, "grad_norm": 4.6180008173529234, "learning_rate": 1.7843587092181417e-05, "loss": 0.9744, "step": 1469 }, { "epoch": 0.24, "grad_norm": 3.706626685107561, "learning_rate": 1.7840348353982852e-05, "loss": 0.9684, "step": 1470 }, { "epoch": 0.24, "grad_norm": 2.7892968856674334, "learning_rate": 1.7837107479857275e-05, "loss": 0.9971, "step": 1471 }, { "epoch": 0.24, "grad_norm": 3.9349651351605646, "learning_rate": 1.7833864470687593e-05, "loss": 0.9159, "step": 1472 }, { "epoch": 0.24, "grad_norm": 2.7683956255288047, "learning_rate": 1.7830619327357282e-05, "loss": 0.9668, "step": 1473 }, { "epoch": 0.24, "grad_norm": 2.3880660379694025, "learning_rate": 1.7827372050750414e-05, "loss": 0.3441, "step": 1474 }, { "epoch": 0.24, "grad_norm": 2.730724491918388, "learning_rate": 1.782412264175164e-05, "loss": 0.9467, "step": 1475 }, { "epoch": 0.24, "grad_norm": 2.4396592382213784, "learning_rate": 1.7820871101246185e-05, "loss": 1.0254, "step": 1476 }, { "epoch": 0.24, "grad_norm": 2.8831161711247733, "learning_rate": 1.7817617430119858e-05, "loss": 0.9435, "step": 1477 }, { "epoch": 0.24, "grad_norm": 3.2229894477370866, "learning_rate": 1.7814361629259052e-05, "loss": 0.8747, "step": 1478 }, { "epoch": 0.24, "grad_norm": 3.7641930607772647, "learning_rate": 1.781110369955073e-05, "loss": 0.9418, "step": 1479 }, { "epoch": 0.24, "grad_norm": 3.3225419043430775, "learning_rate": 1.780784364188245e-05, "loss": 1.0221, "step": 1480 }, { "epoch": 0.24, "grad_norm": 3.737409308618595, "learning_rate": 1.780458145714233e-05, "loss": 0.9344, "step": 1481 }, { "epoch": 0.24, "grad_norm": 3.3087813899529572, "learning_rate": 1.780131714621909e-05, "loss": 0.9204, "step": 1482 }, { "epoch": 0.24, "grad_norm": 2.5741127003892856, "learning_rate": 1.779805071000202e-05, "loss": 0.8925, "step": 1483 }, { "epoch": 0.24, "grad_norm": 2.7171922803225814, "learning_rate": 1.7794782149380977e-05, "loss": 0.9764, "step": 1484 }, { "epoch": 0.24, "grad_norm": 1.3480430158605747, "learning_rate": 1.7791511465246413e-05, "loss": 1.0008, "step": 1485 }, { "epoch": 0.24, "grad_norm": 3.4251086141252056, "learning_rate": 1.778823865848935e-05, "loss": 0.9896, "step": 1486 }, { "epoch": 0.24, "grad_norm": 2.766483324555994, "learning_rate": 1.7784963730001395e-05, "loss": 0.9054, "step": 1487 }, { "epoch": 0.24, "grad_norm": 2.0458772280374373, "learning_rate": 1.7781686680674726e-05, "loss": 0.9772, "step": 1488 }, { "epoch": 0.24, "grad_norm": 2.4170036300234194, "learning_rate": 1.77784075114021e-05, "loss": 0.9096, "step": 1489 }, { "epoch": 0.24, "grad_norm": 2.9576926455469406, "learning_rate": 1.7775126223076857e-05, "loss": 0.9827, "step": 1490 }, { "epoch": 0.24, "grad_norm": 4.420382082550931, "learning_rate": 1.7771842816592907e-05, "loss": 0.9793, "step": 1491 }, { "epoch": 0.24, "grad_norm": 3.6357025791893003, "learning_rate": 1.776855729284474e-05, "loss": 0.9398, "step": 1492 }, { "epoch": 0.24, "grad_norm": 1.7823123106512435, "learning_rate": 1.7765269652727427e-05, "loss": 0.9029, "step": 1493 }, { "epoch": 0.24, "grad_norm": 3.6123135317291704, "learning_rate": 1.7761979897136606e-05, "loss": 0.9799, "step": 1494 }, { "epoch": 0.24, "grad_norm": 4.1532056170605145, "learning_rate": 1.7758688026968506e-05, "loss": 0.9087, "step": 1495 }, { "epoch": 0.24, "grad_norm": 2.469443887663546, "learning_rate": 1.7755394043119916e-05, "loss": 1.0147, "step": 1496 }, { "epoch": 0.24, "grad_norm": 2.568977542646884, "learning_rate": 1.775209794648821e-05, "loss": 0.9658, "step": 1497 }, { "epoch": 0.24, "grad_norm": 3.465239526966072, "learning_rate": 1.7748799737971335e-05, "loss": 0.8699, "step": 1498 }, { "epoch": 0.24, "grad_norm": 2.718836202022202, "learning_rate": 1.7745499418467814e-05, "loss": 0.9675, "step": 1499 }, { "epoch": 0.24, "grad_norm": 2.959650973027244, "learning_rate": 1.774219698887675e-05, "loss": 1.0209, "step": 1500 }, { "epoch": 0.24, "grad_norm": 2.5088712649838123, "learning_rate": 1.7738892450097806e-05, "loss": 0.959, "step": 1501 }, { "epoch": 0.24, "grad_norm": 3.165615275331343, "learning_rate": 1.7735585803031235e-05, "loss": 0.9709, "step": 1502 }, { "epoch": 0.24, "grad_norm": 2.3026165466965196, "learning_rate": 1.7732277048577858e-05, "loss": 0.9967, "step": 1503 }, { "epoch": 0.24, "grad_norm": 2.298098180062971, "learning_rate": 1.7728966187639075e-05, "loss": 1.0127, "step": 1504 }, { "epoch": 0.24, "grad_norm": 3.9600258404437745, "learning_rate": 1.7725653221116853e-05, "loss": 0.987, "step": 1505 }, { "epoch": 0.24, "grad_norm": 3.8651916902487726, "learning_rate": 1.772233814991373e-05, "loss": 0.9009, "step": 1506 }, { "epoch": 0.24, "grad_norm": 1.7952743698865048, "learning_rate": 1.771902097493283e-05, "loss": 1.0289, "step": 1507 }, { "epoch": 0.24, "grad_norm": 2.543109286361227, "learning_rate": 1.771570169707784e-05, "loss": 0.9806, "step": 1508 }, { "epoch": 0.24, "grad_norm": 2.5089477631417174, "learning_rate": 1.7712380317253025e-05, "loss": 0.9598, "step": 1509 }, { "epoch": 0.24, "grad_norm": 2.4103740674087137, "learning_rate": 1.7709056836363218e-05, "loss": 0.9777, "step": 1510 }, { "epoch": 0.24, "grad_norm": 2.7538285914083604, "learning_rate": 1.7705731255313823e-05, "loss": 0.9461, "step": 1511 }, { "epoch": 0.24, "grad_norm": 3.244947614437986, "learning_rate": 1.770240357501083e-05, "loss": 0.9506, "step": 1512 }, { "epoch": 0.24, "grad_norm": 3.4255011867064, "learning_rate": 1.769907379636078e-05, "loss": 0.9617, "step": 1513 }, { "epoch": 0.24, "grad_norm": 3.89499379188131, "learning_rate": 1.7695741920270807e-05, "loss": 0.8347, "step": 1514 }, { "epoch": 0.24, "grad_norm": 1.8305505816492256, "learning_rate": 1.76924079476486e-05, "loss": 0.9523, "step": 1515 }, { "epoch": 0.24, "grad_norm": 1.5234679390238315, "learning_rate": 1.768907187940242e-05, "loss": 0.976, "step": 1516 }, { "epoch": 0.24, "grad_norm": 3.445389727291356, "learning_rate": 1.7685733716441117e-05, "loss": 0.9523, "step": 1517 }, { "epoch": 0.24, "grad_norm": 2.4572773190077193, "learning_rate": 1.7682393459674087e-05, "loss": 0.8928, "step": 1518 }, { "epoch": 0.24, "grad_norm": 2.157693573190163, "learning_rate": 1.7679051110011312e-05, "loss": 1.0117, "step": 1519 }, { "epoch": 0.24, "grad_norm": 2.572438297254644, "learning_rate": 1.7675706668363338e-05, "loss": 1.0282, "step": 1520 }, { "epoch": 0.25, "grad_norm": 3.1968715753471697, "learning_rate": 1.7672360135641292e-05, "loss": 0.931, "step": 1521 }, { "epoch": 0.25, "grad_norm": 3.5386982913536165, "learning_rate": 1.766901151275685e-05, "loss": 0.9558, "step": 1522 }, { "epoch": 0.25, "grad_norm": 3.331297459362695, "learning_rate": 1.7665660800622274e-05, "loss": 0.9875, "step": 1523 }, { "epoch": 0.25, "grad_norm": 3.1779538989075253, "learning_rate": 1.7662308000150393e-05, "loss": 0.9437, "step": 1524 }, { "epoch": 0.25, "grad_norm": 6.915257996044361, "learning_rate": 1.7658953112254603e-05, "loss": 0.9537, "step": 1525 }, { "epoch": 0.25, "grad_norm": 3.7999871454523406, "learning_rate": 1.765559613784886e-05, "loss": 0.9281, "step": 1526 }, { "epoch": 0.25, "grad_norm": 4.30876595091627, "learning_rate": 1.7652237077847703e-05, "loss": 0.9156, "step": 1527 }, { "epoch": 0.25, "grad_norm": 3.476591028675082, "learning_rate": 1.7648875933166228e-05, "loss": 0.9786, "step": 1528 }, { "epoch": 0.25, "grad_norm": 3.454686152864674, "learning_rate": 1.764551270472011e-05, "loss": 0.9751, "step": 1529 }, { "epoch": 0.25, "grad_norm": 4.209186933205969, "learning_rate": 1.764214739342558e-05, "loss": 0.975, "step": 1530 }, { "epoch": 0.25, "grad_norm": 1.3751201143686849, "learning_rate": 1.7638780000199446e-05, "loss": 0.991, "step": 1531 }, { "epoch": 0.25, "grad_norm": 3.4321663833158893, "learning_rate": 1.7635410525959072e-05, "loss": 0.9107, "step": 1532 }, { "epoch": 0.25, "grad_norm": 3.3268564878239295, "learning_rate": 1.76320389716224e-05, "loss": 0.9252, "step": 1533 }, { "epoch": 0.25, "grad_norm": 1.5868369121871084, "learning_rate": 1.7628665338107936e-05, "loss": 0.9092, "step": 1534 }, { "epoch": 0.25, "grad_norm": 2.9739369905488724, "learning_rate": 1.7625289626334744e-05, "loss": 0.9489, "step": 1535 }, { "epoch": 0.25, "grad_norm": 3.5955657352554367, "learning_rate": 1.762191183722247e-05, "loss": 0.8386, "step": 1536 }, { "epoch": 0.25, "grad_norm": 3.73384684255425, "learning_rate": 1.761853197169131e-05, "loss": 0.9217, "step": 1537 }, { "epoch": 0.25, "grad_norm": 2.0493980175143856, "learning_rate": 1.7615150030662037e-05, "loss": 0.3424, "step": 1538 }, { "epoch": 0.25, "grad_norm": 3.209189725912043, "learning_rate": 1.7611766015055984e-05, "loss": 0.9576, "step": 1539 }, { "epoch": 0.25, "grad_norm": 4.117656297432929, "learning_rate": 1.7608379925795043e-05, "loss": 0.99, "step": 1540 }, { "epoch": 0.25, "grad_norm": 3.9390385361834905, "learning_rate": 1.7604991763801688e-05, "loss": 0.9702, "step": 1541 }, { "epoch": 0.25, "grad_norm": 2.784144577613155, "learning_rate": 1.760160152999894e-05, "loss": 0.9184, "step": 1542 }, { "epoch": 0.25, "grad_norm": 4.03615868553436, "learning_rate": 1.75982092253104e-05, "loss": 0.9322, "step": 1543 }, { "epoch": 0.25, "grad_norm": 2.0244518668458227, "learning_rate": 1.759481485066022e-05, "loss": 0.9616, "step": 1544 }, { "epoch": 0.25, "grad_norm": 2.0812055657486304, "learning_rate": 1.7591418406973126e-05, "loss": 0.9826, "step": 1545 }, { "epoch": 0.25, "grad_norm": 3.0657320678554805, "learning_rate": 1.7588019895174394e-05, "loss": 0.9307, "step": 1546 }, { "epoch": 0.25, "grad_norm": 2.4813519204680436, "learning_rate": 1.758461931618988e-05, "loss": 1.0091, "step": 1547 }, { "epoch": 0.25, "grad_norm": 3.6139828552362965, "learning_rate": 1.758121667094599e-05, "loss": 0.9538, "step": 1548 }, { "epoch": 0.25, "grad_norm": 3.4178271455042033, "learning_rate": 1.7577811960369703e-05, "loss": 0.9623, "step": 1549 }, { "epoch": 0.25, "grad_norm": 2.5685686961695495, "learning_rate": 1.7574405185388556e-05, "loss": 0.9053, "step": 1550 }, { "epoch": 0.25, "grad_norm": 2.117769843948694, "learning_rate": 1.7570996346930645e-05, "loss": 0.3203, "step": 1551 }, { "epoch": 0.25, "grad_norm": 2.997607707807956, "learning_rate": 1.7567585445924632e-05, "loss": 1.0085, "step": 1552 }, { "epoch": 0.25, "grad_norm": 2.694369740013133, "learning_rate": 1.7564172483299748e-05, "loss": 0.9312, "step": 1553 }, { "epoch": 0.25, "grad_norm": 3.1626574752260206, "learning_rate": 1.7560757459985767e-05, "loss": 0.9715, "step": 1554 }, { "epoch": 0.25, "grad_norm": 3.6227707420883597, "learning_rate": 1.7557340376913038e-05, "loss": 0.918, "step": 1555 }, { "epoch": 0.25, "grad_norm": 1.7305967399849491, "learning_rate": 1.7553921235012475e-05, "loss": 0.3644, "step": 1556 }, { "epoch": 0.25, "grad_norm": 2.758674033138395, "learning_rate": 1.755050003521554e-05, "loss": 0.8953, "step": 1557 }, { "epoch": 0.25, "grad_norm": 1.2851028199379613, "learning_rate": 1.7547076778454268e-05, "loss": 1.0176, "step": 1558 }, { "epoch": 0.25, "grad_norm": 1.8801220550889404, "learning_rate": 1.7543651465661244e-05, "loss": 0.9507, "step": 1559 }, { "epoch": 0.25, "grad_norm": 3.6228290502663265, "learning_rate": 1.754022409776962e-05, "loss": 0.8879, "step": 1560 }, { "epoch": 0.25, "grad_norm": 2.6810939645603167, "learning_rate": 1.75367946757131e-05, "loss": 0.905, "step": 1561 }, { "epoch": 0.25, "grad_norm": 4.24536733019777, "learning_rate": 1.7533363200425962e-05, "loss": 0.9491, "step": 1562 }, { "epoch": 0.25, "grad_norm": 2.9788934186633567, "learning_rate": 1.752992967284303e-05, "loss": 0.991, "step": 1563 }, { "epoch": 0.25, "grad_norm": 1.982818353402326, "learning_rate": 1.7526494093899687e-05, "loss": 0.8921, "step": 1564 }, { "epoch": 0.25, "grad_norm": 2.665936658199787, "learning_rate": 1.7523056464531887e-05, "loss": 0.9159, "step": 1565 }, { "epoch": 0.25, "grad_norm": 1.3474892333101287, "learning_rate": 1.751961678567613e-05, "loss": 0.9317, "step": 1566 }, { "epoch": 0.25, "grad_norm": 3.0438370647158637, "learning_rate": 1.751617505826948e-05, "loss": 0.892, "step": 1567 }, { "epoch": 0.25, "grad_norm": 3.413514815686556, "learning_rate": 1.7512731283249563e-05, "loss": 0.9085, "step": 1568 }, { "epoch": 0.25, "grad_norm": 2.634280918744678, "learning_rate": 1.7509285461554552e-05, "loss": 0.9989, "step": 1569 }, { "epoch": 0.25, "grad_norm": 2.597295429152249, "learning_rate": 1.7505837594123186e-05, "loss": 0.8858, "step": 1570 }, { "epoch": 0.25, "grad_norm": 3.1617233106666514, "learning_rate": 1.7502387681894765e-05, "loss": 0.9198, "step": 1571 }, { "epoch": 0.25, "grad_norm": 2.4443180877606854, "learning_rate": 1.749893572580913e-05, "loss": 0.9409, "step": 1572 }, { "epoch": 0.25, "grad_norm": 3.6720557635027054, "learning_rate": 1.7495481726806697e-05, "loss": 0.9537, "step": 1573 }, { "epoch": 0.25, "grad_norm": 3.862496908735995, "learning_rate": 1.7492025685828428e-05, "loss": 0.8411, "step": 1574 }, { "epoch": 0.25, "grad_norm": 3.039709497926917, "learning_rate": 1.748856760381584e-05, "loss": 0.9612, "step": 1575 }, { "epoch": 0.25, "grad_norm": 3.7813218795628005, "learning_rate": 1.7485107481711014e-05, "loss": 0.9623, "step": 1576 }, { "epoch": 0.25, "grad_norm": 2.04519607760469, "learning_rate": 1.748164532045658e-05, "loss": 0.9438, "step": 1577 }, { "epoch": 0.25, "grad_norm": 2.2095275265113528, "learning_rate": 1.747818112099573e-05, "loss": 1.0173, "step": 1578 }, { "epoch": 0.25, "grad_norm": 2.4105410276449293, "learning_rate": 1.7474714884272208e-05, "loss": 0.9351, "step": 1579 }, { "epoch": 0.25, "grad_norm": 2.3458698095960524, "learning_rate": 1.7471246611230307e-05, "loss": 0.9924, "step": 1580 }, { "epoch": 0.25, "grad_norm": 2.9118971565027545, "learning_rate": 1.7467776302814882e-05, "loss": 0.9503, "step": 1581 }, { "epoch": 0.25, "grad_norm": 2.0510662934345825, "learning_rate": 1.7464303959971343e-05, "loss": 1.0015, "step": 1582 }, { "epoch": 0.26, "grad_norm": 3.8743317550852447, "learning_rate": 1.746082958364565e-05, "loss": 0.9313, "step": 1583 }, { "epoch": 0.26, "grad_norm": 4.63740443210891, "learning_rate": 1.745735317478432e-05, "loss": 0.9159, "step": 1584 }, { "epoch": 0.26, "grad_norm": 4.109161951612528, "learning_rate": 1.7453874734334422e-05, "loss": 0.9768, "step": 1585 }, { "epoch": 0.26, "grad_norm": 3.4496099610922673, "learning_rate": 1.745039426324358e-05, "loss": 0.8868, "step": 1586 }, { "epoch": 0.26, "grad_norm": 2.3741763845155357, "learning_rate": 1.7446911762459968e-05, "loss": 0.9389, "step": 1587 }, { "epoch": 0.26, "grad_norm": 3.4404173522433563, "learning_rate": 1.7443427232932316e-05, "loss": 0.9388, "step": 1588 }, { "epoch": 0.26, "grad_norm": 3.3485883841650264, "learning_rate": 1.7439940675609906e-05, "loss": 0.8585, "step": 1589 }, { "epoch": 0.26, "grad_norm": 3.126841498436986, "learning_rate": 1.7436452091442574e-05, "loss": 0.9722, "step": 1590 }, { "epoch": 0.26, "grad_norm": 3.002882367513346, "learning_rate": 1.7432961481380707e-05, "loss": 0.9522, "step": 1591 }, { "epoch": 0.26, "grad_norm": 3.6096595552908504, "learning_rate": 1.742946884637524e-05, "loss": 0.9849, "step": 1592 }, { "epoch": 0.26, "grad_norm": 2.8947941587331467, "learning_rate": 1.7425974187377665e-05, "loss": 1.019, "step": 1593 }, { "epoch": 0.26, "grad_norm": 3.9827750789759535, "learning_rate": 1.7422477505340024e-05, "loss": 0.904, "step": 1594 }, { "epoch": 0.26, "grad_norm": 3.3622363302665037, "learning_rate": 1.7418978801214906e-05, "loss": 0.9942, "step": 1595 }, { "epoch": 0.26, "grad_norm": 3.1855112491301867, "learning_rate": 1.741547807595546e-05, "loss": 1.0335, "step": 1596 }, { "epoch": 0.26, "grad_norm": 3.602291538155497, "learning_rate": 1.7411975330515377e-05, "loss": 0.9443, "step": 1597 }, { "epoch": 0.26, "grad_norm": 2.180799193921014, "learning_rate": 1.7408470565848897e-05, "loss": 0.9185, "step": 1598 }, { "epoch": 0.26, "grad_norm": 3.9379583425028253, "learning_rate": 1.7404963782910828e-05, "loss": 0.9932, "step": 1599 }, { "epoch": 0.26, "grad_norm": 2.8083175077662195, "learning_rate": 1.74014549826565e-05, "loss": 0.8489, "step": 1600 }, { "epoch": 0.26, "grad_norm": 4.558929718633899, "learning_rate": 1.739794416604181e-05, "loss": 0.9402, "step": 1601 }, { "epoch": 0.26, "grad_norm": 1.5175464680829835, "learning_rate": 1.739443133402321e-05, "loss": 0.324, "step": 1602 }, { "epoch": 0.26, "grad_norm": 2.482110869151548, "learning_rate": 1.739091648755768e-05, "loss": 0.935, "step": 1603 }, { "epoch": 0.26, "grad_norm": 2.8214306926201957, "learning_rate": 1.7387399627602773e-05, "loss": 1.0073, "step": 1604 }, { "epoch": 0.26, "grad_norm": 2.6041456806608836, "learning_rate": 1.7383880755116567e-05, "loss": 0.9541, "step": 1605 }, { "epoch": 0.26, "grad_norm": 2.1088270817967008, "learning_rate": 1.7380359871057707e-05, "loss": 0.9584, "step": 1606 }, { "epoch": 0.26, "grad_norm": 3.3459692034720665, "learning_rate": 1.737683697638538e-05, "loss": 0.9277, "step": 1607 }, { "epoch": 0.26, "grad_norm": 2.9331579379905164, "learning_rate": 1.7373312072059313e-05, "loss": 0.974, "step": 1608 }, { "epoch": 0.26, "grad_norm": 3.6517150664216076, "learning_rate": 1.7369785159039794e-05, "loss": 0.904, "step": 1609 }, { "epoch": 0.26, "grad_norm": 3.3702892346876787, "learning_rate": 1.7366256238287647e-05, "loss": 0.9166, "step": 1610 }, { "epoch": 0.26, "grad_norm": 1.7783880833590764, "learning_rate": 1.736272531076425e-05, "loss": 0.9382, "step": 1611 }, { "epoch": 0.26, "grad_norm": 3.9523625901409036, "learning_rate": 1.735919237743152e-05, "loss": 1.0452, "step": 1612 }, { "epoch": 0.26, "grad_norm": 1.9054044505426413, "learning_rate": 1.7355657439251933e-05, "loss": 0.9427, "step": 1613 }, { "epoch": 0.26, "grad_norm": 2.9398630255625555, "learning_rate": 1.7352120497188497e-05, "loss": 0.9187, "step": 1614 }, { "epoch": 0.26, "grad_norm": 3.2070098393012456, "learning_rate": 1.7348581552204776e-05, "loss": 0.9825, "step": 1615 }, { "epoch": 0.26, "grad_norm": 2.3976595235027243, "learning_rate": 1.734504060526488e-05, "loss": 0.9425, "step": 1616 }, { "epoch": 0.26, "grad_norm": 2.899148612174973, "learning_rate": 1.734149765733345e-05, "loss": 0.9699, "step": 1617 }, { "epoch": 0.26, "grad_norm": 2.2610101462756154, "learning_rate": 1.7337952709375688e-05, "loss": 0.8423, "step": 1618 }, { "epoch": 0.26, "grad_norm": 3.384678138971437, "learning_rate": 1.733440576235734e-05, "loss": 1.0412, "step": 1619 }, { "epoch": 0.26, "grad_norm": 2.717617018074381, "learning_rate": 1.7330856817244686e-05, "loss": 0.9803, "step": 1620 }, { "epoch": 0.26, "grad_norm": 2.1288139673232847, "learning_rate": 1.732730587500456e-05, "loss": 0.3582, "step": 1621 }, { "epoch": 0.26, "grad_norm": 2.9366489557373723, "learning_rate": 1.7323752936604334e-05, "loss": 0.839, "step": 1622 }, { "epoch": 0.26, "grad_norm": 2.3165372711746848, "learning_rate": 1.732019800301193e-05, "loss": 0.9663, "step": 1623 }, { "epoch": 0.26, "grad_norm": 3.4552862386092786, "learning_rate": 1.7316641075195802e-05, "loss": 0.8869, "step": 1624 }, { "epoch": 0.26, "grad_norm": 1.375406681954052, "learning_rate": 1.7313082154124966e-05, "loss": 0.9065, "step": 1625 }, { "epoch": 0.26, "grad_norm": 1.4292767043982757, "learning_rate": 1.7309521240768964e-05, "loss": 0.9108, "step": 1626 }, { "epoch": 0.26, "grad_norm": 2.4198581296146595, "learning_rate": 1.7305958336097888e-05, "loss": 0.9252, "step": 1627 }, { "epoch": 0.26, "grad_norm": 2.1609855287942024, "learning_rate": 1.730239344108237e-05, "loss": 1.0545, "step": 1628 }, { "epoch": 0.26, "grad_norm": 2.56630675240561, "learning_rate": 1.729882655669359e-05, "loss": 0.9597, "step": 1629 }, { "epoch": 0.26, "grad_norm": 3.662433319005564, "learning_rate": 1.729525768390326e-05, "loss": 0.991, "step": 1630 }, { "epoch": 0.26, "grad_norm": 2.6027999546766827, "learning_rate": 1.729168682368364e-05, "loss": 0.9829, "step": 1631 }, { "epoch": 0.26, "grad_norm": 2.8545656876655956, "learning_rate": 1.7288113977007538e-05, "loss": 0.9616, "step": 1632 }, { "epoch": 0.26, "grad_norm": 2.5274085577507543, "learning_rate": 1.7284539144848286e-05, "loss": 0.9645, "step": 1633 }, { "epoch": 0.26, "grad_norm": 2.146269436072279, "learning_rate": 1.7280962328179773e-05, "loss": 1.0172, "step": 1634 }, { "epoch": 0.26, "grad_norm": 4.199883214144314, "learning_rate": 1.727738352797642e-05, "loss": 0.9577, "step": 1635 }, { "epoch": 0.26, "grad_norm": 3.110020709444133, "learning_rate": 1.7273802745213193e-05, "loss": 0.9425, "step": 1636 }, { "epoch": 0.26, "grad_norm": 2.889993794576457, "learning_rate": 1.7270219980865594e-05, "loss": 0.9883, "step": 1637 }, { "epoch": 0.26, "grad_norm": 3.181470816092755, "learning_rate": 1.7266635235909664e-05, "loss": 0.9497, "step": 1638 }, { "epoch": 0.26, "grad_norm": 3.694283662769531, "learning_rate": 1.7263048511321995e-05, "loss": 0.9668, "step": 1639 }, { "epoch": 0.26, "grad_norm": 3.635036124321472, "learning_rate": 1.7259459808079705e-05, "loss": 0.9793, "step": 1640 }, { "epoch": 0.26, "grad_norm": 1.9564911788129349, "learning_rate": 1.7255869127160452e-05, "loss": 0.3409, "step": 1641 }, { "epoch": 0.26, "grad_norm": 3.606817751509291, "learning_rate": 1.725227646954244e-05, "loss": 0.9478, "step": 1642 }, { "epoch": 0.26, "grad_norm": 3.715538240772141, "learning_rate": 1.724868183620441e-05, "loss": 0.9038, "step": 1643 }, { "epoch": 0.26, "grad_norm": 2.1012958797095838, "learning_rate": 1.724508522812563e-05, "loss": 0.9547, "step": 1644 }, { "epoch": 0.27, "grad_norm": 2.28306893769362, "learning_rate": 1.724148664628593e-05, "loss": 0.9603, "step": 1645 }, { "epoch": 0.27, "grad_norm": 2.6362293382597364, "learning_rate": 1.7237886091665653e-05, "loss": 0.3245, "step": 1646 }, { "epoch": 0.27, "grad_norm": 2.9445866264546883, "learning_rate": 1.7234283565245688e-05, "loss": 1.0011, "step": 1647 }, { "epoch": 0.27, "grad_norm": 5.247182907694078, "learning_rate": 1.723067906800747e-05, "loss": 0.9544, "step": 1648 }, { "epoch": 0.27, "grad_norm": 2.692580101358897, "learning_rate": 1.7227072600932952e-05, "loss": 1.0376, "step": 1649 }, { "epoch": 0.27, "grad_norm": 2.5870401388445203, "learning_rate": 1.7223464165004648e-05, "loss": 0.9671, "step": 1650 }, { "epoch": 0.27, "grad_norm": 4.220491855034563, "learning_rate": 1.7219853761205588e-05, "loss": 0.8665, "step": 1651 }, { "epoch": 0.27, "grad_norm": 3.119332323770291, "learning_rate": 1.7216241390519348e-05, "loss": 0.8907, "step": 1652 }, { "epoch": 0.27, "grad_norm": 4.213232656998314, "learning_rate": 1.7212627053930034e-05, "loss": 0.9955, "step": 1653 }, { "epoch": 0.27, "grad_norm": 3.1895734737417434, "learning_rate": 1.7209010752422296e-05, "loss": 0.9265, "step": 1654 }, { "epoch": 0.27, "grad_norm": 2.18861088474582, "learning_rate": 1.7205392486981306e-05, "loss": 0.9162, "step": 1655 }, { "epoch": 0.27, "grad_norm": 3.7964393145974804, "learning_rate": 1.720177225859279e-05, "loss": 0.9215, "step": 1656 }, { "epoch": 0.27, "grad_norm": 2.3308664860400365, "learning_rate": 1.7198150068242992e-05, "loss": 0.939, "step": 1657 }, { "epoch": 0.27, "grad_norm": 1.092308020320463, "learning_rate": 1.71945259169187e-05, "loss": 1.0139, "step": 1658 }, { "epoch": 0.27, "grad_norm": 2.987099088319626, "learning_rate": 1.7190899805607224e-05, "loss": 0.9134, "step": 1659 }, { "epoch": 0.27, "grad_norm": 3.7490612126499467, "learning_rate": 1.7187271735296428e-05, "loss": 0.9224, "step": 1660 }, { "epoch": 0.27, "grad_norm": 3.4709537138714324, "learning_rate": 1.718364170697469e-05, "loss": 1.0004, "step": 1661 }, { "epoch": 0.27, "grad_norm": 3.393598667655658, "learning_rate": 1.7180009721630932e-05, "loss": 0.8956, "step": 1662 }, { "epoch": 0.27, "grad_norm": 3.6637631682436185, "learning_rate": 1.717637578025461e-05, "loss": 0.8729, "step": 1663 }, { "epoch": 0.27, "grad_norm": 4.337484107964065, "learning_rate": 1.7172739883835705e-05, "loss": 0.9395, "step": 1664 }, { "epoch": 0.27, "grad_norm": 3.2358804783385886, "learning_rate": 1.716910203336474e-05, "loss": 0.9493, "step": 1665 }, { "epoch": 0.27, "grad_norm": 3.277290643891471, "learning_rate": 1.716546222983276e-05, "loss": 0.9273, "step": 1666 }, { "epoch": 0.27, "grad_norm": 1.8864748463433672, "learning_rate": 1.7161820474231355e-05, "loss": 0.9235, "step": 1667 }, { "epoch": 0.27, "grad_norm": 2.451993547695563, "learning_rate": 1.7158176767552636e-05, "loss": 1.0253, "step": 1668 }, { "epoch": 0.27, "grad_norm": 3.075182132444366, "learning_rate": 1.715453111078925e-05, "loss": 0.8916, "step": 1669 }, { "epoch": 0.27, "grad_norm": 2.652087012381048, "learning_rate": 1.715088350493437e-05, "loss": 0.9509, "step": 1670 }, { "epoch": 0.27, "grad_norm": 3.1076314820687356, "learning_rate": 1.714723395098171e-05, "loss": 0.9626, "step": 1671 }, { "epoch": 0.27, "grad_norm": 2.615405213158396, "learning_rate": 1.7143582449925508e-05, "loss": 0.9066, "step": 1672 }, { "epoch": 0.27, "grad_norm": 2.0478776412562176, "learning_rate": 1.7139929002760532e-05, "loss": 0.9492, "step": 1673 }, { "epoch": 0.27, "grad_norm": 4.424090474077937, "learning_rate": 1.713627361048208e-05, "loss": 0.8949, "step": 1674 }, { "epoch": 0.27, "grad_norm": 3.2718504925732708, "learning_rate": 1.713261627408599e-05, "loss": 0.9779, "step": 1675 }, { "epoch": 0.27, "grad_norm": 3.921328928344321, "learning_rate": 1.7128956994568612e-05, "loss": 1.0286, "step": 1676 }, { "epoch": 0.27, "grad_norm": 2.8724715683130095, "learning_rate": 1.7125295772926834e-05, "loss": 0.9357, "step": 1677 }, { "epoch": 0.27, "grad_norm": 2.643684285386506, "learning_rate": 1.7121632610158083e-05, "loss": 0.8791, "step": 1678 }, { "epoch": 0.27, "grad_norm": 2.2260473893597355, "learning_rate": 1.7117967507260298e-05, "loss": 0.9598, "step": 1679 }, { "epoch": 0.27, "grad_norm": 2.2620349722265947, "learning_rate": 1.7114300465231955e-05, "loss": 0.3558, "step": 1680 }, { "epoch": 0.27, "grad_norm": 3.121149055458839, "learning_rate": 1.711063148507206e-05, "loss": 1.0401, "step": 1681 }, { "epoch": 0.27, "grad_norm": 3.2464768313367416, "learning_rate": 1.710696056778014e-05, "loss": 0.9377, "step": 1682 }, { "epoch": 0.27, "grad_norm": 2.9499253796983704, "learning_rate": 1.710328771435626e-05, "loss": 0.9559, "step": 1683 }, { "epoch": 0.27, "grad_norm": 2.490517466292587, "learning_rate": 1.7099612925801005e-05, "loss": 1.0338, "step": 1684 }, { "epoch": 0.27, "grad_norm": 3.064889248314065, "learning_rate": 1.7095936203115484e-05, "loss": 0.9052, "step": 1685 }, { "epoch": 0.27, "grad_norm": 2.878053784809595, "learning_rate": 1.709225754730134e-05, "loss": 0.9756, "step": 1686 }, { "epoch": 0.27, "grad_norm": 1.7648612201800349, "learning_rate": 1.7088576959360743e-05, "loss": 0.9072, "step": 1687 }, { "epoch": 0.27, "grad_norm": 1.6156279189417313, "learning_rate": 1.7084894440296383e-05, "loss": 0.942, "step": 1688 }, { "epoch": 0.27, "grad_norm": 3.7180527746714005, "learning_rate": 1.7081209991111483e-05, "loss": 0.9195, "step": 1689 }, { "epoch": 0.27, "grad_norm": 3.741919139565405, "learning_rate": 1.7077523612809784e-05, "loss": 0.9365, "step": 1690 }, { "epoch": 0.27, "grad_norm": 2.318944148652376, "learning_rate": 1.707383530639556e-05, "loss": 0.336, "step": 1691 }, { "epoch": 0.27, "grad_norm": 2.240496060586508, "learning_rate": 1.707014507287361e-05, "loss": 0.9538, "step": 1692 }, { "epoch": 0.27, "grad_norm": 3.971267742613246, "learning_rate": 1.706645291324925e-05, "loss": 0.8796, "step": 1693 }, { "epoch": 0.27, "grad_norm": 2.907256905650772, "learning_rate": 1.706275882852833e-05, "loss": 0.8829, "step": 1694 }, { "epoch": 0.27, "grad_norm": 1.8237979901949959, "learning_rate": 1.705906281971722e-05, "loss": 1.1217, "step": 1695 }, { "epoch": 0.27, "grad_norm": 2.891282125476786, "learning_rate": 1.705536488782281e-05, "loss": 0.9257, "step": 1696 }, { "epoch": 0.27, "grad_norm": 2.419169040231551, "learning_rate": 1.7051665033852525e-05, "loss": 1.0309, "step": 1697 }, { "epoch": 0.27, "grad_norm": 3.3082771014632253, "learning_rate": 1.70479632588143e-05, "loss": 0.8967, "step": 1698 }, { "epoch": 0.27, "grad_norm": 2.444303944432057, "learning_rate": 1.704425956371661e-05, "loss": 0.9868, "step": 1699 }, { "epoch": 0.27, "grad_norm": 3.1717580388495645, "learning_rate": 1.704055394956844e-05, "loss": 0.8713, "step": 1700 }, { "epoch": 0.27, "grad_norm": 3.3135693821272096, "learning_rate": 1.7036846417379295e-05, "loss": 0.9164, "step": 1701 }, { "epoch": 0.27, "grad_norm": 2.5991037107692945, "learning_rate": 1.7033136968159218e-05, "loss": 1.0185, "step": 1702 }, { "epoch": 0.27, "grad_norm": 2.9971924996476034, "learning_rate": 1.7029425602918758e-05, "loss": 0.9847, "step": 1703 }, { "epoch": 0.27, "grad_norm": 3.2817886637001656, "learning_rate": 1.7025712322669e-05, "loss": 0.9478, "step": 1704 }, { "epoch": 0.27, "grad_norm": 3.9552839211504933, "learning_rate": 1.7021997128421537e-05, "loss": 0.9386, "step": 1705 }, { "epoch": 0.27, "grad_norm": 2.1745154707080454, "learning_rate": 1.7018280021188497e-05, "loss": 0.9518, "step": 1706 }, { "epoch": 0.28, "grad_norm": 3.690304223315487, "learning_rate": 1.701456100198252e-05, "loss": 0.9527, "step": 1707 }, { "epoch": 0.28, "grad_norm": 2.9537370642896597, "learning_rate": 1.7010840071816765e-05, "loss": 1.0317, "step": 1708 }, { "epoch": 0.28, "grad_norm": 2.4351717270722473, "learning_rate": 1.7007117231704923e-05, "loss": 0.9736, "step": 1709 }, { "epoch": 0.28, "grad_norm": 2.654753409247253, "learning_rate": 1.7003392482661188e-05, "loss": 1.0041, "step": 1710 }, { "epoch": 0.28, "grad_norm": 2.362144915826088, "learning_rate": 1.69996658257003e-05, "loss": 0.9076, "step": 1711 }, { "epoch": 0.28, "grad_norm": 2.661110951745071, "learning_rate": 1.6995937261837487e-05, "loss": 0.9356, "step": 1712 }, { "epoch": 0.28, "grad_norm": 3.0039827612370833, "learning_rate": 1.6992206792088525e-05, "loss": 0.9854, "step": 1713 }, { "epoch": 0.28, "grad_norm": 2.8889787264654987, "learning_rate": 1.6988474417469686e-05, "loss": 0.9226, "step": 1714 }, { "epoch": 0.28, "grad_norm": 3.0125707467663663, "learning_rate": 1.6984740138997784e-05, "loss": 0.9106, "step": 1715 }, { "epoch": 0.28, "grad_norm": 3.6118945290848954, "learning_rate": 1.6981003957690128e-05, "loss": 0.9597, "step": 1716 }, { "epoch": 0.28, "grad_norm": 1.7501008822109796, "learning_rate": 1.6977265874564562e-05, "loss": 0.9673, "step": 1717 }, { "epoch": 0.28, "grad_norm": 2.997567621252594, "learning_rate": 1.6973525890639445e-05, "loss": 0.9984, "step": 1718 }, { "epoch": 0.28, "grad_norm": 4.277078197022166, "learning_rate": 1.6969784006933647e-05, "loss": 0.9237, "step": 1719 }, { "epoch": 0.28, "grad_norm": 3.0170393873355197, "learning_rate": 1.696604022446656e-05, "loss": 0.9093, "step": 1720 }, { "epoch": 0.28, "grad_norm": 3.444648384564981, "learning_rate": 1.69622945442581e-05, "loss": 1.0064, "step": 1721 }, { "epoch": 0.28, "grad_norm": 2.31667960549819, "learning_rate": 1.6958546967328688e-05, "loss": 0.9734, "step": 1722 }, { "epoch": 0.28, "grad_norm": 3.213267870614773, "learning_rate": 1.6954797494699263e-05, "loss": 0.9391, "step": 1723 }, { "epoch": 0.28, "grad_norm": 2.279350639696333, "learning_rate": 1.69510461273913e-05, "loss": 0.9712, "step": 1724 }, { "epoch": 0.28, "grad_norm": 1.824727152978097, "learning_rate": 1.6947292866426757e-05, "loss": 0.9205, "step": 1725 }, { "epoch": 0.28, "grad_norm": 3.6621615881694476, "learning_rate": 1.6943537712828138e-05, "loss": 0.9342, "step": 1726 }, { "epoch": 0.28, "grad_norm": 2.9956318525350962, "learning_rate": 1.6939780667618445e-05, "loss": 0.9613, "step": 1727 }, { "epoch": 0.28, "grad_norm": 1.3922509963500722, "learning_rate": 1.6936021731821202e-05, "loss": 0.9362, "step": 1728 }, { "epoch": 0.28, "grad_norm": 3.630221848027136, "learning_rate": 1.6932260906460448e-05, "loss": 1.0305, "step": 1729 }, { "epoch": 0.28, "grad_norm": 3.0063920271032325, "learning_rate": 1.6928498192560737e-05, "loss": 0.8745, "step": 1730 }, { "epoch": 0.28, "grad_norm": 1.9279444926886322, "learning_rate": 1.6924733591147127e-05, "loss": 0.9245, "step": 1731 }, { "epoch": 0.28, "grad_norm": 2.9087839872295076, "learning_rate": 1.692096710324521e-05, "loss": 0.9525, "step": 1732 }, { "epoch": 0.28, "grad_norm": 2.4342273358867534, "learning_rate": 1.6917198729881073e-05, "loss": 0.9477, "step": 1733 }, { "epoch": 0.28, "grad_norm": 2.900636563646889, "learning_rate": 1.6913428472081328e-05, "loss": 1.0419, "step": 1734 }, { "epoch": 0.28, "grad_norm": 2.512805377374656, "learning_rate": 1.69096563308731e-05, "loss": 0.991, "step": 1735 }, { "epoch": 0.28, "grad_norm": 3.2108769097841723, "learning_rate": 1.690588230728402e-05, "loss": 0.8908, "step": 1736 }, { "epoch": 0.28, "grad_norm": 3.3866506039496342, "learning_rate": 1.690210640234224e-05, "loss": 0.9662, "step": 1737 }, { "epoch": 0.28, "grad_norm": 2.7336163149477715, "learning_rate": 1.6898328617076418e-05, "loss": 0.9494, "step": 1738 }, { "epoch": 0.28, "grad_norm": 2.2071629307716303, "learning_rate": 1.6894548952515726e-05, "loss": 0.9074, "step": 1739 }, { "epoch": 0.28, "grad_norm": 2.330879681094251, "learning_rate": 1.689076740968985e-05, "loss": 0.3194, "step": 1740 }, { "epoch": 0.28, "grad_norm": 3.0534331338999783, "learning_rate": 1.6886983989628985e-05, "loss": 0.9785, "step": 1741 }, { "epoch": 0.28, "grad_norm": 3.0325740793210354, "learning_rate": 1.6883198693363843e-05, "loss": 0.9349, "step": 1742 }, { "epoch": 0.28, "grad_norm": 1.4866926069662256, "learning_rate": 1.687941152192564e-05, "loss": 0.896, "step": 1743 }, { "epoch": 0.28, "grad_norm": 1.9503843075516052, "learning_rate": 1.6875622476346107e-05, "loss": 0.9885, "step": 1744 }, { "epoch": 0.28, "grad_norm": 4.199647961349568, "learning_rate": 1.687183155765748e-05, "loss": 0.9594, "step": 1745 }, { "epoch": 0.28, "grad_norm": 3.6168721599182083, "learning_rate": 1.686803876689252e-05, "loss": 0.8942, "step": 1746 }, { "epoch": 0.28, "grad_norm": 3.565588833410671, "learning_rate": 1.6864244105084477e-05, "loss": 0.8777, "step": 1747 }, { "epoch": 0.28, "grad_norm": 3.3414385640845277, "learning_rate": 1.6860447573267125e-05, "loss": 0.9235, "step": 1748 }, { "epoch": 0.28, "grad_norm": 2.6811125500000594, "learning_rate": 1.6856649172474747e-05, "loss": 0.9686, "step": 1749 }, { "epoch": 0.28, "grad_norm": 2.3124909271887053, "learning_rate": 1.6852848903742125e-05, "loss": 0.9266, "step": 1750 }, { "epoch": 0.28, "grad_norm": 2.6504928130631877, "learning_rate": 1.6849046768104567e-05, "loss": 1.015, "step": 1751 }, { "epoch": 0.28, "grad_norm": 3.0513624743930827, "learning_rate": 1.6845242766597873e-05, "loss": 0.8876, "step": 1752 }, { "epoch": 0.28, "grad_norm": 3.8944211770192645, "learning_rate": 1.684143690025836e-05, "loss": 0.9963, "step": 1753 }, { "epoch": 0.28, "grad_norm": 2.7398006125807766, "learning_rate": 1.6837629170122846e-05, "loss": 0.8657, "step": 1754 }, { "epoch": 0.28, "grad_norm": 3.2383427999465746, "learning_rate": 1.683381957722867e-05, "loss": 0.9301, "step": 1755 }, { "epoch": 0.28, "grad_norm": 3.15561027464488, "learning_rate": 1.6830008122613665e-05, "loss": 1.0062, "step": 1756 }, { "epoch": 0.28, "grad_norm": 3.007599582815059, "learning_rate": 1.6826194807316177e-05, "loss": 0.9376, "step": 1757 }, { "epoch": 0.28, "grad_norm": 1.7945131785072808, "learning_rate": 1.6822379632375055e-05, "loss": 0.9584, "step": 1758 }, { "epoch": 0.28, "grad_norm": 3.828292531609956, "learning_rate": 1.6818562598829666e-05, "loss": 0.8815, "step": 1759 }, { "epoch": 0.28, "grad_norm": 2.9057821902241616, "learning_rate": 1.681474370771987e-05, "loss": 1.0016, "step": 1760 }, { "epoch": 0.28, "grad_norm": 2.3852459597967686, "learning_rate": 1.6810922960086037e-05, "loss": 0.8876, "step": 1761 }, { "epoch": 0.28, "grad_norm": 3.7487255473927794, "learning_rate": 1.6807100356969043e-05, "loss": 0.9106, "step": 1762 }, { "epoch": 0.28, "grad_norm": 1.6735159405899944, "learning_rate": 1.6803275899410277e-05, "loss": 0.3627, "step": 1763 }, { "epoch": 0.28, "grad_norm": 3.8964069100617373, "learning_rate": 1.6799449588451618e-05, "loss": 0.9426, "step": 1764 }, { "epoch": 0.28, "grad_norm": 3.328132880116478, "learning_rate": 1.6795621425135465e-05, "loss": 0.9657, "step": 1765 }, { "epoch": 0.28, "grad_norm": 2.1592655932565776, "learning_rate": 1.679179141050472e-05, "loss": 0.9335, "step": 1766 }, { "epoch": 0.28, "grad_norm": 3.661760139597662, "learning_rate": 1.678795954560277e-05, "loss": 0.9376, "step": 1767 }, { "epoch": 0.28, "grad_norm": 3.7139512844228864, "learning_rate": 1.678412583147353e-05, "loss": 0.9057, "step": 1768 }, { "epoch": 0.29, "grad_norm": 2.546632251989631, "learning_rate": 1.678029026916141e-05, "loss": 0.8956, "step": 1769 }, { "epoch": 0.29, "grad_norm": 2.2762571068474204, "learning_rate": 1.677645285971132e-05, "loss": 0.9491, "step": 1770 }, { "epoch": 0.29, "grad_norm": 2.0682736127758066, "learning_rate": 1.6772613604168677e-05, "loss": 0.9576, "step": 1771 }, { "epoch": 0.29, "grad_norm": 3.436688830294645, "learning_rate": 1.67687725035794e-05, "loss": 0.9345, "step": 1772 }, { "epoch": 0.29, "grad_norm": 3.6673110193450817, "learning_rate": 1.676492955898991e-05, "loss": 0.8833, "step": 1773 }, { "epoch": 0.29, "grad_norm": 1.7574192539728872, "learning_rate": 1.6761084771447133e-05, "loss": 0.9058, "step": 1774 }, { "epoch": 0.29, "grad_norm": 3.067339440506403, "learning_rate": 1.675723814199849e-05, "loss": 0.9565, "step": 1775 }, { "epoch": 0.29, "grad_norm": 1.8701414101727625, "learning_rate": 1.675338967169192e-05, "loss": 0.3077, "step": 1776 }, { "epoch": 0.29, "grad_norm": 3.694800053040191, "learning_rate": 1.674953936157584e-05, "loss": 0.9909, "step": 1777 }, { "epoch": 0.29, "grad_norm": 3.603789925408673, "learning_rate": 1.6745687212699188e-05, "loss": 0.8895, "step": 1778 }, { "epoch": 0.29, "grad_norm": 3.151710645213152, "learning_rate": 1.674183322611139e-05, "loss": 0.9758, "step": 1779 }, { "epoch": 0.29, "grad_norm": 3.5304237681944395, "learning_rate": 1.6737977402862383e-05, "loss": 0.9908, "step": 1780 }, { "epoch": 0.29, "grad_norm": 2.6560971384381973, "learning_rate": 1.67341197440026e-05, "loss": 0.964, "step": 1781 }, { "epoch": 0.29, "grad_norm": 3.5625250296800957, "learning_rate": 1.673026025058297e-05, "loss": 0.9442, "step": 1782 }, { "epoch": 0.29, "grad_norm": 3.975598171499231, "learning_rate": 1.672639892365493e-05, "loss": 1.0097, "step": 1783 }, { "epoch": 0.29, "grad_norm": 2.8153560124660455, "learning_rate": 1.6722535764270405e-05, "loss": 0.8823, "step": 1784 }, { "epoch": 0.29, "grad_norm": 2.1111323065558185, "learning_rate": 1.671867077348183e-05, "loss": 0.9201, "step": 1785 }, { "epoch": 0.29, "grad_norm": 4.324078923749931, "learning_rate": 1.6714803952342137e-05, "loss": 0.8906, "step": 1786 }, { "epoch": 0.29, "grad_norm": 3.367094317428019, "learning_rate": 1.671093530190475e-05, "loss": 0.916, "step": 1787 }, { "epoch": 0.29, "grad_norm": 2.4965786410688646, "learning_rate": 1.6707064823223603e-05, "loss": 0.9731, "step": 1788 }, { "epoch": 0.29, "grad_norm": 3.257173544158361, "learning_rate": 1.6703192517353116e-05, "loss": 0.8708, "step": 1789 }, { "epoch": 0.29, "grad_norm": 2.3159992445844035, "learning_rate": 1.6699318385348207e-05, "loss": 1.0153, "step": 1790 }, { "epoch": 0.29, "grad_norm": 3.2595836481185803, "learning_rate": 1.6695442428264308e-05, "loss": 0.9503, "step": 1791 }, { "epoch": 0.29, "grad_norm": 4.245386424454709, "learning_rate": 1.669156464715733e-05, "loss": 0.9682, "step": 1792 }, { "epoch": 0.29, "grad_norm": 2.2401035739590722, "learning_rate": 1.668768504308369e-05, "loss": 0.9341, "step": 1793 }, { "epoch": 0.29, "grad_norm": 3.299915087214322, "learning_rate": 1.6683803617100293e-05, "loss": 0.9625, "step": 1794 }, { "epoch": 0.29, "grad_norm": 2.663504623217157, "learning_rate": 1.667992037026455e-05, "loss": 0.9884, "step": 1795 }, { "epoch": 0.29, "grad_norm": 4.118156073067007, "learning_rate": 1.6676035303634366e-05, "loss": 0.9571, "step": 1796 }, { "epoch": 0.29, "grad_norm": 1.8072561504234557, "learning_rate": 1.6672148418268143e-05, "loss": 0.3296, "step": 1797 }, { "epoch": 0.29, "grad_norm": 3.1925702627839687, "learning_rate": 1.6668259715224767e-05, "loss": 0.9595, "step": 1798 }, { "epoch": 0.29, "grad_norm": 2.790978978038579, "learning_rate": 1.6664369195563635e-05, "loss": 0.9201, "step": 1799 }, { "epoch": 0.29, "grad_norm": 4.014734785505665, "learning_rate": 1.666047686034463e-05, "loss": 0.8766, "step": 1800 }, { "epoch": 0.29, "grad_norm": 3.4295060581187693, "learning_rate": 1.6656582710628133e-05, "loss": 0.927, "step": 1801 }, { "epoch": 0.29, "grad_norm": 4.29579354288424, "learning_rate": 1.665268674747501e-05, "loss": 0.9616, "step": 1802 }, { "epoch": 0.29, "grad_norm": 4.441337752673759, "learning_rate": 1.664878897194664e-05, "loss": 0.9464, "step": 1803 }, { "epoch": 0.29, "grad_norm": 3.404709353821054, "learning_rate": 1.6644889385104875e-05, "loss": 1.0089, "step": 1804 }, { "epoch": 0.29, "grad_norm": 2.8311127955725968, "learning_rate": 1.6640987988012077e-05, "loss": 1.003, "step": 1805 }, { "epoch": 0.29, "grad_norm": 3.7585911568613093, "learning_rate": 1.663708478173109e-05, "loss": 0.9989, "step": 1806 }, { "epoch": 0.29, "grad_norm": 2.0754140509718035, "learning_rate": 1.6633179767325258e-05, "loss": 0.9545, "step": 1807 }, { "epoch": 0.29, "grad_norm": 3.0429637355542765, "learning_rate": 1.6629272945858413e-05, "loss": 0.9281, "step": 1808 }, { "epoch": 0.29, "grad_norm": 3.6677612636003967, "learning_rate": 1.662536431839488e-05, "loss": 0.9983, "step": 1809 }, { "epoch": 0.29, "grad_norm": 3.131664950654712, "learning_rate": 1.6621453885999473e-05, "loss": 0.9498, "step": 1810 }, { "epoch": 0.29, "grad_norm": 1.4721605999763758, "learning_rate": 1.6617541649737514e-05, "loss": 0.3305, "step": 1811 }, { "epoch": 0.29, "grad_norm": 3.0315177248676153, "learning_rate": 1.661362761067479e-05, "loss": 0.9445, "step": 1812 }, { "epoch": 0.29, "grad_norm": 2.2620163163423843, "learning_rate": 1.66097117698776e-05, "loss": 0.9639, "step": 1813 }, { "epoch": 0.29, "grad_norm": 2.908955125186554, "learning_rate": 1.6605794128412725e-05, "loss": 0.8986, "step": 1814 }, { "epoch": 0.29, "grad_norm": 3.294805374301919, "learning_rate": 1.6601874687347443e-05, "loss": 0.9089, "step": 1815 }, { "epoch": 0.29, "grad_norm": 4.116095903085646, "learning_rate": 1.6597953447749514e-05, "loss": 0.9007, "step": 1816 }, { "epoch": 0.29, "grad_norm": 3.3395466276369943, "learning_rate": 1.659403041068719e-05, "loss": 0.8572, "step": 1817 }, { "epoch": 0.29, "grad_norm": 1.9612359659484218, "learning_rate": 1.6590105577229216e-05, "loss": 0.9063, "step": 1818 }, { "epoch": 0.29, "grad_norm": 2.7097506604792576, "learning_rate": 1.6586178948444828e-05, "loss": 0.8798, "step": 1819 }, { "epoch": 0.29, "grad_norm": 3.022884190124441, "learning_rate": 1.6582250525403748e-05, "loss": 1.0713, "step": 1820 }, { "epoch": 0.29, "grad_norm": 1.6401394080014795, "learning_rate": 1.6578320309176175e-05, "loss": 0.9886, "step": 1821 }, { "epoch": 0.29, "grad_norm": 2.938978107382535, "learning_rate": 1.6574388300832827e-05, "loss": 0.9681, "step": 1822 }, { "epoch": 0.29, "grad_norm": 1.9004741779711856, "learning_rate": 1.6570454501444878e-05, "loss": 1.0369, "step": 1823 }, { "epoch": 0.29, "grad_norm": 2.7402120142029123, "learning_rate": 1.6566518912084e-05, "loss": 0.943, "step": 1824 }, { "epoch": 0.29, "grad_norm": 4.056589610220273, "learning_rate": 1.6562581533822374e-05, "loss": 0.9407, "step": 1825 }, { "epoch": 0.29, "grad_norm": 2.1365571736783653, "learning_rate": 1.6558642367732635e-05, "loss": 0.9467, "step": 1826 }, { "epoch": 0.29, "grad_norm": 2.7931148037369047, "learning_rate": 1.6554701414887927e-05, "loss": 0.9497, "step": 1827 }, { "epoch": 0.29, "grad_norm": 3.510497291310706, "learning_rate": 1.6550758676361872e-05, "loss": 0.9931, "step": 1828 }, { "epoch": 0.29, "grad_norm": 2.879473150330477, "learning_rate": 1.6546814153228576e-05, "loss": 0.9528, "step": 1829 }, { "epoch": 0.29, "grad_norm": 3.373164066118867, "learning_rate": 1.6542867846562644e-05, "loss": 0.983, "step": 1830 }, { "epoch": 0.3, "grad_norm": 2.763875548065892, "learning_rate": 1.653891975743916e-05, "loss": 0.9499, "step": 1831 }, { "epoch": 0.3, "grad_norm": 2.202138476973039, "learning_rate": 1.653496988693368e-05, "loss": 0.924, "step": 1832 }, { "epoch": 0.3, "grad_norm": 1.880678035435779, "learning_rate": 1.653101823612227e-05, "loss": 0.9662, "step": 1833 }, { "epoch": 0.3, "grad_norm": 2.807682658883369, "learning_rate": 1.652706480608146e-05, "loss": 0.975, "step": 1834 }, { "epoch": 0.3, "grad_norm": 2.684907683165356, "learning_rate": 1.652310959788828e-05, "loss": 0.9383, "step": 1835 }, { "epoch": 0.3, "grad_norm": 3.4676465178037574, "learning_rate": 1.6519152612620236e-05, "loss": 1.0517, "step": 1836 }, { "epoch": 0.3, "grad_norm": 3.0002905387062753, "learning_rate": 1.6515193851355314e-05, "loss": 0.8999, "step": 1837 }, { "epoch": 0.3, "grad_norm": 2.5916748938067036, "learning_rate": 1.6511233315172e-05, "loss": 0.9013, "step": 1838 }, { "epoch": 0.3, "grad_norm": 2.031089659011117, "learning_rate": 1.6507271005149246e-05, "loss": 0.9148, "step": 1839 }, { "epoch": 0.3, "grad_norm": 2.5051080970511785, "learning_rate": 1.6503306922366497e-05, "loss": 0.8677, "step": 1840 }, { "epoch": 0.3, "grad_norm": 3.6648874879145015, "learning_rate": 1.6499341067903675e-05, "loss": 0.985, "step": 1841 }, { "epoch": 0.3, "grad_norm": 2.2285997139180216, "learning_rate": 1.6495373442841192e-05, "loss": 0.9573, "step": 1842 }, { "epoch": 0.3, "grad_norm": 4.425943289178414, "learning_rate": 1.649140404825994e-05, "loss": 0.9534, "step": 1843 }, { "epoch": 0.3, "grad_norm": 2.1944826125461963, "learning_rate": 1.6487432885241287e-05, "loss": 0.3414, "step": 1844 }, { "epoch": 0.3, "grad_norm": 3.0052047720656074, "learning_rate": 1.6483459954867086e-05, "loss": 0.9616, "step": 1845 }, { "epoch": 0.3, "grad_norm": 4.593135040018921, "learning_rate": 1.6479485258219677e-05, "loss": 0.9521, "step": 1846 }, { "epoch": 0.3, "grad_norm": 2.9652279490548374, "learning_rate": 1.6475508796381875e-05, "loss": 0.935, "step": 1847 }, { "epoch": 0.3, "grad_norm": 2.5640058977294315, "learning_rate": 1.6471530570436983e-05, "loss": 1.0223, "step": 1848 }, { "epoch": 0.3, "grad_norm": 2.55589550516908, "learning_rate": 1.646755058146877e-05, "loss": 0.8762, "step": 1849 }, { "epoch": 0.3, "grad_norm": 1.7437805966116444, "learning_rate": 1.64635688305615e-05, "loss": 0.3419, "step": 1850 }, { "epoch": 0.3, "grad_norm": 3.3766351200398774, "learning_rate": 1.6459585318799914e-05, "loss": 0.8397, "step": 1851 }, { "epoch": 0.3, "grad_norm": 1.9520361955366767, "learning_rate": 1.6455600047269228e-05, "loss": 0.9282, "step": 1852 }, { "epoch": 0.3, "grad_norm": 2.210488641150144, "learning_rate": 1.6451613017055136e-05, "loss": 0.3501, "step": 1853 }, { "epoch": 0.3, "grad_norm": 1.7983922772061838, "learning_rate": 1.644762422924382e-05, "loss": 1.0258, "step": 1854 }, { "epoch": 0.3, "grad_norm": 2.004143594885276, "learning_rate": 1.6443633684921934e-05, "loss": 0.3088, "step": 1855 }, { "epoch": 0.3, "grad_norm": 3.1012516202234344, "learning_rate": 1.643964138517661e-05, "loss": 1.0322, "step": 1856 }, { "epoch": 0.3, "grad_norm": 3.4128005010741305, "learning_rate": 1.6435647331095466e-05, "loss": 0.927, "step": 1857 }, { "epoch": 0.3, "grad_norm": 1.095710305583089, "learning_rate": 1.643165152376659e-05, "loss": 1.0106, "step": 1858 }, { "epoch": 0.3, "grad_norm": 2.7065239853514083, "learning_rate": 1.6427653964278547e-05, "loss": 0.9666, "step": 1859 }, { "epoch": 0.3, "grad_norm": 2.28125606170267, "learning_rate": 1.6423654653720385e-05, "loss": 0.9906, "step": 1860 }, { "epoch": 0.3, "grad_norm": 4.817389048400839, "learning_rate": 1.6419653593181628e-05, "loss": 0.8091, "step": 1861 }, { "epoch": 0.3, "grad_norm": 2.25042180240212, "learning_rate": 1.6415650783752274e-05, "loss": 0.9466, "step": 1862 }, { "epoch": 0.3, "grad_norm": 3.5642935270348866, "learning_rate": 1.6411646226522793e-05, "loss": 0.9747, "step": 1863 }, { "epoch": 0.3, "grad_norm": 2.5451767765247637, "learning_rate": 1.6407639922584148e-05, "loss": 0.9597, "step": 1864 }, { "epoch": 0.3, "grad_norm": 3.347670351441639, "learning_rate": 1.640363187302776e-05, "loss": 1.0042, "step": 1865 }, { "epoch": 0.3, "grad_norm": 2.257666031993621, "learning_rate": 1.639962207894553e-05, "loss": 1.0132, "step": 1866 }, { "epoch": 0.3, "grad_norm": 2.7516721496743055, "learning_rate": 1.6395610541429836e-05, "loss": 0.9102, "step": 1867 }, { "epoch": 0.3, "grad_norm": 3.4149118584670135, "learning_rate": 1.639159726157354e-05, "loss": 0.9005, "step": 1868 }, { "epoch": 0.3, "grad_norm": 3.1320539588323824, "learning_rate": 1.6387582240469963e-05, "loss": 0.9521, "step": 1869 }, { "epoch": 0.3, "grad_norm": 2.750904888049473, "learning_rate": 1.638356547921291e-05, "loss": 0.9639, "step": 1870 }, { "epoch": 0.3, "grad_norm": 2.3711645376583292, "learning_rate": 1.6379546978896655e-05, "loss": 0.3259, "step": 1871 }, { "epoch": 0.3, "grad_norm": 3.694253588330143, "learning_rate": 1.637552674061595e-05, "loss": 0.9299, "step": 1872 }, { "epoch": 0.3, "grad_norm": 2.0477279167339786, "learning_rate": 1.6371504765466014e-05, "loss": 0.2818, "step": 1873 }, { "epoch": 0.3, "grad_norm": 2.408323905248788, "learning_rate": 1.6367481054542556e-05, "loss": 0.9815, "step": 1874 }, { "epoch": 0.3, "grad_norm": 3.1746977474491476, "learning_rate": 1.6363455608941734e-05, "loss": 0.9638, "step": 1875 }, { "epoch": 0.3, "grad_norm": 1.8973262972729188, "learning_rate": 1.6359428429760192e-05, "loss": 0.9718, "step": 1876 }, { "epoch": 0.3, "grad_norm": 2.717981339458663, "learning_rate": 1.635539951809505e-05, "loss": 0.9311, "step": 1877 }, { "epoch": 0.3, "grad_norm": 2.8021469298074373, "learning_rate": 1.6351368875043893e-05, "loss": 0.8895, "step": 1878 }, { "epoch": 0.3, "grad_norm": 3.5514380054328845, "learning_rate": 1.6347336501704777e-05, "loss": 0.9809, "step": 1879 }, { "epoch": 0.3, "grad_norm": 2.808270304469694, "learning_rate": 1.6343302399176235e-05, "loss": 0.9462, "step": 1880 }, { "epoch": 0.3, "grad_norm": 1.8145401095101195, "learning_rate": 1.633926656855726e-05, "loss": 0.3444, "step": 1881 }, { "epoch": 0.3, "grad_norm": 4.05327368399582, "learning_rate": 1.6335229010947333e-05, "loss": 0.9592, "step": 1882 }, { "epoch": 0.3, "grad_norm": 3.6730285131280906, "learning_rate": 1.6331189727446393e-05, "loss": 0.9681, "step": 1883 }, { "epoch": 0.3, "grad_norm": 3.184410692488495, "learning_rate": 1.6327148719154845e-05, "loss": 0.9824, "step": 1884 }, { "epoch": 0.3, "grad_norm": 3.037665744859074, "learning_rate": 1.632310598717358e-05, "loss": 0.9453, "step": 1885 }, { "epoch": 0.3, "grad_norm": 4.604834437671376, "learning_rate": 1.6319061532603947e-05, "loss": 0.8489, "step": 1886 }, { "epoch": 0.3, "grad_norm": 3.7694572480997133, "learning_rate": 1.631501535654777e-05, "loss": 0.9334, "step": 1887 }, { "epoch": 0.3, "grad_norm": 2.394327027416073, "learning_rate": 1.6310967460107328e-05, "loss": 0.9826, "step": 1888 }, { "epoch": 0.3, "grad_norm": 4.201653772556525, "learning_rate": 1.630691784438539e-05, "loss": 0.94, "step": 1889 }, { "epoch": 0.3, "grad_norm": 1.5910451038232394, "learning_rate": 1.630286651048518e-05, "loss": 0.2946, "step": 1890 }, { "epoch": 0.3, "grad_norm": 3.713337140472305, "learning_rate": 1.6298813459510396e-05, "loss": 1.0442, "step": 1891 }, { "epoch": 0.3, "grad_norm": 2.787064207762957, "learning_rate": 1.6294758692565197e-05, "loss": 0.9857, "step": 1892 }, { "epoch": 0.31, "grad_norm": 2.9997897074427393, "learning_rate": 1.6290702210754213e-05, "loss": 0.9777, "step": 1893 }, { "epoch": 0.31, "grad_norm": 2.8458489018726976, "learning_rate": 1.628664401518254e-05, "loss": 0.9296, "step": 1894 }, { "epoch": 0.31, "grad_norm": 2.6692967160838625, "learning_rate": 1.6282584106955747e-05, "loss": 0.924, "step": 1895 }, { "epoch": 0.31, "grad_norm": 3.638196536358421, "learning_rate": 1.6278522487179867e-05, "loss": 0.9545, "step": 1896 }, { "epoch": 0.31, "grad_norm": 3.8602970890493205, "learning_rate": 1.6274459156961388e-05, "loss": 0.9138, "step": 1897 }, { "epoch": 0.31, "grad_norm": 3.040535302716117, "learning_rate": 1.6270394117407277e-05, "loss": 0.9183, "step": 1898 }, { "epoch": 0.31, "grad_norm": 3.0418725836740133, "learning_rate": 1.626632736962497e-05, "loss": 0.9347, "step": 1899 }, { "epoch": 0.31, "grad_norm": 4.680819417507101, "learning_rate": 1.6262258914722352e-05, "loss": 0.9697, "step": 1900 }, { "epoch": 0.31, "grad_norm": 2.704607694738774, "learning_rate": 1.6258188753807783e-05, "loss": 0.9669, "step": 1901 }, { "epoch": 0.31, "grad_norm": 3.027137719803677, "learning_rate": 1.625411688799009e-05, "loss": 0.8994, "step": 1902 }, { "epoch": 0.31, "grad_norm": 2.9228226863863447, "learning_rate": 1.6250043318378563e-05, "loss": 0.8634, "step": 1903 }, { "epoch": 0.31, "grad_norm": 2.296616312627505, "learning_rate": 1.6245968046082952e-05, "loss": 0.8866, "step": 1904 }, { "epoch": 0.31, "grad_norm": 2.067663952970141, "learning_rate": 1.6241891072213475e-05, "loss": 0.3548, "step": 1905 }, { "epoch": 0.31, "grad_norm": 2.8214853653529084, "learning_rate": 1.6237812397880806e-05, "loss": 0.9742, "step": 1906 }, { "epoch": 0.31, "grad_norm": 2.9664136828735557, "learning_rate": 1.62337320241961e-05, "loss": 0.9326, "step": 1907 }, { "epoch": 0.31, "grad_norm": 1.8974462990581442, "learning_rate": 1.6229649952270955e-05, "loss": 0.3402, "step": 1908 }, { "epoch": 0.31, "grad_norm": 3.1328590751096783, "learning_rate": 1.622556618321744e-05, "loss": 0.9184, "step": 1909 }, { "epoch": 0.31, "grad_norm": 2.770736367577349, "learning_rate": 1.622148071814809e-05, "loss": 1.0585, "step": 1910 }, { "epoch": 0.31, "grad_norm": 2.7742212182186816, "learning_rate": 1.6217393558175897e-05, "loss": 0.9428, "step": 1911 }, { "epoch": 0.31, "grad_norm": 3.1504690275233287, "learning_rate": 1.6213304704414314e-05, "loss": 0.9437, "step": 1912 }, { "epoch": 0.31, "grad_norm": 5.2981688233057485, "learning_rate": 1.620921415797726e-05, "loss": 0.8123, "step": 1913 }, { "epoch": 0.31, "grad_norm": 2.3499622666596416, "learning_rate": 1.620512191997911e-05, "loss": 0.9603, "step": 1914 }, { "epoch": 0.31, "grad_norm": 2.85084286322883, "learning_rate": 1.6201027991534705e-05, "loss": 0.9091, "step": 1915 }, { "epoch": 0.31, "grad_norm": 2.5772995985770057, "learning_rate": 1.6196932373759337e-05, "loss": 0.9845, "step": 1916 }, { "epoch": 0.31, "grad_norm": 2.7059837615603626, "learning_rate": 1.6192835067768776e-05, "loss": 0.9264, "step": 1917 }, { "epoch": 0.31, "grad_norm": 2.7758360041622008, "learning_rate": 1.6188736074679237e-05, "loss": 0.88, "step": 1918 }, { "epoch": 0.31, "grad_norm": 1.9014050558793194, "learning_rate": 1.6184635395607395e-05, "loss": 0.3416, "step": 1919 }, { "epoch": 0.31, "grad_norm": 3.2916751008388454, "learning_rate": 1.6180533031670395e-05, "loss": 0.9607, "step": 1920 }, { "epoch": 0.31, "grad_norm": 3.219290382429993, "learning_rate": 1.6176428983985825e-05, "loss": 0.9577, "step": 1921 }, { "epoch": 0.31, "grad_norm": 3.261574893908496, "learning_rate": 1.6172323253671745e-05, "loss": 0.9245, "step": 1922 }, { "epoch": 0.31, "grad_norm": 2.7705494632487806, "learning_rate": 1.6168215841846673e-05, "loss": 0.9421, "step": 1923 }, { "epoch": 0.31, "grad_norm": 3.8877260982594812, "learning_rate": 1.616410674962958e-05, "loss": 0.9465, "step": 1924 }, { "epoch": 0.31, "grad_norm": 3.5954453159885036, "learning_rate": 1.615999597813989e-05, "loss": 0.9894, "step": 1925 }, { "epoch": 0.31, "grad_norm": 3.1359663235452113, "learning_rate": 1.6155883528497492e-05, "loss": 0.905, "step": 1926 }, { "epoch": 0.31, "grad_norm": 2.401584312920671, "learning_rate": 1.6151769401822735e-05, "loss": 1.0082, "step": 1927 }, { "epoch": 0.31, "grad_norm": 2.1892791052749434, "learning_rate": 1.614765359923642e-05, "loss": 0.889, "step": 1928 }, { "epoch": 0.31, "grad_norm": 3.681354898146659, "learning_rate": 1.6143536121859805e-05, "loss": 0.9134, "step": 1929 }, { "epoch": 0.31, "grad_norm": 3.2667265133987393, "learning_rate": 1.61394169708146e-05, "loss": 0.905, "step": 1930 }, { "epoch": 0.31, "grad_norm": 2.7234790932329287, "learning_rate": 1.613529614722298e-05, "loss": 0.8956, "step": 1931 }, { "epoch": 0.31, "grad_norm": 2.513001584521218, "learning_rate": 1.6131173652207565e-05, "loss": 0.9438, "step": 1932 }, { "epoch": 0.31, "grad_norm": 3.6086551889795757, "learning_rate": 1.6127049486891442e-05, "loss": 0.9189, "step": 1933 }, { "epoch": 0.31, "grad_norm": 2.8480850545741045, "learning_rate": 1.612292365239815e-05, "loss": 0.9533, "step": 1934 }, { "epoch": 0.31, "grad_norm": 2.63092688889453, "learning_rate": 1.611879614985167e-05, "loss": 0.9373, "step": 1935 }, { "epoch": 0.31, "grad_norm": 3.905241324847653, "learning_rate": 1.6114666980376455e-05, "loss": 0.9564, "step": 1936 }, { "epoch": 0.31, "grad_norm": 3.8554738207999204, "learning_rate": 1.6110536145097407e-05, "loss": 0.9023, "step": 1937 }, { "epoch": 0.31, "grad_norm": 4.613954706479818, "learning_rate": 1.6106403645139866e-05, "loss": 0.9229, "step": 1938 }, { "epoch": 0.31, "grad_norm": 3.789792334672364, "learning_rate": 1.6102269481629654e-05, "loss": 0.9409, "step": 1939 }, { "epoch": 0.31, "grad_norm": 3.4028013077179704, "learning_rate": 1.6098133655693027e-05, "loss": 0.929, "step": 1940 }, { "epoch": 0.31, "grad_norm": 2.5887658823540103, "learning_rate": 1.6093996168456694e-05, "loss": 0.9472, "step": 1941 }, { "epoch": 0.31, "grad_norm": 3.7854017683935135, "learning_rate": 1.6089857021047822e-05, "loss": 0.9362, "step": 1942 }, { "epoch": 0.31, "grad_norm": 3.9294948037682653, "learning_rate": 1.608571621459403e-05, "loss": 0.9363, "step": 1943 }, { "epoch": 0.31, "grad_norm": 3.549907984011176, "learning_rate": 1.6081573750223388e-05, "loss": 0.9177, "step": 1944 }, { "epoch": 0.31, "grad_norm": 3.3897405780565917, "learning_rate": 1.607742962906442e-05, "loss": 0.9677, "step": 1945 }, { "epoch": 0.31, "grad_norm": 3.535637754242314, "learning_rate": 1.6073283852246087e-05, "loss": 0.9429, "step": 1946 }, { "epoch": 0.31, "grad_norm": 2.2553308701794457, "learning_rate": 1.6069136420897827e-05, "loss": 0.895, "step": 1947 }, { "epoch": 0.31, "grad_norm": 1.7677331002935062, "learning_rate": 1.606498733614951e-05, "loss": 0.9624, "step": 1948 }, { "epoch": 0.31, "grad_norm": 4.477993029229078, "learning_rate": 1.606083659913146e-05, "loss": 0.9824, "step": 1949 }, { "epoch": 0.31, "grad_norm": 2.464457486170406, "learning_rate": 1.605668421097445e-05, "loss": 0.9863, "step": 1950 }, { "epoch": 0.31, "grad_norm": 4.901929763396599, "learning_rate": 1.605253017280971e-05, "loss": 0.9561, "step": 1951 }, { "epoch": 0.31, "grad_norm": 1.6752304914938534, "learning_rate": 1.6048374485768912e-05, "loss": 0.9973, "step": 1952 }, { "epoch": 0.31, "grad_norm": 3.6646329846397134, "learning_rate": 1.604421715098418e-05, "loss": 0.9901, "step": 1953 }, { "epoch": 0.31, "grad_norm": 2.498512015981982, "learning_rate": 1.6040058169588086e-05, "loss": 0.9463, "step": 1954 }, { "epoch": 0.31, "grad_norm": 3.66232278024036, "learning_rate": 1.603589754271365e-05, "loss": 0.9399, "step": 1955 }, { "epoch": 0.32, "grad_norm": 3.069136921095194, "learning_rate": 1.603173527149434e-05, "loss": 0.8823, "step": 1956 }, { "epoch": 0.32, "grad_norm": 3.215213796053615, "learning_rate": 1.602757135706408e-05, "loss": 0.9186, "step": 1957 }, { "epoch": 0.32, "grad_norm": 2.1537869525203566, "learning_rate": 1.602340580055723e-05, "loss": 1.0264, "step": 1958 }, { "epoch": 0.32, "grad_norm": 1.4476549291630425, "learning_rate": 1.6019238603108605e-05, "loss": 0.8871, "step": 1959 }, { "epoch": 0.32, "grad_norm": 3.796618292022819, "learning_rate": 1.6015069765853462e-05, "loss": 0.8879, "step": 1960 }, { "epoch": 0.32, "grad_norm": 4.355161319026666, "learning_rate": 1.6010899289927513e-05, "loss": 0.9484, "step": 1961 }, { "epoch": 0.32, "grad_norm": 3.9444507828663027, "learning_rate": 1.60067271764669e-05, "loss": 0.9165, "step": 1962 }, { "epoch": 0.32, "grad_norm": 2.1238026330992086, "learning_rate": 1.600255342660823e-05, "loss": 0.9472, "step": 1963 }, { "epoch": 0.32, "grad_norm": 3.5832086290055023, "learning_rate": 1.5998378041488547e-05, "loss": 0.9803, "step": 1964 }, { "epoch": 0.32, "grad_norm": 2.4185785316586372, "learning_rate": 1.5994201022245338e-05, "loss": 0.9002, "step": 1965 }, { "epoch": 0.32, "grad_norm": 2.4851448257888844, "learning_rate": 1.599002237001654e-05, "loss": 0.9431, "step": 1966 }, { "epoch": 0.32, "grad_norm": 3.95067511474997, "learning_rate": 1.598584208594053e-05, "loss": 0.8748, "step": 1967 }, { "epoch": 0.32, "grad_norm": 3.3521915725735902, "learning_rate": 1.5981660171156136e-05, "loss": 0.9153, "step": 1968 }, { "epoch": 0.32, "grad_norm": 3.7060952165521046, "learning_rate": 1.5977476626802624e-05, "loss": 0.9541, "step": 1969 }, { "epoch": 0.32, "grad_norm": 2.86471675677677, "learning_rate": 1.5973291454019713e-05, "loss": 0.9515, "step": 1970 }, { "epoch": 0.32, "grad_norm": 3.328819954538528, "learning_rate": 1.5969104653947552e-05, "loss": 0.9532, "step": 1971 }, { "epoch": 0.32, "grad_norm": 2.350515848722666, "learning_rate": 1.5964916227726747e-05, "loss": 0.9294, "step": 1972 }, { "epoch": 0.32, "grad_norm": 2.9808672517320742, "learning_rate": 1.5960726176498334e-05, "loss": 0.9141, "step": 1973 }, { "epoch": 0.32, "grad_norm": 2.4487010159152214, "learning_rate": 1.5956534501403808e-05, "loss": 0.8641, "step": 1974 }, { "epoch": 0.32, "grad_norm": 1.9298539186447117, "learning_rate": 1.5952341203585086e-05, "loss": 0.9839, "step": 1975 }, { "epoch": 0.32, "grad_norm": 3.023746602820151, "learning_rate": 1.594814628418454e-05, "loss": 0.9627, "step": 1976 }, { "epoch": 0.32, "grad_norm": 2.934151059381575, "learning_rate": 1.5943949744344994e-05, "loss": 0.8327, "step": 1977 }, { "epoch": 0.32, "grad_norm": 2.699660445867172, "learning_rate": 1.5939751585209685e-05, "loss": 0.9408, "step": 1978 }, { "epoch": 0.32, "grad_norm": 3.226101526023845, "learning_rate": 1.5935551807922315e-05, "loss": 0.8938, "step": 1979 }, { "epoch": 0.32, "grad_norm": 3.1377962192963658, "learning_rate": 1.593135041362702e-05, "loss": 0.9344, "step": 1980 }, { "epoch": 0.32, "grad_norm": 5.114144342025773, "learning_rate": 1.592714740346837e-05, "loss": 0.9305, "step": 1981 }, { "epoch": 0.32, "grad_norm": 3.891983714180381, "learning_rate": 1.592294277859139e-05, "loss": 0.9115, "step": 1982 }, { "epoch": 0.32, "grad_norm": 3.5518789074077657, "learning_rate": 1.5918736540141525e-05, "loss": 0.9682, "step": 1983 }, { "epoch": 0.32, "grad_norm": 3.6720967976562684, "learning_rate": 1.5914528689264677e-05, "loss": 0.9118, "step": 1984 }, { "epoch": 0.32, "grad_norm": 2.6222558891789602, "learning_rate": 1.591031922710718e-05, "loss": 0.9032, "step": 1985 }, { "epoch": 0.32, "grad_norm": 2.9719954569539278, "learning_rate": 1.5906108154815805e-05, "loss": 0.9371, "step": 1986 }, { "epoch": 0.32, "grad_norm": 3.0114011287808955, "learning_rate": 1.5901895473537768e-05, "loss": 0.8999, "step": 1987 }, { "epoch": 0.32, "grad_norm": 2.488846695160085, "learning_rate": 1.5897681184420716e-05, "loss": 0.9459, "step": 1988 }, { "epoch": 0.32, "grad_norm": 2.0744149497823754, "learning_rate": 1.589346528861274e-05, "loss": 0.9448, "step": 1989 }, { "epoch": 0.32, "grad_norm": 4.396856806540518, "learning_rate": 1.588924778726236e-05, "loss": 0.9485, "step": 1990 }, { "epoch": 0.32, "grad_norm": 3.5055722429859375, "learning_rate": 1.588502868151855e-05, "loss": 0.9207, "step": 1991 }, { "epoch": 0.32, "grad_norm": 2.121559948910539, "learning_rate": 1.5880807972530705e-05, "loss": 0.3466, "step": 1992 }, { "epoch": 0.32, "grad_norm": 3.4180892835889964, "learning_rate": 1.587658566144866e-05, "loss": 0.9233, "step": 1993 }, { "epoch": 0.32, "grad_norm": 2.999845659736309, "learning_rate": 1.5872361749422694e-05, "loss": 0.9374, "step": 1994 }, { "epoch": 0.32, "grad_norm": 3.7734067927467563, "learning_rate": 1.5868136237603516e-05, "loss": 0.9278, "step": 1995 }, { "epoch": 0.32, "grad_norm": 2.416929745879034, "learning_rate": 1.5863909127142268e-05, "loss": 0.9696, "step": 1996 }, { "epoch": 0.32, "grad_norm": 3.68641853638002, "learning_rate": 1.585968041919054e-05, "loss": 0.9573, "step": 1997 }, { "epoch": 0.32, "grad_norm": 2.852688501734328, "learning_rate": 1.585545011490034e-05, "loss": 0.8995, "step": 1998 }, { "epoch": 0.32, "grad_norm": 3.5654510856828443, "learning_rate": 1.5851218215424115e-05, "loss": 0.8377, "step": 1999 }, { "epoch": 0.32, "grad_norm": 1.6198008333623466, "learning_rate": 1.5846984721914765e-05, "loss": 0.9637, "step": 2000 }, { "epoch": 0.32, "grad_norm": 3.306018173657063, "learning_rate": 1.5842749635525602e-05, "loss": 0.9471, "step": 2001 }, { "epoch": 0.32, "grad_norm": 1.9088036364380827, "learning_rate": 1.5838512957410384e-05, "loss": 0.9461, "step": 2002 }, { "epoch": 0.32, "grad_norm": 2.981982805249833, "learning_rate": 1.5834274688723293e-05, "loss": 0.8444, "step": 2003 }, { "epoch": 0.32, "grad_norm": 4.022722080660361, "learning_rate": 1.583003483061896e-05, "loss": 1.0134, "step": 2004 }, { "epoch": 0.32, "grad_norm": 3.4935289234510796, "learning_rate": 1.5825793384252432e-05, "loss": 0.9867, "step": 2005 }, { "epoch": 0.32, "grad_norm": 2.794122801619461, "learning_rate": 1.582155035077919e-05, "loss": 0.9093, "step": 2006 }, { "epoch": 0.32, "grad_norm": 1.8049223115427866, "learning_rate": 1.5817305731355168e-05, "loss": 0.9919, "step": 2007 }, { "epoch": 0.32, "grad_norm": 3.875817120445088, "learning_rate": 1.5813059527136708e-05, "loss": 0.9321, "step": 2008 }, { "epoch": 0.32, "grad_norm": 2.497197774141057, "learning_rate": 1.5808811739280592e-05, "loss": 0.9106, "step": 2009 }, { "epoch": 0.32, "grad_norm": 3.3468053530900432, "learning_rate": 1.5804562368944042e-05, "loss": 1.0109, "step": 2010 }, { "epoch": 0.32, "grad_norm": 2.4709287276570007, "learning_rate": 1.5800311417284695e-05, "loss": 0.9725, "step": 2011 }, { "epoch": 0.32, "grad_norm": 3.4918453040361532, "learning_rate": 1.579605888546063e-05, "loss": 0.9964, "step": 2012 }, { "epoch": 0.32, "grad_norm": 3.0728019736055465, "learning_rate": 1.579180477463036e-05, "loss": 0.9565, "step": 2013 }, { "epoch": 0.32, "grad_norm": 2.4913600396996234, "learning_rate": 1.5787549085952816e-05, "loss": 0.9795, "step": 2014 }, { "epoch": 0.32, "grad_norm": 3.089360986701351, "learning_rate": 1.5783291820587366e-05, "loss": 0.9053, "step": 2015 }, { "epoch": 0.32, "grad_norm": 3.72150774087835, "learning_rate": 1.5779032979693808e-05, "loss": 0.9707, "step": 2016 }, { "epoch": 0.32, "grad_norm": 1.6586620832427952, "learning_rate": 1.5774772564432365e-05, "loss": 0.9009, "step": 2017 }, { "epoch": 0.33, "grad_norm": 2.500755005317125, "learning_rate": 1.577051057596369e-05, "loss": 0.8194, "step": 2018 }, { "epoch": 0.33, "grad_norm": 2.943579530740827, "learning_rate": 1.5766247015448875e-05, "loss": 0.9545, "step": 2019 }, { "epoch": 0.33, "grad_norm": 2.2323998744678173, "learning_rate": 1.576198188404942e-05, "loss": 0.3706, "step": 2020 }, { "epoch": 0.33, "grad_norm": 2.510536971843555, "learning_rate": 1.5757715182927273e-05, "loss": 0.9058, "step": 2021 }, { "epoch": 0.33, "grad_norm": 3.388101368131908, "learning_rate": 1.5753446913244798e-05, "loss": 0.9003, "step": 2022 }, { "epoch": 0.33, "grad_norm": 3.324534023588723, "learning_rate": 1.5749177076164788e-05, "loss": 0.9175, "step": 2023 }, { "epoch": 0.33, "grad_norm": 2.9022614376261044, "learning_rate": 1.5744905672850467e-05, "loss": 0.9127, "step": 2024 }, { "epoch": 0.33, "grad_norm": 3.0449254768159424, "learning_rate": 1.5740632704465478e-05, "loss": 0.8837, "step": 2025 }, { "epoch": 0.33, "grad_norm": 2.87843697810994, "learning_rate": 1.5736358172173903e-05, "loss": 0.9778, "step": 2026 }, { "epoch": 0.33, "grad_norm": 2.9070372796963637, "learning_rate": 1.5732082077140235e-05, "loss": 0.976, "step": 2027 }, { "epoch": 0.33, "grad_norm": 2.016319923962118, "learning_rate": 1.5727804420529405e-05, "loss": 0.3111, "step": 2028 }, { "epoch": 0.33, "grad_norm": 3.4580224291910793, "learning_rate": 1.5723525203506758e-05, "loss": 0.8935, "step": 2029 }, { "epoch": 0.33, "grad_norm": 4.367746498213495, "learning_rate": 1.5719244427238086e-05, "loss": 0.9252, "step": 2030 }, { "epoch": 0.33, "grad_norm": 3.7152828434387724, "learning_rate": 1.571496209288957e-05, "loss": 0.9529, "step": 2031 }, { "epoch": 0.33, "grad_norm": 4.3378679806506, "learning_rate": 1.571067820162785e-05, "loss": 0.9055, "step": 2032 }, { "epoch": 0.33, "grad_norm": 2.4731913344689063, "learning_rate": 1.5706392754619973e-05, "loss": 0.9565, "step": 2033 }, { "epoch": 0.33, "grad_norm": 2.3848857927468563, "learning_rate": 1.5702105753033415e-05, "loss": 0.8794, "step": 2034 }, { "epoch": 0.33, "grad_norm": 2.316606226711275, "learning_rate": 1.5697817198036066e-05, "loss": 0.8861, "step": 2035 }, { "epoch": 0.33, "grad_norm": 2.954505234557941, "learning_rate": 1.569352709079625e-05, "loss": 0.9207, "step": 2036 }, { "epoch": 0.33, "grad_norm": 3.962030203614125, "learning_rate": 1.5689235432482715e-05, "loss": 0.9116, "step": 2037 }, { "epoch": 0.33, "grad_norm": 3.8357712967987565, "learning_rate": 1.5684942224264622e-05, "loss": 0.9007, "step": 2038 }, { "epoch": 0.33, "grad_norm": 2.9305596032699746, "learning_rate": 1.568064746731156e-05, "loss": 0.9927, "step": 2039 }, { "epoch": 0.33, "grad_norm": 3.2040124175632365, "learning_rate": 1.567635116279354e-05, "loss": 0.9151, "step": 2040 }, { "epoch": 0.33, "grad_norm": 3.9286216063824795, "learning_rate": 1.5672053311880994e-05, "loss": 0.9834, "step": 2041 }, { "epoch": 0.33, "grad_norm": 3.8239226026882522, "learning_rate": 1.5667753915744776e-05, "loss": 0.9725, "step": 2042 }, { "epoch": 0.33, "grad_norm": 3.0776046080671557, "learning_rate": 1.566345297555616e-05, "loss": 0.8478, "step": 2043 }, { "epoch": 0.33, "grad_norm": 2.3012090780669587, "learning_rate": 1.5659150492486833e-05, "loss": 0.8839, "step": 2044 }, { "epoch": 0.33, "grad_norm": 4.027460490624339, "learning_rate": 1.565484646770892e-05, "loss": 0.833, "step": 2045 }, { "epoch": 0.33, "grad_norm": 3.412134809131737, "learning_rate": 1.5650540902394954e-05, "loss": 0.9299, "step": 2046 }, { "epoch": 0.33, "grad_norm": 3.6546950987255338, "learning_rate": 1.564623379771789e-05, "loss": 0.9502, "step": 2047 }, { "epoch": 0.33, "grad_norm": 4.58484435171928, "learning_rate": 1.5641925154851096e-05, "loss": 0.8426, "step": 2048 }, { "epoch": 0.33, "grad_norm": 2.2034310202746075, "learning_rate": 1.5637614974968372e-05, "loss": 1.009, "step": 2049 }, { "epoch": 0.33, "grad_norm": 2.3589382146269213, "learning_rate": 1.5633303259243927e-05, "loss": 0.9466, "step": 2050 }, { "epoch": 0.33, "grad_norm": 2.042160078359317, "learning_rate": 1.562899000885239e-05, "loss": 0.9106, "step": 2051 }, { "epoch": 0.33, "grad_norm": 4.0958761270537645, "learning_rate": 1.5624675224968808e-05, "loss": 0.9181, "step": 2052 }, { "epoch": 0.33, "grad_norm": 2.878902109213016, "learning_rate": 1.562035890876865e-05, "loss": 0.9001, "step": 2053 }, { "epoch": 0.33, "grad_norm": 2.564393902085343, "learning_rate": 1.5616041061427805e-05, "loss": 0.9135, "step": 2054 }, { "epoch": 0.33, "grad_norm": 3.3584482733380754, "learning_rate": 1.5611721684122564e-05, "loss": 0.9382, "step": 2055 }, { "epoch": 0.33, "grad_norm": 4.570724807790849, "learning_rate": 1.5607400778029648e-05, "loss": 0.9283, "step": 2056 }, { "epoch": 0.33, "grad_norm": 2.40262260989499, "learning_rate": 1.5603078344326194e-05, "loss": 0.929, "step": 2057 }, { "epoch": 0.33, "grad_norm": 3.9273711216285574, "learning_rate": 1.5598754384189745e-05, "loss": 0.9703, "step": 2058 }, { "epoch": 0.33, "grad_norm": 3.1528825074967672, "learning_rate": 1.5594428898798272e-05, "loss": 0.961, "step": 2059 }, { "epoch": 0.33, "grad_norm": 2.9983307645008166, "learning_rate": 1.559010188933016e-05, "loss": 0.9169, "step": 2060 }, { "epoch": 0.33, "grad_norm": 3.33328334452975, "learning_rate": 1.5585773356964192e-05, "loss": 0.8945, "step": 2061 }, { "epoch": 0.33, "grad_norm": 3.168406426879082, "learning_rate": 1.55814433028796e-05, "loss": 0.9133, "step": 2062 }, { "epoch": 0.33, "grad_norm": 2.9446486476248714, "learning_rate": 1.557711172825599e-05, "loss": 0.8704, "step": 2063 }, { "epoch": 0.33, "grad_norm": 3.114035942090716, "learning_rate": 1.5572778634273417e-05, "loss": 0.909, "step": 2064 }, { "epoch": 0.33, "grad_norm": 3.1667119742381376, "learning_rate": 1.556844402211233e-05, "loss": 0.9993, "step": 2065 }, { "epoch": 0.33, "grad_norm": 2.887900558365846, "learning_rate": 1.556410789295359e-05, "loss": 0.9603, "step": 2066 }, { "epoch": 0.33, "grad_norm": 2.0827430524644925, "learning_rate": 1.5559770247978488e-05, "loss": 0.8903, "step": 2067 }, { "epoch": 0.33, "grad_norm": 3.0367659886765903, "learning_rate": 1.5555431088368716e-05, "loss": 0.9548, "step": 2068 }, { "epoch": 0.33, "grad_norm": 2.943719164773897, "learning_rate": 1.5551090415306377e-05, "loss": 0.9539, "step": 2069 }, { "epoch": 0.33, "grad_norm": 3.5028989230597762, "learning_rate": 1.554674822997399e-05, "loss": 0.9252, "step": 2070 }, { "epoch": 0.33, "grad_norm": 3.5795637011923054, "learning_rate": 1.5542404533554493e-05, "loss": 0.959, "step": 2071 }, { "epoch": 0.33, "grad_norm": 1.8772625305919917, "learning_rate": 1.553805932723122e-05, "loss": 0.8716, "step": 2072 }, { "epoch": 0.33, "grad_norm": 3.9689153201295913, "learning_rate": 1.5533712612187933e-05, "loss": 0.8946, "step": 2073 }, { "epoch": 0.33, "grad_norm": 2.3374324850032018, "learning_rate": 1.5529364389608788e-05, "loss": 0.9255, "step": 2074 }, { "epoch": 0.33, "grad_norm": 1.388109868708708, "learning_rate": 1.552501466067837e-05, "loss": 0.8842, "step": 2075 }, { "epoch": 0.33, "grad_norm": 1.8272521714857703, "learning_rate": 1.552066342658166e-05, "loss": 0.8989, "step": 2076 }, { "epoch": 0.33, "grad_norm": 3.2502012924066888, "learning_rate": 1.551631068850405e-05, "loss": 0.942, "step": 2077 }, { "epoch": 0.33, "grad_norm": 3.069858352425251, "learning_rate": 1.5511956447631355e-05, "loss": 0.9317, "step": 2078 }, { "epoch": 0.33, "grad_norm": 1.6882803490180454, "learning_rate": 1.5507600705149782e-05, "loss": 0.8968, "step": 2079 }, { "epoch": 0.34, "grad_norm": 1.5556597712589904, "learning_rate": 1.5503243462245963e-05, "loss": 0.8131, "step": 2080 }, { "epoch": 0.34, "grad_norm": 2.299073911894952, "learning_rate": 1.5498884720106925e-05, "loss": 0.9006, "step": 2081 }, { "epoch": 0.34, "grad_norm": 1.975258921298067, "learning_rate": 1.549452447992011e-05, "loss": 0.9478, "step": 2082 }, { "epoch": 0.34, "grad_norm": 3.2144162529974523, "learning_rate": 1.5490162742873372e-05, "loss": 0.9177, "step": 2083 }, { "epoch": 0.34, "grad_norm": 2.8839391972220785, "learning_rate": 1.5485799510154965e-05, "loss": 0.9356, "step": 2084 }, { "epoch": 0.34, "grad_norm": 3.1267124824447765, "learning_rate": 1.5481434782953557e-05, "loss": 0.9041, "step": 2085 }, { "epoch": 0.34, "grad_norm": 1.8125370942627943, "learning_rate": 1.5477068562458212e-05, "loss": 0.3269, "step": 2086 }, { "epoch": 0.34, "grad_norm": 4.000185962168958, "learning_rate": 1.5472700849858417e-05, "loss": 0.9088, "step": 2087 }, { "epoch": 0.34, "grad_norm": 2.934628970659591, "learning_rate": 1.5468331646344056e-05, "loss": 0.9637, "step": 2088 }, { "epoch": 0.34, "grad_norm": 3.3129293775276647, "learning_rate": 1.5463960953105416e-05, "loss": 0.9539, "step": 2089 }, { "epoch": 0.34, "grad_norm": 3.0575072403679346, "learning_rate": 1.54595887713332e-05, "loss": 0.892, "step": 2090 }, { "epoch": 0.34, "grad_norm": 2.6899649383075106, "learning_rate": 1.5455215102218505e-05, "loss": 0.3054, "step": 2091 }, { "epoch": 0.34, "grad_norm": 3.1679100473329673, "learning_rate": 1.5450839946952845e-05, "loss": 0.9577, "step": 2092 }, { "epoch": 0.34, "grad_norm": 4.061598222855304, "learning_rate": 1.544646330672813e-05, "loss": 0.855, "step": 2093 }, { "epoch": 0.34, "grad_norm": 2.654188455664898, "learning_rate": 1.544208518273668e-05, "loss": 0.9569, "step": 2094 }, { "epoch": 0.34, "grad_norm": 2.6332575481309872, "learning_rate": 1.5437705576171208e-05, "loss": 1.0162, "step": 2095 }, { "epoch": 0.34, "grad_norm": 1.6512812090688702, "learning_rate": 1.543332448822485e-05, "loss": 0.921, "step": 2096 }, { "epoch": 0.34, "grad_norm": 1.9063389241474042, "learning_rate": 1.542894192009113e-05, "loss": 0.9126, "step": 2097 }, { "epoch": 0.34, "grad_norm": 1.6749390662198183, "learning_rate": 1.5424557872963984e-05, "loss": 0.3171, "step": 2098 }, { "epoch": 0.34, "grad_norm": 2.056232756104676, "learning_rate": 1.5420172348037742e-05, "loss": 0.9257, "step": 2099 }, { "epoch": 0.34, "grad_norm": 2.597431832851876, "learning_rate": 1.5415785346507143e-05, "loss": 0.8907, "step": 2100 }, { "epoch": 0.34, "grad_norm": 3.9055125036228713, "learning_rate": 1.5411396869567332e-05, "loss": 0.8983, "step": 2101 }, { "epoch": 0.34, "grad_norm": 1.9165058966948263, "learning_rate": 1.5407006918413843e-05, "loss": 0.9466, "step": 2102 }, { "epoch": 0.34, "grad_norm": 2.6112433956897245, "learning_rate": 1.540261549424263e-05, "loss": 0.94, "step": 2103 }, { "epoch": 0.34, "grad_norm": 3.3682106393108198, "learning_rate": 1.539822259825003e-05, "loss": 0.955, "step": 2104 }, { "epoch": 0.34, "grad_norm": 3.344260203573296, "learning_rate": 1.539382823163279e-05, "loss": 1.0217, "step": 2105 }, { "epoch": 0.34, "grad_norm": 2.7844595231607525, "learning_rate": 1.538943239558806e-05, "loss": 0.9135, "step": 2106 }, { "epoch": 0.34, "grad_norm": 2.6952240418002447, "learning_rate": 1.5385035091313382e-05, "loss": 0.8654, "step": 2107 }, { "epoch": 0.34, "grad_norm": 3.167299173999385, "learning_rate": 1.538063632000671e-05, "loss": 0.9291, "step": 2108 }, { "epoch": 0.34, "grad_norm": 3.37531985427342, "learning_rate": 1.5376236082866384e-05, "loss": 0.8904, "step": 2109 }, { "epoch": 0.34, "grad_norm": 4.28595914595336, "learning_rate": 1.5371834381091152e-05, "loss": 0.926, "step": 2110 }, { "epoch": 0.34, "grad_norm": 3.49820976793922, "learning_rate": 1.5367431215880156e-05, "loss": 0.9547, "step": 2111 }, { "epoch": 0.34, "grad_norm": 1.8075529527096474, "learning_rate": 1.536302658843295e-05, "loss": 0.9961, "step": 2112 }, { "epoch": 0.34, "grad_norm": 1.0611220006955409, "learning_rate": 1.5358620499949464e-05, "loss": 0.9087, "step": 2113 }, { "epoch": 0.34, "grad_norm": 3.2820058769438463, "learning_rate": 1.5354212951630043e-05, "loss": 0.9629, "step": 2114 }, { "epoch": 0.34, "grad_norm": 2.7160683045092076, "learning_rate": 1.5349803944675424e-05, "loss": 0.963, "step": 2115 }, { "epoch": 0.34, "grad_norm": 2.7324362721040028, "learning_rate": 1.5345393480286744e-05, "loss": 0.9061, "step": 2116 }, { "epoch": 0.34, "grad_norm": 2.5354764527476377, "learning_rate": 1.5340981559665533e-05, "loss": 0.9882, "step": 2117 }, { "epoch": 0.34, "grad_norm": 2.3244598007221677, "learning_rate": 1.5336568184013717e-05, "loss": 0.9462, "step": 2118 }, { "epoch": 0.34, "grad_norm": 3.0519821792522226, "learning_rate": 1.5332153354533626e-05, "loss": 0.8942, "step": 2119 }, { "epoch": 0.34, "grad_norm": 1.774660268696564, "learning_rate": 1.532773707242798e-05, "loss": 0.9513, "step": 2120 }, { "epoch": 0.34, "grad_norm": 2.955323869026439, "learning_rate": 1.5323319338899896e-05, "loss": 0.95, "step": 2121 }, { "epoch": 0.34, "grad_norm": 3.3631003372147297, "learning_rate": 1.531890015515289e-05, "loss": 0.9074, "step": 2122 }, { "epoch": 0.34, "grad_norm": 1.7695385256990326, "learning_rate": 1.5314479522390856e-05, "loss": 0.9544, "step": 2123 }, { "epoch": 0.34, "grad_norm": 3.400569834360744, "learning_rate": 1.5310057441818115e-05, "loss": 0.9071, "step": 2124 }, { "epoch": 0.34, "grad_norm": 3.0818313514466955, "learning_rate": 1.5305633914639348e-05, "loss": 0.9771, "step": 2125 }, { "epoch": 0.34, "grad_norm": 2.6794093574330344, "learning_rate": 1.530120894205965e-05, "loss": 0.8818, "step": 2126 }, { "epoch": 0.34, "grad_norm": 2.469655027971283, "learning_rate": 1.5296782525284514e-05, "loss": 0.9181, "step": 2127 }, { "epoch": 0.34, "grad_norm": 2.5810776881601445, "learning_rate": 1.529235466551981e-05, "loss": 0.8543, "step": 2128 }, { "epoch": 0.34, "grad_norm": 2.615229725566489, "learning_rate": 1.5287925363971807e-05, "loss": 0.9907, "step": 2129 }, { "epoch": 0.34, "grad_norm": 3.214384507371585, "learning_rate": 1.5283494621847175e-05, "loss": 0.9071, "step": 2130 }, { "epoch": 0.34, "grad_norm": 2.979993229800733, "learning_rate": 1.5279062440352968e-05, "loss": 0.9467, "step": 2131 }, { "epoch": 0.34, "grad_norm": 2.7754912173613784, "learning_rate": 1.527462882069663e-05, "loss": 0.8818, "step": 2132 }, { "epoch": 0.34, "grad_norm": 2.736914365650406, "learning_rate": 1.5270193764086012e-05, "loss": 0.9725, "step": 2133 }, { "epoch": 0.34, "grad_norm": 1.8797086719262368, "learning_rate": 1.5265757271729333e-05, "loss": 0.9398, "step": 2134 }, { "epoch": 0.34, "grad_norm": 2.100088290220483, "learning_rate": 1.5261319344835225e-05, "loss": 0.8854, "step": 2135 }, { "epoch": 0.34, "grad_norm": 3.797271582217449, "learning_rate": 1.5256879984612698e-05, "loss": 0.9866, "step": 2136 }, { "epoch": 0.34, "grad_norm": 3.261907624583866, "learning_rate": 1.5252439192271156e-05, "loss": 0.8845, "step": 2137 }, { "epoch": 0.34, "grad_norm": 3.3484885606597405, "learning_rate": 1.5247996969020394e-05, "loss": 0.9226, "step": 2138 }, { "epoch": 0.34, "grad_norm": 3.6257497077988594, "learning_rate": 1.5243553316070596e-05, "loss": 0.9745, "step": 2139 }, { "epoch": 0.34, "grad_norm": 2.8972297064478383, "learning_rate": 1.523910823463233e-05, "loss": 0.9498, "step": 2140 }, { "epoch": 0.34, "grad_norm": 2.570988992175615, "learning_rate": 1.5234661725916573e-05, "loss": 0.9692, "step": 2141 }, { "epoch": 0.35, "grad_norm": 2.7220769235755533, "learning_rate": 1.5230213791134662e-05, "loss": 0.9482, "step": 2142 }, { "epoch": 0.35, "grad_norm": 3.4781645437564945, "learning_rate": 1.5225764431498344e-05, "loss": 0.9085, "step": 2143 }, { "epoch": 0.35, "grad_norm": 3.2874303832134752, "learning_rate": 1.5221313648219749e-05, "loss": 0.9795, "step": 2144 }, { "epoch": 0.35, "grad_norm": 3.3914141461768823, "learning_rate": 1.5216861442511382e-05, "loss": 0.8708, "step": 2145 }, { "epoch": 0.35, "grad_norm": 3.7067931473602482, "learning_rate": 1.5212407815586162e-05, "loss": 0.9683, "step": 2146 }, { "epoch": 0.35, "grad_norm": 2.898724071391658, "learning_rate": 1.5207952768657368e-05, "loss": 0.9519, "step": 2147 }, { "epoch": 0.35, "grad_norm": 1.683499149930698, "learning_rate": 1.5203496302938682e-05, "loss": 0.3269, "step": 2148 }, { "epoch": 0.35, "grad_norm": 2.9988060800731664, "learning_rate": 1.519903841964417e-05, "loss": 0.9111, "step": 2149 }, { "epoch": 0.35, "grad_norm": 3.4593440191437517, "learning_rate": 1.519457911998828e-05, "loss": 0.9168, "step": 2150 }, { "epoch": 0.35, "grad_norm": 3.459890926383828, "learning_rate": 1.5190118405185845e-05, "loss": 0.9406, "step": 2151 }, { "epoch": 0.35, "grad_norm": 1.8248504263841232, "learning_rate": 1.5185656276452095e-05, "loss": 0.852, "step": 2152 }, { "epoch": 0.35, "grad_norm": 4.056825871432375, "learning_rate": 1.5181192735002628e-05, "loss": 0.9751, "step": 2153 }, { "epoch": 0.35, "grad_norm": 2.3290178263904573, "learning_rate": 1.517672778205344e-05, "loss": 0.902, "step": 2154 }, { "epoch": 0.35, "grad_norm": 2.861529763053124, "learning_rate": 1.5172261418820908e-05, "loss": 0.9663, "step": 2155 }, { "epoch": 0.35, "grad_norm": 1.606750051239331, "learning_rate": 1.5167793646521788e-05, "loss": 0.8256, "step": 2156 }, { "epoch": 0.35, "grad_norm": 3.718890916734952, "learning_rate": 1.5163324466373236e-05, "loss": 0.9193, "step": 2157 }, { "epoch": 0.35, "grad_norm": 3.704827154866543, "learning_rate": 1.5158853879592763e-05, "loss": 0.9607, "step": 2158 }, { "epoch": 0.35, "grad_norm": 3.6901295225160236, "learning_rate": 1.515438188739829e-05, "loss": 0.9038, "step": 2159 }, { "epoch": 0.35, "grad_norm": 3.091323980239804, "learning_rate": 1.5149908491008112e-05, "loss": 0.8808, "step": 2160 }, { "epoch": 0.35, "grad_norm": 2.8615669228958156, "learning_rate": 1.5145433691640903e-05, "loss": 0.8976, "step": 2161 }, { "epoch": 0.35, "grad_norm": 1.5700932439826, "learning_rate": 1.514095749051572e-05, "loss": 0.8828, "step": 2162 }, { "epoch": 0.35, "grad_norm": 3.8006791260849697, "learning_rate": 1.5136479888852006e-05, "loss": 0.9345, "step": 2163 }, { "epoch": 0.35, "grad_norm": 2.619749813364865, "learning_rate": 1.5132000887869583e-05, "loss": 0.9567, "step": 2164 }, { "epoch": 0.35, "grad_norm": 4.0789794026851, "learning_rate": 1.512752048878865e-05, "loss": 0.921, "step": 2165 }, { "epoch": 0.35, "grad_norm": 2.9750403970492076, "learning_rate": 1.5123038692829801e-05, "loss": 0.9319, "step": 2166 }, { "epoch": 0.35, "grad_norm": 2.541651512059126, "learning_rate": 1.5118555501213989e-05, "loss": 0.9497, "step": 2167 }, { "epoch": 0.35, "grad_norm": 1.7865308148181833, "learning_rate": 1.5114070915162568e-05, "loss": 0.9758, "step": 2168 }, { "epoch": 0.35, "grad_norm": 2.4422707453797567, "learning_rate": 1.5109584935897259e-05, "loss": 0.8828, "step": 2169 }, { "epoch": 0.35, "grad_norm": 3.67951631249058, "learning_rate": 1.5105097564640168e-05, "loss": 0.895, "step": 2170 }, { "epoch": 0.35, "grad_norm": 1.6246146698902355, "learning_rate": 1.5100608802613775e-05, "loss": 0.9349, "step": 2171 }, { "epoch": 0.35, "grad_norm": 3.1510218764174933, "learning_rate": 1.5096118651040945e-05, "loss": 0.9218, "step": 2172 }, { "epoch": 0.35, "grad_norm": 2.5923709190553947, "learning_rate": 1.5091627111144923e-05, "loss": 0.8737, "step": 2173 }, { "epoch": 0.35, "grad_norm": 3.2426104533986555, "learning_rate": 1.508713418414932e-05, "loss": 0.9407, "step": 2174 }, { "epoch": 0.35, "grad_norm": 2.0020037388800005, "learning_rate": 1.5082639871278139e-05, "loss": 0.9119, "step": 2175 }, { "epoch": 0.35, "grad_norm": 2.799540693213182, "learning_rate": 1.5078144173755754e-05, "loss": 0.9291, "step": 2176 }, { "epoch": 0.35, "grad_norm": 2.5594370143955407, "learning_rate": 1.5073647092806916e-05, "loss": 0.933, "step": 2177 }, { "epoch": 0.35, "grad_norm": 3.826929769963567, "learning_rate": 1.5069148629656752e-05, "loss": 0.8985, "step": 2178 }, { "epoch": 0.35, "grad_norm": 2.9642016453073783, "learning_rate": 1.5064648785530774e-05, "loss": 0.9261, "step": 2179 }, { "epoch": 0.35, "grad_norm": 2.6165525662597977, "learning_rate": 1.5060147561654854e-05, "loss": 0.9312, "step": 2180 }, { "epoch": 0.35, "grad_norm": 2.6504436085633047, "learning_rate": 1.5055644959255257e-05, "loss": 0.9633, "step": 2181 }, { "epoch": 0.35, "grad_norm": 3.1196178627744153, "learning_rate": 1.5051140979558614e-05, "loss": 0.9347, "step": 2182 }, { "epoch": 0.35, "grad_norm": 4.230708600656854, "learning_rate": 1.504663562379193e-05, "loss": 0.9008, "step": 2183 }, { "epoch": 0.35, "grad_norm": 3.6620585933979117, "learning_rate": 1.5042128893182595e-05, "loss": 0.9278, "step": 2184 }, { "epoch": 0.35, "grad_norm": 2.36772476998389, "learning_rate": 1.5037620788958359e-05, "loss": 0.9386, "step": 2185 }, { "epoch": 0.35, "grad_norm": 2.9506929262661656, "learning_rate": 1.5033111312347357e-05, "loss": 0.9045, "step": 2186 }, { "epoch": 0.35, "grad_norm": 2.149370802253779, "learning_rate": 1.5028600464578099e-05, "loss": 0.3767, "step": 2187 }, { "epoch": 0.35, "grad_norm": 3.3718767196184216, "learning_rate": 1.5024088246879456e-05, "loss": 0.9373, "step": 2188 }, { "epoch": 0.35, "grad_norm": 2.499583972647432, "learning_rate": 1.5019574660480685e-05, "loss": 0.9298, "step": 2189 }, { "epoch": 0.35, "grad_norm": 2.153280120266908, "learning_rate": 1.5015059706611413e-05, "loss": 0.8815, "step": 2190 }, { "epoch": 0.35, "grad_norm": 2.2106401762523644, "learning_rate": 1.5010543386501634e-05, "loss": 0.8946, "step": 2191 }, { "epoch": 0.35, "grad_norm": 3.7594820307005268, "learning_rate": 1.500602570138172e-05, "loss": 0.9554, "step": 2192 }, { "epoch": 0.35, "grad_norm": 4.29341968670676, "learning_rate": 1.5001506652482415e-05, "loss": 0.9286, "step": 2193 }, { "epoch": 0.35, "grad_norm": 2.117748341055013, "learning_rate": 1.499698624103483e-05, "loss": 0.9636, "step": 2194 }, { "epoch": 0.35, "grad_norm": 2.795176325850739, "learning_rate": 1.4992464468270451e-05, "loss": 0.9644, "step": 2195 }, { "epoch": 0.35, "grad_norm": 2.6302731546906175, "learning_rate": 1.4987941335421132e-05, "loss": 0.9339, "step": 2196 }, { "epoch": 0.35, "grad_norm": 2.523499762649782, "learning_rate": 1.4983416843719099e-05, "loss": 0.8952, "step": 2197 }, { "epoch": 0.35, "grad_norm": 3.4155584414350035, "learning_rate": 1.497889099439695e-05, "loss": 0.851, "step": 2198 }, { "epoch": 0.35, "grad_norm": 3.5989506781681126, "learning_rate": 1.4974363788687651e-05, "loss": 0.9152, "step": 2199 }, { "epoch": 0.35, "grad_norm": 3.0201606439975603, "learning_rate": 1.4969835227824533e-05, "loss": 0.9528, "step": 2200 }, { "epoch": 0.35, "grad_norm": 1.9002055031919474, "learning_rate": 1.496530531304131e-05, "loss": 0.9553, "step": 2201 }, { "epoch": 0.35, "grad_norm": 1.6050291178786387, "learning_rate": 1.4960774045572046e-05, "loss": 0.9601, "step": 2202 }, { "epoch": 0.35, "grad_norm": 3.475873488824779, "learning_rate": 1.495624142665119e-05, "loss": 0.9596, "step": 2203 }, { "epoch": 0.36, "grad_norm": 2.578129716348669, "learning_rate": 1.4951707457513549e-05, "loss": 0.9364, "step": 2204 }, { "epoch": 0.36, "grad_norm": 3.0023411516768452, "learning_rate": 1.4947172139394301e-05, "loss": 0.922, "step": 2205 }, { "epoch": 0.36, "grad_norm": 2.5730325052437193, "learning_rate": 1.4942635473528994e-05, "loss": 0.9606, "step": 2206 }, { "epoch": 0.36, "grad_norm": 3.9005643436292017, "learning_rate": 1.493809746115354e-05, "loss": 0.9591, "step": 2207 }, { "epoch": 0.36, "grad_norm": 4.4925362891088545, "learning_rate": 1.4933558103504215e-05, "loss": 0.9049, "step": 2208 }, { "epoch": 0.36, "grad_norm": 2.2499934302340066, "learning_rate": 1.4929017401817672e-05, "loss": 0.8901, "step": 2209 }, { "epoch": 0.36, "grad_norm": 3.307825785282721, "learning_rate": 1.4924475357330919e-05, "loss": 0.8817, "step": 2210 }, { "epoch": 0.36, "grad_norm": 4.1481312784225315, "learning_rate": 1.491993197128133e-05, "loss": 0.9711, "step": 2211 }, { "epoch": 0.36, "grad_norm": 3.9495152815849006, "learning_rate": 1.4915387244906658e-05, "loss": 0.8511, "step": 2212 }, { "epoch": 0.36, "grad_norm": 3.232998909975736, "learning_rate": 1.4910841179445007e-05, "loss": 0.9887, "step": 2213 }, { "epoch": 0.36, "grad_norm": 1.8959008997799698, "learning_rate": 1.4906293776134849e-05, "loss": 0.3349, "step": 2214 }, { "epoch": 0.36, "grad_norm": 3.4909928089637092, "learning_rate": 1.4901745036215022e-05, "loss": 0.9535, "step": 2215 }, { "epoch": 0.36, "grad_norm": 3.2722448393193546, "learning_rate": 1.4897194960924732e-05, "loss": 0.9577, "step": 2216 }, { "epoch": 0.36, "grad_norm": 2.589374482460716, "learning_rate": 1.4892643551503545e-05, "loss": 0.9638, "step": 2217 }, { "epoch": 0.36, "grad_norm": 2.0860771675356533, "learning_rate": 1.4888090809191384e-05, "loss": 0.8361, "step": 2218 }, { "epoch": 0.36, "grad_norm": 3.351459492698841, "learning_rate": 1.4883536735228548e-05, "loss": 0.8682, "step": 2219 }, { "epoch": 0.36, "grad_norm": 3.8166620018498816, "learning_rate": 1.4878981330855688e-05, "loss": 0.9059, "step": 2220 }, { "epoch": 0.36, "grad_norm": 3.18603811212746, "learning_rate": 1.4874424597313828e-05, "loss": 0.9825, "step": 2221 }, { "epoch": 0.36, "grad_norm": 1.5026721994038101, "learning_rate": 1.4869866535844337e-05, "loss": 0.9036, "step": 2222 }, { "epoch": 0.36, "grad_norm": 2.744630773896914, "learning_rate": 1.4865307147688967e-05, "loss": 0.975, "step": 2223 }, { "epoch": 0.36, "grad_norm": 2.3378236230413, "learning_rate": 1.4860746434089817e-05, "loss": 0.8992, "step": 2224 }, { "epoch": 0.36, "grad_norm": 3.1960421524228178, "learning_rate": 1.4856184396289348e-05, "loss": 0.8756, "step": 2225 }, { "epoch": 0.36, "grad_norm": 1.8531067922897404, "learning_rate": 1.4851621035530392e-05, "loss": 0.3246, "step": 2226 }, { "epoch": 0.36, "grad_norm": 3.3215413052461136, "learning_rate": 1.4847056353056126e-05, "loss": 0.874, "step": 2227 }, { "epoch": 0.36, "grad_norm": 2.9526118079841552, "learning_rate": 1.4842490350110103e-05, "loss": 0.9947, "step": 2228 }, { "epoch": 0.36, "grad_norm": 1.7479567179526194, "learning_rate": 1.4837923027936223e-05, "loss": 0.9669, "step": 2229 }, { "epoch": 0.36, "grad_norm": 3.6714405857968915, "learning_rate": 1.4833354387778753e-05, "loss": 0.9211, "step": 2230 }, { "epoch": 0.36, "grad_norm": 2.3452532207359953, "learning_rate": 1.4828784430882315e-05, "loss": 0.9287, "step": 2231 }, { "epoch": 0.36, "grad_norm": 2.5166705310446607, "learning_rate": 1.482421315849189e-05, "loss": 0.947, "step": 2232 }, { "epoch": 0.36, "grad_norm": 2.4163830908656205, "learning_rate": 1.4819640571852823e-05, "loss": 0.913, "step": 2233 }, { "epoch": 0.36, "grad_norm": 3.9601914043600823, "learning_rate": 1.4815066672210809e-05, "loss": 0.8486, "step": 2234 }, { "epoch": 0.36, "grad_norm": 2.9872615894651817, "learning_rate": 1.4810491460811907e-05, "loss": 0.8991, "step": 2235 }, { "epoch": 0.36, "grad_norm": 3.822291074423452, "learning_rate": 1.4805914938902525e-05, "loss": 0.9087, "step": 2236 }, { "epoch": 0.36, "grad_norm": 2.6817348655372486, "learning_rate": 1.4801337107729443e-05, "loss": 0.9503, "step": 2237 }, { "epoch": 0.36, "grad_norm": 3.207216654725577, "learning_rate": 1.4796757968539779e-05, "loss": 0.8444, "step": 2238 }, { "epoch": 0.36, "grad_norm": 2.74539735735654, "learning_rate": 1.4792177522581023e-05, "loss": 1.005, "step": 2239 }, { "epoch": 0.36, "grad_norm": 3.8046895052612784, "learning_rate": 1.4787595771101013e-05, "loss": 0.9105, "step": 2240 }, { "epoch": 0.36, "grad_norm": 2.3898494684226392, "learning_rate": 1.4783012715347944e-05, "loss": 0.9788, "step": 2241 }, { "epoch": 0.36, "grad_norm": 2.405104426379098, "learning_rate": 1.4778428356570365e-05, "loss": 0.8927, "step": 2242 }, { "epoch": 0.36, "grad_norm": 3.378658007725331, "learning_rate": 1.4773842696017184e-05, "loss": 0.8808, "step": 2243 }, { "epoch": 0.36, "grad_norm": 3.016062176101458, "learning_rate": 1.4769255734937662e-05, "loss": 0.9121, "step": 2244 }, { "epoch": 0.36, "grad_norm": 1.9998131903188097, "learning_rate": 1.4764667474581416e-05, "loss": 0.9639, "step": 2245 }, { "epoch": 0.36, "grad_norm": 3.1531423245002257, "learning_rate": 1.4760077916198405e-05, "loss": 0.9363, "step": 2246 }, { "epoch": 0.36, "grad_norm": 2.0572481046160993, "learning_rate": 1.475548706103896e-05, "loss": 0.8775, "step": 2247 }, { "epoch": 0.36, "grad_norm": 3.3704171324976513, "learning_rate": 1.4750894910353754e-05, "loss": 0.9404, "step": 2248 }, { "epoch": 0.36, "grad_norm": 2.5644293244325853, "learning_rate": 1.4746301465393814e-05, "loss": 0.971, "step": 2249 }, { "epoch": 0.36, "grad_norm": 2.6838488950306836, "learning_rate": 1.4741706727410522e-05, "loss": 0.9147, "step": 2250 }, { "epoch": 0.36, "grad_norm": 2.1471848043603345, "learning_rate": 1.4737110697655613e-05, "loss": 0.9959, "step": 2251 }, { "epoch": 0.36, "grad_norm": 4.0921963371539745, "learning_rate": 1.473251337738117e-05, "loss": 0.8543, "step": 2252 }, { "epoch": 0.36, "grad_norm": 3.203794828020176, "learning_rate": 1.472791476783963e-05, "loss": 0.9262, "step": 2253 }, { "epoch": 0.36, "grad_norm": 2.6336197789845803, "learning_rate": 1.4723314870283783e-05, "loss": 0.9603, "step": 2254 }, { "epoch": 0.36, "grad_norm": 2.4902631930011454, "learning_rate": 1.4718713685966765e-05, "loss": 0.947, "step": 2255 }, { "epoch": 0.36, "grad_norm": 4.663390144691045, "learning_rate": 1.4714111216142068e-05, "loss": 0.8894, "step": 2256 }, { "epoch": 0.36, "grad_norm": 2.798767216958727, "learning_rate": 1.470950746206353e-05, "loss": 0.9617, "step": 2257 }, { "epoch": 0.36, "grad_norm": 3.8189817267752617, "learning_rate": 1.4704902424985341e-05, "loss": 1.0002, "step": 2258 }, { "epoch": 0.36, "grad_norm": 3.127233241327948, "learning_rate": 1.4700296106162042e-05, "loss": 0.9289, "step": 2259 }, { "epoch": 0.36, "grad_norm": 3.271735065021824, "learning_rate": 1.4695688506848513e-05, "loss": 0.946, "step": 2260 }, { "epoch": 0.36, "grad_norm": 3.1081589664372915, "learning_rate": 1.4691079628300004e-05, "loss": 0.9612, "step": 2261 }, { "epoch": 0.36, "grad_norm": 2.4545709515541363, "learning_rate": 1.4686469471772089e-05, "loss": 0.9643, "step": 2262 }, { "epoch": 0.36, "grad_norm": 2.3736293250612084, "learning_rate": 1.4681858038520711e-05, "loss": 0.9139, "step": 2263 }, { "epoch": 0.36, "grad_norm": 2.4602432482719516, "learning_rate": 1.4677245329802146e-05, "loss": 0.8828, "step": 2264 }, { "epoch": 0.36, "grad_norm": 3.0616100341620482, "learning_rate": 1.4672631346873023e-05, "loss": 0.8994, "step": 2265 }, { "epoch": 0.37, "grad_norm": 3.4063759876714674, "learning_rate": 1.466801609099032e-05, "loss": 0.9042, "step": 2266 }, { "epoch": 0.37, "grad_norm": 3.3176941364809003, "learning_rate": 1.4663399563411358e-05, "loss": 0.929, "step": 2267 }, { "epoch": 0.37, "grad_norm": 3.3801914835756284, "learning_rate": 1.4658781765393808e-05, "loss": 0.935, "step": 2268 }, { "epoch": 0.37, "grad_norm": 1.790002767358271, "learning_rate": 1.4654162698195684e-05, "loss": 0.9376, "step": 2269 }, { "epoch": 0.37, "grad_norm": 2.8958432496996545, "learning_rate": 1.4649542363075353e-05, "loss": 0.8758, "step": 2270 }, { "epoch": 0.37, "grad_norm": 3.6067564081484247, "learning_rate": 1.464492076129151e-05, "loss": 0.9553, "step": 2271 }, { "epoch": 0.37, "grad_norm": 4.341964464475337, "learning_rate": 1.464029789410322e-05, "loss": 0.9659, "step": 2272 }, { "epoch": 0.37, "grad_norm": 2.163512011860315, "learning_rate": 1.4635673762769868e-05, "loss": 0.3257, "step": 2273 }, { "epoch": 0.37, "grad_norm": 2.4862710687004514, "learning_rate": 1.4631048368551204e-05, "loss": 0.903, "step": 2274 }, { "epoch": 0.37, "grad_norm": 2.687159405794464, "learning_rate": 1.4626421712707307e-05, "loss": 0.9387, "step": 2275 }, { "epoch": 0.37, "grad_norm": 2.7510939936275682, "learning_rate": 1.4621793796498606e-05, "loss": 0.9341, "step": 2276 }, { "epoch": 0.37, "grad_norm": 3.0066928910030213, "learning_rate": 1.4617164621185877e-05, "loss": 0.9821, "step": 2277 }, { "epoch": 0.37, "grad_norm": 2.952896512996567, "learning_rate": 1.4612534188030233e-05, "loss": 0.9562, "step": 2278 }, { "epoch": 0.37, "grad_norm": 4.606483302425761, "learning_rate": 1.4607902498293127e-05, "loss": 0.8529, "step": 2279 }, { "epoch": 0.37, "grad_norm": 12.06977142800222, "learning_rate": 1.4603269553236366e-05, "loss": 0.9859, "step": 2280 }, { "epoch": 0.37, "grad_norm": 2.4992784412502327, "learning_rate": 1.4598635354122087e-05, "loss": 0.9084, "step": 2281 }, { "epoch": 0.37, "grad_norm": 3.271643390491748, "learning_rate": 1.4593999902212775e-05, "loss": 0.9498, "step": 2282 }, { "epoch": 0.37, "grad_norm": 3.1712269990212447, "learning_rate": 1.4589363198771258e-05, "loss": 0.8738, "step": 2283 }, { "epoch": 0.37, "grad_norm": 2.148857047991191, "learning_rate": 1.4584725245060694e-05, "loss": 0.9162, "step": 2284 }, { "epoch": 0.37, "grad_norm": 1.6705371816197179, "learning_rate": 1.45800860423446e-05, "loss": 0.9726, "step": 2285 }, { "epoch": 0.37, "grad_norm": 2.3058938182989976, "learning_rate": 1.4575445591886814e-05, "loss": 0.9795, "step": 2286 }, { "epoch": 0.37, "grad_norm": 2.995723059848182, "learning_rate": 1.4570803894951528e-05, "loss": 0.897, "step": 2287 }, { "epoch": 0.37, "grad_norm": 2.357567606469322, "learning_rate": 1.4566160952803268e-05, "loss": 0.9516, "step": 2288 }, { "epoch": 0.37, "grad_norm": 3.179095988823292, "learning_rate": 1.4561516766706893e-05, "loss": 0.9643, "step": 2289 }, { "epoch": 0.37, "grad_norm": 3.7299405507931587, "learning_rate": 1.4556871337927615e-05, "loss": 0.8592, "step": 2290 }, { "epoch": 0.37, "grad_norm": 4.469414934959591, "learning_rate": 1.455222466773097e-05, "loss": 0.9544, "step": 2291 }, { "epoch": 0.37, "grad_norm": 3.3090958287510235, "learning_rate": 1.4547576757382843e-05, "loss": 0.9262, "step": 2292 }, { "epoch": 0.37, "grad_norm": 1.4290246312746155, "learning_rate": 1.4542927608149456e-05, "loss": 0.3161, "step": 2293 }, { "epoch": 0.37, "grad_norm": 2.560859434137757, "learning_rate": 1.453827722129736e-05, "loss": 0.8916, "step": 2294 }, { "epoch": 0.37, "grad_norm": 3.430533128906198, "learning_rate": 1.4533625598093453e-05, "loss": 0.9767, "step": 2295 }, { "epoch": 0.37, "grad_norm": 1.8132327999033957, "learning_rate": 1.452897273980496e-05, "loss": 0.946, "step": 2296 }, { "epoch": 0.37, "grad_norm": 3.2386355143033363, "learning_rate": 1.452431864769945e-05, "loss": 0.9784, "step": 2297 }, { "epoch": 0.37, "grad_norm": 1.0369103881977777, "learning_rate": 1.451966332304483e-05, "loss": 0.9843, "step": 2298 }, { "epoch": 0.37, "grad_norm": 2.105480073746219, "learning_rate": 1.4515006767109336e-05, "loss": 0.3379, "step": 2299 }, { "epoch": 0.37, "grad_norm": 2.92282105495898, "learning_rate": 1.451034898116154e-05, "loss": 0.9805, "step": 2300 }, { "epoch": 0.37, "grad_norm": 2.888722386411826, "learning_rate": 1.4505689966470353e-05, "loss": 0.885, "step": 2301 }, { "epoch": 0.37, "grad_norm": 3.275256978210325, "learning_rate": 1.4501029724305019e-05, "loss": 0.8754, "step": 2302 }, { "epoch": 0.37, "grad_norm": 2.347047038064887, "learning_rate": 1.4496368255935115e-05, "loss": 0.9177, "step": 2303 }, { "epoch": 0.37, "grad_norm": 3.5655257108406224, "learning_rate": 1.4491705562630555e-05, "loss": 0.923, "step": 2304 }, { "epoch": 0.37, "grad_norm": 5.172088895218165, "learning_rate": 1.4487041645661588e-05, "loss": 0.9747, "step": 2305 }, { "epoch": 0.37, "grad_norm": 2.4454955011208255, "learning_rate": 1.448237650629879e-05, "loss": 0.9059, "step": 2306 }, { "epoch": 0.37, "grad_norm": 2.8052093105453753, "learning_rate": 1.4477710145813074e-05, "loss": 1.0042, "step": 2307 }, { "epoch": 0.37, "grad_norm": 1.8160797810191631, "learning_rate": 1.4473042565475684e-05, "loss": 0.3552, "step": 2308 }, { "epoch": 0.37, "grad_norm": 2.671840555961739, "learning_rate": 1.44683737665582e-05, "loss": 0.9279, "step": 2309 }, { "epoch": 0.37, "grad_norm": 3.481469829200141, "learning_rate": 1.4463703750332532e-05, "loss": 0.8787, "step": 2310 }, { "epoch": 0.37, "grad_norm": 3.352267104557065, "learning_rate": 1.4459032518070917e-05, "loss": 0.9162, "step": 2311 }, { "epoch": 0.37, "grad_norm": 3.4256715659108705, "learning_rate": 1.4454360071045933e-05, "loss": 0.9641, "step": 2312 }, { "epoch": 0.37, "grad_norm": 2.708167780193371, "learning_rate": 1.4449686410530478e-05, "loss": 0.8858, "step": 2313 }, { "epoch": 0.37, "grad_norm": 3.387153782342818, "learning_rate": 1.4445011537797788e-05, "loss": 1.0027, "step": 2314 }, { "epoch": 0.37, "grad_norm": 1.8377603125917905, "learning_rate": 1.4440335454121428e-05, "loss": 0.3219, "step": 2315 }, { "epoch": 0.37, "grad_norm": 2.23259947329847, "learning_rate": 1.4435658160775296e-05, "loss": 0.9079, "step": 2316 }, { "epoch": 0.37, "grad_norm": 2.893517114483528, "learning_rate": 1.4430979659033609e-05, "loss": 0.9329, "step": 2317 }, { "epoch": 0.37, "grad_norm": 1.6100147049984603, "learning_rate": 1.442629995017092e-05, "loss": 0.8972, "step": 2318 }, { "epoch": 0.37, "grad_norm": 1.8146357613780701, "learning_rate": 1.4421619035462115e-05, "loss": 0.9337, "step": 2319 }, { "epoch": 0.37, "grad_norm": 3.4111435761979476, "learning_rate": 1.44169369161824e-05, "loss": 0.8516, "step": 2320 }, { "epoch": 0.37, "grad_norm": 1.7800673858874474, "learning_rate": 1.4412253593607317e-05, "loss": 0.9721, "step": 2321 }, { "epoch": 0.37, "grad_norm": 4.413935890745064, "learning_rate": 1.4407569069012729e-05, "loss": 0.9298, "step": 2322 }, { "epoch": 0.37, "grad_norm": 2.7295251178972832, "learning_rate": 1.4402883343674834e-05, "loss": 0.9725, "step": 2323 }, { "epoch": 0.37, "grad_norm": 2.9396693558861404, "learning_rate": 1.4398196418870147e-05, "loss": 0.8894, "step": 2324 }, { "epoch": 0.37, "grad_norm": 2.1660432530009297, "learning_rate": 1.439350829587552e-05, "loss": 0.3293, "step": 2325 }, { "epoch": 0.37, "grad_norm": 2.7398967684490985, "learning_rate": 1.4388818975968126e-05, "loss": 0.8577, "step": 2326 }, { "epoch": 0.37, "grad_norm": 4.09406844203673, "learning_rate": 1.4384128460425467e-05, "loss": 0.9224, "step": 2327 }, { "epoch": 0.38, "grad_norm": 3.836858759185625, "learning_rate": 1.4379436750525362e-05, "loss": 0.975, "step": 2328 }, { "epoch": 0.38, "grad_norm": 3.2600205803438014, "learning_rate": 1.4374743847545967e-05, "loss": 0.9854, "step": 2329 }, { "epoch": 0.38, "grad_norm": 2.5848367889648154, "learning_rate": 1.437004975276576e-05, "loss": 0.9554, "step": 2330 }, { "epoch": 0.38, "grad_norm": 4.167152961327504, "learning_rate": 1.4365354467463535e-05, "loss": 0.9556, "step": 2331 }, { "epoch": 0.38, "grad_norm": 3.085659507108818, "learning_rate": 1.4360657992918423e-05, "loss": 0.9872, "step": 2332 }, { "epoch": 0.38, "grad_norm": 2.4764580448878193, "learning_rate": 1.435596033040987e-05, "loss": 0.9149, "step": 2333 }, { "epoch": 0.38, "grad_norm": 2.8106462091278086, "learning_rate": 1.4351261481217655e-05, "loss": 0.9705, "step": 2334 }, { "epoch": 0.38, "grad_norm": 3.297492236170909, "learning_rate": 1.4346561446621865e-05, "loss": 0.9436, "step": 2335 }, { "epoch": 0.38, "grad_norm": 2.518191716344026, "learning_rate": 1.4341860227902923e-05, "loss": 0.9811, "step": 2336 }, { "epoch": 0.38, "grad_norm": 3.357373728248801, "learning_rate": 1.4337157826341575e-05, "loss": 0.9257, "step": 2337 }, { "epoch": 0.38, "grad_norm": 1.2731408551838899, "learning_rate": 1.4332454243218878e-05, "loss": 0.8941, "step": 2338 }, { "epoch": 0.38, "grad_norm": 2.886084046745335, "learning_rate": 1.432774947981622e-05, "loss": 0.8886, "step": 2339 }, { "epoch": 0.38, "grad_norm": 1.9139693256977537, "learning_rate": 1.4323043537415311e-05, "loss": 0.8868, "step": 2340 }, { "epoch": 0.38, "grad_norm": 3.352725949677387, "learning_rate": 1.4318336417298173e-05, "loss": 0.9156, "step": 2341 }, { "epoch": 0.38, "grad_norm": 3.0173956065857634, "learning_rate": 1.431362812074716e-05, "loss": 0.8549, "step": 2342 }, { "epoch": 0.38, "grad_norm": 3.1023420223864333, "learning_rate": 1.4308918649044947e-05, "loss": 0.9765, "step": 2343 }, { "epoch": 0.38, "grad_norm": 2.0164615758010687, "learning_rate": 1.4304208003474508e-05, "loss": 0.8891, "step": 2344 }, { "epoch": 0.38, "grad_norm": 2.731477084638238, "learning_rate": 1.429949618531917e-05, "loss": 0.9971, "step": 2345 }, { "epoch": 0.38, "grad_norm": 4.276714512078432, "learning_rate": 1.4294783195862553e-05, "loss": 0.8571, "step": 2346 }, { "epoch": 0.38, "grad_norm": 2.523885870153221, "learning_rate": 1.4290069036388607e-05, "loss": 0.3485, "step": 2347 }, { "epoch": 0.38, "grad_norm": 2.139319920010518, "learning_rate": 1.42853537081816e-05, "loss": 0.324, "step": 2348 }, { "epoch": 0.38, "grad_norm": 4.2318792554219575, "learning_rate": 1.4280637212526116e-05, "loss": 0.9551, "step": 2349 }, { "epoch": 0.38, "grad_norm": 3.5159335530829967, "learning_rate": 1.4275919550707058e-05, "loss": 0.9055, "step": 2350 }, { "epoch": 0.38, "grad_norm": 3.5818704383886284, "learning_rate": 1.4271200724009648e-05, "loss": 1.0225, "step": 2351 }, { "epoch": 0.38, "grad_norm": 4.805349016008037, "learning_rate": 1.4266480733719426e-05, "loss": 0.9902, "step": 2352 }, { "epoch": 0.38, "grad_norm": 3.1980052691351237, "learning_rate": 1.4261759581122243e-05, "loss": 0.973, "step": 2353 }, { "epoch": 0.38, "grad_norm": 1.7926993240345808, "learning_rate": 1.4257037267504277e-05, "loss": 0.3262, "step": 2354 }, { "epoch": 0.38, "grad_norm": 3.05989446545293, "learning_rate": 1.425231379415201e-05, "loss": 0.8819, "step": 2355 }, { "epoch": 0.38, "grad_norm": 1.1230258574799619, "learning_rate": 1.4247589162352254e-05, "loss": 0.95, "step": 2356 }, { "epoch": 0.38, "grad_norm": 2.100177634764426, "learning_rate": 1.4242863373392123e-05, "loss": 0.3266, "step": 2357 }, { "epoch": 0.38, "grad_norm": 3.5383219165111703, "learning_rate": 1.423813642855905e-05, "loss": 0.9204, "step": 2358 }, { "epoch": 0.38, "grad_norm": 2.7768689056817286, "learning_rate": 1.4233408329140796e-05, "loss": 0.9517, "step": 2359 }, { "epoch": 0.38, "grad_norm": 3.754164735466859, "learning_rate": 1.4228679076425414e-05, "loss": 0.9158, "step": 2360 }, { "epoch": 0.38, "grad_norm": 2.1880060972864532, "learning_rate": 1.4223948671701289e-05, "loss": 0.8327, "step": 2361 }, { "epoch": 0.38, "grad_norm": 1.8348983096184446, "learning_rate": 1.4219217116257111e-05, "loss": 0.3138, "step": 2362 }, { "epoch": 0.38, "grad_norm": 3.1418525991557558, "learning_rate": 1.4214484411381885e-05, "loss": 0.9368, "step": 2363 }, { "epoch": 0.38, "grad_norm": 2.122824284343786, "learning_rate": 1.4209750558364936e-05, "loss": 0.9604, "step": 2364 }, { "epoch": 0.38, "grad_norm": 3.0065398459180237, "learning_rate": 1.4205015558495893e-05, "loss": 0.9018, "step": 2365 }, { "epoch": 0.38, "grad_norm": 3.8139956932983727, "learning_rate": 1.4200279413064695e-05, "loss": 1.0135, "step": 2366 }, { "epoch": 0.38, "grad_norm": 3.5577299072435933, "learning_rate": 1.419554212336161e-05, "loss": 0.9761, "step": 2367 }, { "epoch": 0.38, "grad_norm": 2.296053992696585, "learning_rate": 1.4190803690677195e-05, "loss": 0.9718, "step": 2368 }, { "epoch": 0.38, "grad_norm": 1.7919820759251504, "learning_rate": 1.4186064116302336e-05, "loss": 0.9495, "step": 2369 }, { "epoch": 0.38, "grad_norm": 2.1309535830028294, "learning_rate": 1.4181323401528224e-05, "loss": 0.3458, "step": 2370 }, { "epoch": 0.38, "grad_norm": 2.0525613089359642, "learning_rate": 1.4176581547646354e-05, "loss": 0.9375, "step": 2371 }, { "epoch": 0.38, "grad_norm": 2.2365144743592773, "learning_rate": 1.4171838555948548e-05, "loss": 0.8441, "step": 2372 }, { "epoch": 0.38, "grad_norm": 1.943890706946033, "learning_rate": 1.4167094427726916e-05, "loss": 0.9312, "step": 2373 }, { "epoch": 0.38, "grad_norm": 3.783612356665933, "learning_rate": 1.4162349164273899e-05, "loss": 0.9762, "step": 2374 }, { "epoch": 0.38, "grad_norm": 3.953211244388697, "learning_rate": 1.4157602766882233e-05, "loss": 0.8737, "step": 2375 }, { "epoch": 0.38, "grad_norm": 2.1062153074763637, "learning_rate": 1.4152855236844969e-05, "loss": 0.9167, "step": 2376 }, { "epoch": 0.38, "grad_norm": 3.3542668740288137, "learning_rate": 1.414810657545546e-05, "loss": 0.9051, "step": 2377 }, { "epoch": 0.38, "grad_norm": 3.5485712104307625, "learning_rate": 1.4143356784007383e-05, "loss": 0.943, "step": 2378 }, { "epoch": 0.38, "grad_norm": 2.4480720524811774, "learning_rate": 1.4138605863794703e-05, "loss": 0.9594, "step": 2379 }, { "epoch": 0.38, "grad_norm": 2.768892502400527, "learning_rate": 1.4133853816111703e-05, "loss": 0.8742, "step": 2380 }, { "epoch": 0.38, "grad_norm": 3.3123047969090877, "learning_rate": 1.4129100642252978e-05, "loss": 0.9453, "step": 2381 }, { "epoch": 0.38, "grad_norm": 2.353657242562372, "learning_rate": 1.4124346343513411e-05, "loss": 0.3343, "step": 2382 }, { "epoch": 0.38, "grad_norm": 3.50508538418325, "learning_rate": 1.4119590921188217e-05, "loss": 0.9483, "step": 2383 }, { "epoch": 0.38, "grad_norm": 1.944179342756984, "learning_rate": 1.4114834376572898e-05, "loss": 0.8727, "step": 2384 }, { "epoch": 0.38, "grad_norm": 2.9794813047777327, "learning_rate": 1.4110076710963269e-05, "loss": 0.9971, "step": 2385 }, { "epoch": 0.38, "grad_norm": 2.942411820768474, "learning_rate": 1.4105317925655448e-05, "loss": 0.9323, "step": 2386 }, { "epoch": 0.38, "grad_norm": 3.19662948366669, "learning_rate": 1.4100558021945863e-05, "loss": 0.9038, "step": 2387 }, { "epoch": 0.38, "grad_norm": 3.3726391482366966, "learning_rate": 1.4095797001131238e-05, "loss": 0.8978, "step": 2388 }, { "epoch": 0.38, "grad_norm": 1.6500341989845555, "learning_rate": 1.4091034864508608e-05, "loss": 0.2963, "step": 2389 }, { "epoch": 0.39, "grad_norm": 2.5965027506766174, "learning_rate": 1.408627161337531e-05, "loss": 0.8881, "step": 2390 }, { "epoch": 0.39, "grad_norm": 2.7776485656779624, "learning_rate": 1.4081507249028987e-05, "loss": 0.9725, "step": 2391 }, { "epoch": 0.39, "grad_norm": 3.211662266373074, "learning_rate": 1.4076741772767586e-05, "loss": 0.9245, "step": 2392 }, { "epoch": 0.39, "grad_norm": 3.1954381493671042, "learning_rate": 1.4071975185889344e-05, "loss": 0.9171, "step": 2393 }, { "epoch": 0.39, "grad_norm": 3.3482270972597155, "learning_rate": 1.406720748969282e-05, "loss": 0.9024, "step": 2394 }, { "epoch": 0.39, "grad_norm": 4.157020669488861, "learning_rate": 1.4062438685476862e-05, "loss": 0.9312, "step": 2395 }, { "epoch": 0.39, "grad_norm": 3.557817158719021, "learning_rate": 1.4057668774540622e-05, "loss": 0.8967, "step": 2396 }, { "epoch": 0.39, "grad_norm": 2.207825644333163, "learning_rate": 1.405289775818356e-05, "loss": 0.9448, "step": 2397 }, { "epoch": 0.39, "grad_norm": 2.1985003041485593, "learning_rate": 1.4048125637705433e-05, "loss": 0.9168, "step": 2398 }, { "epoch": 0.39, "grad_norm": 2.83037161769664, "learning_rate": 1.404335241440629e-05, "loss": 0.9075, "step": 2399 }, { "epoch": 0.39, "grad_norm": 3.163090460015291, "learning_rate": 1.4038578089586493e-05, "loss": 0.9243, "step": 2400 }, { "epoch": 0.39, "grad_norm": 2.53113838232277, "learning_rate": 1.4033802664546704e-05, "loss": 0.9582, "step": 2401 }, { "epoch": 0.39, "grad_norm": 2.9192595990679706, "learning_rate": 1.4029026140587876e-05, "loss": 0.9622, "step": 2402 }, { "epoch": 0.39, "grad_norm": 3.0907237346680674, "learning_rate": 1.4024248519011266e-05, "loss": 0.8882, "step": 2403 }, { "epoch": 0.39, "grad_norm": 2.5790244643081617, "learning_rate": 1.4019469801118429e-05, "loss": 0.8941, "step": 2404 }, { "epoch": 0.39, "grad_norm": 3.6036851300326527, "learning_rate": 1.4014689988211223e-05, "loss": 0.8704, "step": 2405 }, { "epoch": 0.39, "grad_norm": 3.401442468616744, "learning_rate": 1.4009909081591798e-05, "loss": 0.9958, "step": 2406 }, { "epoch": 0.39, "grad_norm": 4.156890819739129, "learning_rate": 1.4005127082562603e-05, "loss": 0.8975, "step": 2407 }, { "epoch": 0.39, "grad_norm": 2.7560603381446307, "learning_rate": 1.4000343992426391e-05, "loss": 0.8966, "step": 2408 }, { "epoch": 0.39, "grad_norm": 3.7342694000253025, "learning_rate": 1.3995559812486205e-05, "loss": 0.9155, "step": 2409 }, { "epoch": 0.39, "grad_norm": 4.581270666973549, "learning_rate": 1.399077454404539e-05, "loss": 0.953, "step": 2410 }, { "epoch": 0.39, "grad_norm": 2.168495897693373, "learning_rate": 1.398598818840758e-05, "loss": 0.9022, "step": 2411 }, { "epoch": 0.39, "grad_norm": 2.3021423769552554, "learning_rate": 1.3981200746876713e-05, "loss": 0.8555, "step": 2412 }, { "epoch": 0.39, "grad_norm": 1.868800914670446, "learning_rate": 1.397641222075702e-05, "loss": 0.9491, "step": 2413 }, { "epoch": 0.39, "grad_norm": 3.417127826241152, "learning_rate": 1.397162261135303e-05, "loss": 0.8969, "step": 2414 }, { "epoch": 0.39, "grad_norm": 2.950174623913722, "learning_rate": 1.396683191996956e-05, "loss": 0.8909, "step": 2415 }, { "epoch": 0.39, "grad_norm": 2.6914277761010985, "learning_rate": 1.3962040147911731e-05, "loss": 0.8336, "step": 2416 }, { "epoch": 0.39, "grad_norm": 2.2768762543785037, "learning_rate": 1.3957247296484948e-05, "loss": 0.9124, "step": 2417 }, { "epoch": 0.39, "grad_norm": 3.150600249002738, "learning_rate": 1.3952453366994921e-05, "loss": 1.0065, "step": 2418 }, { "epoch": 0.39, "grad_norm": 3.492346843182031, "learning_rate": 1.3947658360747646e-05, "loss": 0.9502, "step": 2419 }, { "epoch": 0.39, "grad_norm": 2.8897426598887415, "learning_rate": 1.3942862279049418e-05, "loss": 0.9227, "step": 2420 }, { "epoch": 0.39, "grad_norm": 3.211542003002349, "learning_rate": 1.393806512320682e-05, "loss": 0.9191, "step": 2421 }, { "epoch": 0.39, "grad_norm": 3.7060217492772116, "learning_rate": 1.3933266894526725e-05, "loss": 0.9071, "step": 2422 }, { "epoch": 0.39, "grad_norm": 2.900609110299834, "learning_rate": 1.392846759431631e-05, "loss": 0.8902, "step": 2423 }, { "epoch": 0.39, "grad_norm": 4.170193565504999, "learning_rate": 1.392366722388303e-05, "loss": 0.9531, "step": 2424 }, { "epoch": 0.39, "grad_norm": 3.1175364057652915, "learning_rate": 1.3918865784534647e-05, "loss": 0.9652, "step": 2425 }, { "epoch": 0.39, "grad_norm": 2.399483120413166, "learning_rate": 1.3914063277579195e-05, "loss": 0.8735, "step": 2426 }, { "epoch": 0.39, "grad_norm": 1.5663035827628866, "learning_rate": 1.3909259704325018e-05, "loss": 0.315, "step": 2427 }, { "epoch": 0.39, "grad_norm": 2.890336800719639, "learning_rate": 1.3904455066080737e-05, "loss": 0.8772, "step": 2428 }, { "epoch": 0.39, "grad_norm": 1.861644417787188, "learning_rate": 1.389964936415527e-05, "loss": 0.9324, "step": 2429 }, { "epoch": 0.39, "grad_norm": 2.2026853731498695, "learning_rate": 1.3894842599857818e-05, "loss": 0.8954, "step": 2430 }, { "epoch": 0.39, "grad_norm": 3.24342517439164, "learning_rate": 1.3890034774497884e-05, "loss": 0.9403, "step": 2431 }, { "epoch": 0.39, "grad_norm": 2.3441263532466317, "learning_rate": 1.3885225889385247e-05, "loss": 0.867, "step": 2432 }, { "epoch": 0.39, "grad_norm": 3.319373862040631, "learning_rate": 1.3880415945829979e-05, "loss": 0.8751, "step": 2433 }, { "epoch": 0.39, "grad_norm": 2.482826566086158, "learning_rate": 1.3875604945142445e-05, "loss": 0.8897, "step": 2434 }, { "epoch": 0.39, "grad_norm": 3.157892385281139, "learning_rate": 1.3870792888633293e-05, "loss": 0.8703, "step": 2435 }, { "epoch": 0.39, "grad_norm": 1.9462847137785397, "learning_rate": 1.3865979777613459e-05, "loss": 0.3276, "step": 2436 }, { "epoch": 0.39, "grad_norm": 2.1112123752310796, "learning_rate": 1.3861165613394163e-05, "loss": 0.31, "step": 2437 }, { "epoch": 0.39, "grad_norm": 2.797865282061694, "learning_rate": 1.3856350397286926e-05, "loss": 0.9332, "step": 2438 }, { "epoch": 0.39, "grad_norm": 3.076913198125093, "learning_rate": 1.3851534130603535e-05, "loss": 0.9134, "step": 2439 }, { "epoch": 0.39, "grad_norm": 3.304056310233875, "learning_rate": 1.384671681465608e-05, "loss": 0.9312, "step": 2440 }, { "epoch": 0.39, "grad_norm": 2.838770940544668, "learning_rate": 1.3841898450756933e-05, "loss": 0.9377, "step": 2441 }, { "epoch": 0.39, "grad_norm": 3.363526090599863, "learning_rate": 1.383707904021874e-05, "loss": 0.9208, "step": 2442 }, { "epoch": 0.39, "grad_norm": 1.6954584014973313, "learning_rate": 1.383225858435445e-05, "loss": 0.9866, "step": 2443 }, { "epoch": 0.39, "grad_norm": 2.637632491413374, "learning_rate": 1.3827437084477285e-05, "loss": 0.906, "step": 2444 }, { "epoch": 0.39, "grad_norm": 3.0395590565273896, "learning_rate": 1.3822614541900751e-05, "loss": 0.9538, "step": 2445 }, { "epoch": 0.39, "grad_norm": 3.143142981349798, "learning_rate": 1.3817790957938648e-05, "loss": 0.8857, "step": 2446 }, { "epoch": 0.39, "grad_norm": 2.3806791166893166, "learning_rate": 1.3812966333905052e-05, "loss": 0.9649, "step": 2447 }, { "epoch": 0.39, "grad_norm": 3.0834270841075213, "learning_rate": 1.3808140671114316e-05, "loss": 0.8938, "step": 2448 }, { "epoch": 0.39, "grad_norm": 2.6684755706841106, "learning_rate": 1.3803313970881093e-05, "loss": 0.868, "step": 2449 }, { "epoch": 0.39, "grad_norm": 3.803800569587391, "learning_rate": 1.3798486234520306e-05, "loss": 0.8621, "step": 2450 }, { "epoch": 0.39, "grad_norm": 3.3874005574062123, "learning_rate": 1.3793657463347158e-05, "loss": 0.9308, "step": 2451 }, { "epoch": 0.4, "grad_norm": 2.9426241072132435, "learning_rate": 1.3788827658677151e-05, "loss": 0.8934, "step": 2452 }, { "epoch": 0.4, "grad_norm": 2.1660599837305745, "learning_rate": 1.3783996821826043e-05, "loss": 0.9278, "step": 2453 }, { "epoch": 0.4, "grad_norm": 2.934562512988803, "learning_rate": 1.37791649541099e-05, "loss": 0.9345, "step": 2454 }, { "epoch": 0.4, "grad_norm": 3.9128380723843765, "learning_rate": 1.3774332056845047e-05, "loss": 0.908, "step": 2455 }, { "epoch": 0.4, "grad_norm": 3.1169357006892917, "learning_rate": 1.3769498131348102e-05, "loss": 0.9383, "step": 2456 }, { "epoch": 0.4, "grad_norm": 4.4441012859321765, "learning_rate": 1.376466317893596e-05, "loss": 0.9423, "step": 2457 }, { "epoch": 0.4, "grad_norm": 2.90605228530884, "learning_rate": 1.3759827200925796e-05, "loss": 0.9129, "step": 2458 }, { "epoch": 0.4, "grad_norm": 2.081610488601872, "learning_rate": 1.375499019863506e-05, "loss": 0.9358, "step": 2459 }, { "epoch": 0.4, "grad_norm": 3.0073842722252233, "learning_rate": 1.3750152173381488e-05, "loss": 0.9582, "step": 2460 }, { "epoch": 0.4, "grad_norm": 4.30307015222682, "learning_rate": 1.3745313126483089e-05, "loss": 0.8938, "step": 2461 }, { "epoch": 0.4, "grad_norm": 2.160252380991364, "learning_rate": 1.3740473059258155e-05, "loss": 0.9404, "step": 2462 }, { "epoch": 0.4, "grad_norm": 4.171813278598904, "learning_rate": 1.3735631973025254e-05, "loss": 0.8569, "step": 2463 }, { "epoch": 0.4, "grad_norm": 3.056452950758077, "learning_rate": 1.3730789869103227e-05, "loss": 1.0047, "step": 2464 }, { "epoch": 0.4, "grad_norm": 3.3616182156653465, "learning_rate": 1.3725946748811203e-05, "loss": 0.9138, "step": 2465 }, { "epoch": 0.4, "grad_norm": 3.2822932446830215, "learning_rate": 1.3721102613468578e-05, "loss": 0.9311, "step": 2466 }, { "epoch": 0.4, "grad_norm": 3.882341137526413, "learning_rate": 1.3716257464395026e-05, "loss": 0.9532, "step": 2467 }, { "epoch": 0.4, "grad_norm": 1.4332702438013416, "learning_rate": 1.3711411302910504e-05, "loss": 0.9018, "step": 2468 }, { "epoch": 0.4, "grad_norm": 4.688059862399103, "learning_rate": 1.3706564130335236e-05, "loss": 0.9379, "step": 2469 }, { "epoch": 0.4, "grad_norm": 2.8172991179888847, "learning_rate": 1.370171594798973e-05, "loss": 0.9593, "step": 2470 }, { "epoch": 0.4, "grad_norm": 1.7444211044423945, "learning_rate": 1.3696866757194757e-05, "loss": 0.8808, "step": 2471 }, { "epoch": 0.4, "grad_norm": 3.1278002590397644, "learning_rate": 1.3692016559271377e-05, "loss": 0.9401, "step": 2472 }, { "epoch": 0.4, "grad_norm": 1.9601975218835943, "learning_rate": 1.3687165355540915e-05, "loss": 0.9124, "step": 2473 }, { "epoch": 0.4, "grad_norm": 3.7919308217575156, "learning_rate": 1.3682313147324972e-05, "loss": 0.9331, "step": 2474 }, { "epoch": 0.4, "grad_norm": 3.279655577789128, "learning_rate": 1.3677459935945425e-05, "loss": 0.8949, "step": 2475 }, { "epoch": 0.4, "grad_norm": 3.0550558741278313, "learning_rate": 1.3672605722724422e-05, "loss": 0.9587, "step": 2476 }, { "epoch": 0.4, "grad_norm": 2.072675055092497, "learning_rate": 1.3667750508984383e-05, "loss": 0.8659, "step": 2477 }, { "epoch": 0.4, "grad_norm": 2.6699252683203563, "learning_rate": 1.3662894296048004e-05, "loss": 0.9579, "step": 2478 }, { "epoch": 0.4, "grad_norm": 2.5912649360517026, "learning_rate": 1.365803708523825e-05, "loss": 0.9834, "step": 2479 }, { "epoch": 0.4, "grad_norm": 2.8771854884311625, "learning_rate": 1.365317887787836e-05, "loss": 0.898, "step": 2480 }, { "epoch": 0.4, "grad_norm": 3.376353522263302, "learning_rate": 1.3648319675291842e-05, "loss": 0.9215, "step": 2481 }, { "epoch": 0.4, "grad_norm": 3.476937880234597, "learning_rate": 1.3643459478802479e-05, "loss": 0.9564, "step": 2482 }, { "epoch": 0.4, "grad_norm": 3.8674615936246375, "learning_rate": 1.3638598289734321e-05, "loss": 0.8647, "step": 2483 }, { "epoch": 0.4, "grad_norm": 3.2699987881626136, "learning_rate": 1.3633736109411691e-05, "loss": 0.8766, "step": 2484 }, { "epoch": 0.4, "grad_norm": 2.5694557315704514, "learning_rate": 1.362887293915918e-05, "loss": 0.3189, "step": 2485 }, { "epoch": 0.4, "grad_norm": 1.9365665279024595, "learning_rate": 1.362400878030165e-05, "loss": 0.9745, "step": 2486 }, { "epoch": 0.4, "grad_norm": 3.969963188673339, "learning_rate": 1.3619143634164234e-05, "loss": 0.9434, "step": 2487 }, { "epoch": 0.4, "grad_norm": 2.5780909680518995, "learning_rate": 1.3614277502072327e-05, "loss": 0.9054, "step": 2488 }, { "epoch": 0.4, "grad_norm": 3.1772471067814774, "learning_rate": 1.3609410385351598e-05, "loss": 0.9811, "step": 2489 }, { "epoch": 0.4, "grad_norm": 2.7468903038805053, "learning_rate": 1.3604542285327988e-05, "loss": 0.8302, "step": 2490 }, { "epoch": 0.4, "grad_norm": 2.4828569104572207, "learning_rate": 1.3599673203327702e-05, "loss": 0.9332, "step": 2491 }, { "epoch": 0.4, "grad_norm": 3.3677768412798867, "learning_rate": 1.3594803140677208e-05, "loss": 0.8485, "step": 2492 }, { "epoch": 0.4, "grad_norm": 2.135980733170214, "learning_rate": 1.358993209870325e-05, "loss": 0.8847, "step": 2493 }, { "epoch": 0.4, "grad_norm": 2.9650251611989686, "learning_rate": 1.3585060078732827e-05, "loss": 1.0092, "step": 2494 }, { "epoch": 0.4, "grad_norm": 2.772170774211358, "learning_rate": 1.3580187082093217e-05, "loss": 0.8729, "step": 2495 }, { "epoch": 0.4, "grad_norm": 2.938764746274828, "learning_rate": 1.3575313110111958e-05, "loss": 0.8862, "step": 2496 }, { "epoch": 0.4, "grad_norm": 1.6249835673748463, "learning_rate": 1.3570438164116852e-05, "loss": 0.9555, "step": 2497 }, { "epoch": 0.4, "grad_norm": 4.208376868498064, "learning_rate": 1.3565562245435974e-05, "loss": 0.9055, "step": 2498 }, { "epoch": 0.4, "grad_norm": 2.3167209765245462, "learning_rate": 1.3560685355397651e-05, "loss": 0.9436, "step": 2499 }, { "epoch": 0.4, "grad_norm": 2.982998835247246, "learning_rate": 1.355580749533049e-05, "loss": 0.8569, "step": 2500 }, { "epoch": 0.4, "grad_norm": 1.4333975759052204, "learning_rate": 1.3550928666563348e-05, "loss": 0.9398, "step": 2501 }, { "epoch": 0.4, "grad_norm": 2.59018839618557, "learning_rate": 1.3546048870425356e-05, "loss": 1.0158, "step": 2502 }, { "epoch": 0.4, "grad_norm": 3.665272071804706, "learning_rate": 1.3541168108245907e-05, "loss": 0.8346, "step": 2503 }, { "epoch": 0.4, "grad_norm": 3.8441983093331036, "learning_rate": 1.3536286381354651e-05, "loss": 0.9, "step": 2504 }, { "epoch": 0.4, "grad_norm": 3.8296007698289407, "learning_rate": 1.3531403691081505e-05, "loss": 0.9108, "step": 2505 }, { "epoch": 0.4, "grad_norm": 3.3261404791989353, "learning_rate": 1.352652003875665e-05, "loss": 0.9181, "step": 2506 }, { "epoch": 0.4, "grad_norm": 3.732251828650283, "learning_rate": 1.3521635425710531e-05, "loss": 0.8325, "step": 2507 }, { "epoch": 0.4, "grad_norm": 2.6142490573489927, "learning_rate": 1.351674985327384e-05, "loss": 0.866, "step": 2508 }, { "epoch": 0.4, "grad_norm": 3.3603243174026285, "learning_rate": 1.3511863322777557e-05, "loss": 0.7742, "step": 2509 }, { "epoch": 0.4, "grad_norm": 2.1591146486063253, "learning_rate": 1.3506975835552894e-05, "loss": 0.9092, "step": 2510 }, { "epoch": 0.4, "grad_norm": 2.963484421072672, "learning_rate": 1.3502087392931347e-05, "loss": 0.9174, "step": 2511 }, { "epoch": 0.4, "grad_norm": 1.5109480438337914, "learning_rate": 1.3497197996244655e-05, "loss": 0.9315, "step": 2512 }, { "epoch": 0.4, "grad_norm": 1.6679897619706239, "learning_rate": 1.3492307646824832e-05, "loss": 0.3498, "step": 2513 }, { "epoch": 0.41, "grad_norm": 2.885650643641048, "learning_rate": 1.3487416346004139e-05, "loss": 0.8852, "step": 2514 }, { "epoch": 0.41, "grad_norm": 3.6296586980181926, "learning_rate": 1.3482524095115099e-05, "loss": 0.9615, "step": 2515 }, { "epoch": 0.41, "grad_norm": 4.339845068491024, "learning_rate": 1.3477630895490501e-05, "loss": 0.9278, "step": 2516 }, { "epoch": 0.41, "grad_norm": 1.3766241018690177, "learning_rate": 1.3472736748463387e-05, "loss": 0.9488, "step": 2517 }, { "epoch": 0.41, "grad_norm": 3.1766953695254068, "learning_rate": 1.3467841655367058e-05, "loss": 0.9423, "step": 2518 }, { "epoch": 0.41, "grad_norm": 3.0313632589987978, "learning_rate": 1.3462945617535063e-05, "loss": 0.9471, "step": 2519 }, { "epoch": 0.41, "grad_norm": 2.514067932085104, "learning_rate": 1.3458048636301233e-05, "loss": 0.926, "step": 2520 }, { "epoch": 0.41, "grad_norm": 3.152084169725978, "learning_rate": 1.3453150712999628e-05, "loss": 0.9115, "step": 2521 }, { "epoch": 0.41, "grad_norm": 4.1389880129374905, "learning_rate": 1.3448251848964584e-05, "loss": 0.8727, "step": 2522 }, { "epoch": 0.41, "grad_norm": 2.5010676011776254, "learning_rate": 1.3443352045530685e-05, "loss": 0.8489, "step": 2523 }, { "epoch": 0.41, "grad_norm": 3.5264952389949853, "learning_rate": 1.3438451304032769e-05, "loss": 0.8874, "step": 2524 }, { "epoch": 0.41, "grad_norm": 3.139390997172841, "learning_rate": 1.3433549625805941e-05, "loss": 0.8422, "step": 2525 }, { "epoch": 0.41, "grad_norm": 3.195050888730818, "learning_rate": 1.3428647012185545e-05, "loss": 0.9784, "step": 2526 }, { "epoch": 0.41, "grad_norm": 3.443130182610983, "learning_rate": 1.3423743464507192e-05, "loss": 0.9045, "step": 2527 }, { "epoch": 0.41, "grad_norm": 2.365104841848105, "learning_rate": 1.3418838984106746e-05, "loss": 0.8884, "step": 2528 }, { "epoch": 0.41, "grad_norm": 1.1404137089088968, "learning_rate": 1.3413933572320317e-05, "loss": 0.9541, "step": 2529 }, { "epoch": 0.41, "grad_norm": 3.2194910770345335, "learning_rate": 1.3409027230484279e-05, "loss": 0.9311, "step": 2530 }, { "epoch": 0.41, "grad_norm": 2.7255275915523645, "learning_rate": 1.3404119959935254e-05, "loss": 0.8336, "step": 2531 }, { "epoch": 0.41, "grad_norm": 2.195895623247107, "learning_rate": 1.3399211762010117e-05, "loss": 0.9287, "step": 2532 }, { "epoch": 0.41, "grad_norm": 3.0285834155398152, "learning_rate": 1.3394302638045992e-05, "loss": 0.9255, "step": 2533 }, { "epoch": 0.41, "grad_norm": 2.688796506911338, "learning_rate": 1.3389392589380265e-05, "loss": 0.9288, "step": 2534 }, { "epoch": 0.41, "grad_norm": 2.578183999975939, "learning_rate": 1.3384481617350572e-05, "loss": 0.9543, "step": 2535 }, { "epoch": 0.41, "grad_norm": 2.8275485636809785, "learning_rate": 1.337956972329479e-05, "loss": 0.8766, "step": 2536 }, { "epoch": 0.41, "grad_norm": 1.6720295772217943, "learning_rate": 1.3374656908551055e-05, "loss": 0.3227, "step": 2537 }, { "epoch": 0.41, "grad_norm": 4.283493456587387, "learning_rate": 1.3369743174457756e-05, "loss": 0.8919, "step": 2538 }, { "epoch": 0.41, "grad_norm": 1.9147342048229614, "learning_rate": 1.336482852235353e-05, "loss": 0.9364, "step": 2539 }, { "epoch": 0.41, "grad_norm": 1.7663890569525873, "learning_rate": 1.3359912953577261e-05, "loss": 0.9194, "step": 2540 }, { "epoch": 0.41, "grad_norm": 3.0095799393159193, "learning_rate": 1.335499646946809e-05, "loss": 0.9479, "step": 2541 }, { "epoch": 0.41, "grad_norm": 3.1609293177183395, "learning_rate": 1.3350079071365398e-05, "loss": 0.8486, "step": 2542 }, { "epoch": 0.41, "grad_norm": 2.8447642770736308, "learning_rate": 1.334516076060882e-05, "loss": 0.9256, "step": 2543 }, { "epoch": 0.41, "grad_norm": 1.8384787645106926, "learning_rate": 1.334024153853824e-05, "loss": 0.9087, "step": 2544 }, { "epoch": 0.41, "grad_norm": 2.0784021637304027, "learning_rate": 1.3335321406493795e-05, "loss": 0.8908, "step": 2545 }, { "epoch": 0.41, "grad_norm": 1.8508910559192722, "learning_rate": 1.3330400365815856e-05, "loss": 0.9158, "step": 2546 }, { "epoch": 0.41, "grad_norm": 3.7129197779217793, "learning_rate": 1.3325478417845057e-05, "loss": 0.9536, "step": 2547 }, { "epoch": 0.41, "grad_norm": 1.8699178484858792, "learning_rate": 1.332055556392227e-05, "loss": 0.943, "step": 2548 }, { "epoch": 0.41, "grad_norm": 3.0546369231346846, "learning_rate": 1.3315631805388613e-05, "loss": 0.9293, "step": 2549 }, { "epoch": 0.41, "grad_norm": 3.7224318285580087, "learning_rate": 1.3310707143585458e-05, "loss": 0.8825, "step": 2550 }, { "epoch": 0.41, "grad_norm": 2.857897267197678, "learning_rate": 1.3305781579854417e-05, "loss": 0.9782, "step": 2551 }, { "epoch": 0.41, "grad_norm": 2.1777925081576632, "learning_rate": 1.3300855115537347e-05, "loss": 0.9472, "step": 2552 }, { "epoch": 0.41, "grad_norm": 2.4444007026727532, "learning_rate": 1.3295927751976358e-05, "loss": 0.9742, "step": 2553 }, { "epoch": 0.41, "grad_norm": 3.147855567633721, "learning_rate": 1.3290999490513796e-05, "loss": 0.9414, "step": 2554 }, { "epoch": 0.41, "grad_norm": 2.2694110674396515, "learning_rate": 1.3286070332492256e-05, "loss": 1.0008, "step": 2555 }, { "epoch": 0.41, "grad_norm": 2.7704931829561756, "learning_rate": 1.3281140279254576e-05, "loss": 0.9276, "step": 2556 }, { "epoch": 0.41, "grad_norm": 2.023666427799957, "learning_rate": 1.3276209332143836e-05, "loss": 0.8451, "step": 2557 }, { "epoch": 0.41, "grad_norm": 3.2609860996487017, "learning_rate": 1.327127749250337e-05, "loss": 0.9176, "step": 2558 }, { "epoch": 0.41, "grad_norm": 4.301304756753889, "learning_rate": 1.3266344761676735e-05, "loss": 0.8797, "step": 2559 }, { "epoch": 0.41, "grad_norm": 3.3354234500381588, "learning_rate": 1.3261411141007757e-05, "loss": 0.9361, "step": 2560 }, { "epoch": 0.41, "grad_norm": 2.687936082102721, "learning_rate": 1.3256476631840478e-05, "loss": 0.9245, "step": 2561 }, { "epoch": 0.41, "grad_norm": 3.083575539913294, "learning_rate": 1.3251541235519199e-05, "loss": 0.9868, "step": 2562 }, { "epoch": 0.41, "grad_norm": 1.541317135477147, "learning_rate": 1.324660495338846e-05, "loss": 0.345, "step": 2563 }, { "epoch": 0.41, "grad_norm": 3.294792928021061, "learning_rate": 1.324166778679304e-05, "loss": 0.9233, "step": 2564 }, { "epoch": 0.41, "grad_norm": 2.522299873269592, "learning_rate": 1.3236729737077956e-05, "loss": 0.9018, "step": 2565 }, { "epoch": 0.41, "grad_norm": 2.187025727220569, "learning_rate": 1.3231790805588469e-05, "loss": 0.9476, "step": 2566 }, { "epoch": 0.41, "grad_norm": 2.8188032936082363, "learning_rate": 1.3226850993670087e-05, "loss": 0.8769, "step": 2567 }, { "epoch": 0.41, "grad_norm": 2.3204711448816844, "learning_rate": 1.3221910302668538e-05, "loss": 0.955, "step": 2568 }, { "epoch": 0.41, "grad_norm": 4.724493106516866, "learning_rate": 1.3216968733929817e-05, "loss": 0.8395, "step": 2569 }, { "epoch": 0.41, "grad_norm": 3.6083318350585456, "learning_rate": 1.321202628880013e-05, "loss": 0.844, "step": 2570 }, { "epoch": 0.41, "grad_norm": 3.150472205962851, "learning_rate": 1.3207082968625948e-05, "loss": 0.8764, "step": 2571 }, { "epoch": 0.41, "grad_norm": 2.3213988564234818, "learning_rate": 1.3202138774753958e-05, "loss": 0.9176, "step": 2572 }, { "epoch": 0.41, "grad_norm": 3.0051443180205752, "learning_rate": 1.3197193708531099e-05, "loss": 0.884, "step": 2573 }, { "epoch": 0.41, "grad_norm": 2.1445384763724444, "learning_rate": 1.3192247771304543e-05, "loss": 0.9136, "step": 2574 }, { "epoch": 0.41, "grad_norm": 2.216823117273087, "learning_rate": 1.3187300964421702e-05, "loss": 0.8955, "step": 2575 }, { "epoch": 0.42, "grad_norm": 4.018308225469152, "learning_rate": 1.3182353289230216e-05, "loss": 0.9487, "step": 2576 }, { "epoch": 0.42, "grad_norm": 2.653214728789249, "learning_rate": 1.3177404747077973e-05, "loss": 0.8871, "step": 2577 }, { "epoch": 0.42, "grad_norm": 3.0813015266118295, "learning_rate": 1.3172455339313091e-05, "loss": 0.8902, "step": 2578 }, { "epoch": 0.42, "grad_norm": 3.017296678571896, "learning_rate": 1.3167505067283926e-05, "loss": 0.9455, "step": 2579 }, { "epoch": 0.42, "grad_norm": 3.4497773527090354, "learning_rate": 1.3162553932339068e-05, "loss": 0.9765, "step": 2580 }, { "epoch": 0.42, "grad_norm": 4.989940250933446, "learning_rate": 1.315760193582734e-05, "loss": 0.836, "step": 2581 }, { "epoch": 0.42, "grad_norm": 2.3392761255232077, "learning_rate": 1.3152649079097808e-05, "loss": 0.9644, "step": 2582 }, { "epoch": 0.42, "grad_norm": 3.6061651315735332, "learning_rate": 1.3147695363499762e-05, "loss": 0.8544, "step": 2583 }, { "epoch": 0.42, "grad_norm": 2.0224568115074404, "learning_rate": 1.3142740790382733e-05, "loss": 0.8736, "step": 2584 }, { "epoch": 0.42, "grad_norm": 2.149662958385747, "learning_rate": 1.3137785361096485e-05, "loss": 0.8763, "step": 2585 }, { "epoch": 0.42, "grad_norm": 2.760842709082579, "learning_rate": 1.3132829076991007e-05, "loss": 0.9083, "step": 2586 }, { "epoch": 0.42, "grad_norm": 1.6555303053598027, "learning_rate": 1.3127871939416533e-05, "loss": 0.2931, "step": 2587 }, { "epoch": 0.42, "grad_norm": 1.7071284565429374, "learning_rate": 1.3122913949723522e-05, "loss": 0.947, "step": 2588 }, { "epoch": 0.42, "grad_norm": 1.5728137843693144, "learning_rate": 1.3117955109262668e-05, "loss": 0.9351, "step": 2589 }, { "epoch": 0.42, "grad_norm": 1.7571867104395762, "learning_rate": 1.3112995419384894e-05, "loss": 0.9308, "step": 2590 }, { "epoch": 0.42, "grad_norm": 1.7522073856788094, "learning_rate": 1.3108034881441359e-05, "loss": 0.9545, "step": 2591 }, { "epoch": 0.42, "grad_norm": 3.565986934550211, "learning_rate": 1.3103073496783447e-05, "loss": 0.8968, "step": 2592 }, { "epoch": 0.42, "grad_norm": 3.775809543534394, "learning_rate": 1.309811126676278e-05, "loss": 0.8705, "step": 2593 }, { "epoch": 0.42, "grad_norm": 1.8123812801021537, "learning_rate": 1.3093148192731202e-05, "loss": 0.9393, "step": 2594 }, { "epoch": 0.42, "grad_norm": 2.1532470137042723, "learning_rate": 1.3088184276040794e-05, "loss": 0.9195, "step": 2595 }, { "epoch": 0.42, "grad_norm": 2.7151996626245114, "learning_rate": 1.3083219518043866e-05, "loss": 0.9594, "step": 2596 }, { "epoch": 0.42, "grad_norm": 2.787220579092827, "learning_rate": 1.3078253920092949e-05, "loss": 0.8911, "step": 2597 }, { "epoch": 0.42, "grad_norm": 3.629202281428238, "learning_rate": 1.3073287483540811e-05, "loss": 0.9623, "step": 2598 }, { "epoch": 0.42, "grad_norm": 2.896814760900698, "learning_rate": 1.3068320209740448e-05, "loss": 0.9621, "step": 2599 }, { "epoch": 0.42, "grad_norm": 3.206523302115816, "learning_rate": 1.3063352100045079e-05, "loss": 0.9512, "step": 2600 }, { "epoch": 0.42, "grad_norm": 2.5205123528872315, "learning_rate": 1.3058383155808159e-05, "loss": 0.9564, "step": 2601 }, { "epoch": 0.42, "grad_norm": 2.7216789867015603, "learning_rate": 1.3053413378383361e-05, "loss": 0.9766, "step": 2602 }, { "epoch": 0.42, "grad_norm": 2.4438274127377104, "learning_rate": 1.304844276912459e-05, "loss": 0.9063, "step": 2603 }, { "epoch": 0.42, "grad_norm": 1.6476939183031192, "learning_rate": 1.3043471329385979e-05, "loss": 0.3131, "step": 2604 }, { "epoch": 0.42, "grad_norm": 2.7270929645769444, "learning_rate": 1.3038499060521886e-05, "loss": 0.8826, "step": 2605 }, { "epoch": 0.42, "grad_norm": 2.7087145219212543, "learning_rate": 1.3033525963886888e-05, "loss": 1.0121, "step": 2606 }, { "epoch": 0.42, "grad_norm": 2.058917315134117, "learning_rate": 1.3028552040835802e-05, "loss": 0.8664, "step": 2607 }, { "epoch": 0.42, "grad_norm": 1.7539322135379918, "learning_rate": 1.3023577292723655e-05, "loss": 0.9478, "step": 2608 }, { "epoch": 0.42, "grad_norm": 2.442869678583065, "learning_rate": 1.3018601720905708e-05, "loss": 0.9677, "step": 2609 }, { "epoch": 0.42, "grad_norm": 2.081365482459995, "learning_rate": 1.3013625326737444e-05, "loss": 0.884, "step": 2610 }, { "epoch": 0.42, "grad_norm": 2.224777737778187, "learning_rate": 1.300864811157457e-05, "loss": 0.9727, "step": 2611 }, { "epoch": 0.42, "grad_norm": 3.641296979586049, "learning_rate": 1.3003670076773018e-05, "loss": 0.9223, "step": 2612 }, { "epoch": 0.42, "grad_norm": 2.1501316118987526, "learning_rate": 1.2998691223688942e-05, "loss": 0.9204, "step": 2613 }, { "epoch": 0.42, "grad_norm": 2.017180441468031, "learning_rate": 1.2993711553678714e-05, "loss": 0.7825, "step": 2614 }, { "epoch": 0.42, "grad_norm": 3.3564802605832584, "learning_rate": 1.2988731068098938e-05, "loss": 0.8582, "step": 2615 }, { "epoch": 0.42, "grad_norm": 2.321011935563731, "learning_rate": 1.2983749768306434e-05, "loss": 0.8893, "step": 2616 }, { "epoch": 0.42, "grad_norm": 1.963830098656024, "learning_rate": 1.2978767655658245e-05, "loss": 0.9566, "step": 2617 }, { "epoch": 0.42, "grad_norm": 2.927408455998157, "learning_rate": 1.2973784731511638e-05, "loss": 0.946, "step": 2618 }, { "epoch": 0.42, "grad_norm": 2.830398067609991, "learning_rate": 1.2968800997224093e-05, "loss": 0.8852, "step": 2619 }, { "epoch": 0.42, "grad_norm": 3.077881469387606, "learning_rate": 1.2963816454153327e-05, "loss": 0.9285, "step": 2620 }, { "epoch": 0.42, "grad_norm": 2.6772338488429397, "learning_rate": 1.2958831103657255e-05, "loss": 0.8483, "step": 2621 }, { "epoch": 0.42, "grad_norm": 4.76053675438173, "learning_rate": 1.2953844947094032e-05, "loss": 0.9611, "step": 2622 }, { "epoch": 0.42, "grad_norm": 4.380038384744026, "learning_rate": 1.2948857985822023e-05, "loss": 0.8833, "step": 2623 }, { "epoch": 0.42, "grad_norm": 3.888512708241201, "learning_rate": 1.2943870221199814e-05, "loss": 0.9194, "step": 2624 }, { "epoch": 0.42, "grad_norm": 2.5510921084390006, "learning_rate": 1.2938881654586207e-05, "loss": 0.9323, "step": 2625 }, { "epoch": 0.42, "grad_norm": 3.8595499470089814, "learning_rate": 1.2933892287340226e-05, "loss": 0.9125, "step": 2626 }, { "epoch": 0.42, "grad_norm": 3.5998500633805293, "learning_rate": 1.2928902120821111e-05, "loss": 0.8702, "step": 2627 }, { "epoch": 0.42, "grad_norm": 3.1091438859322955, "learning_rate": 1.2923911156388327e-05, "loss": 0.9597, "step": 2628 }, { "epoch": 0.42, "grad_norm": 1.59690104125526, "learning_rate": 1.2918919395401544e-05, "loss": 0.8622, "step": 2629 }, { "epoch": 0.42, "grad_norm": 2.547929510384162, "learning_rate": 1.2913926839220654e-05, "loss": 0.9135, "step": 2630 }, { "epoch": 0.42, "grad_norm": 2.8762728319915576, "learning_rate": 1.2908933489205776e-05, "loss": 0.8837, "step": 2631 }, { "epoch": 0.42, "grad_norm": 3.1285125827667346, "learning_rate": 1.2903939346717226e-05, "loss": 0.9269, "step": 2632 }, { "epoch": 0.42, "grad_norm": 4.8870229893177415, "learning_rate": 1.2898944413115553e-05, "loss": 1.0023, "step": 2633 }, { "epoch": 0.42, "grad_norm": 3.739497035880609, "learning_rate": 1.2893948689761509e-05, "loss": 0.8875, "step": 2634 }, { "epoch": 0.42, "grad_norm": 2.602920773338407, "learning_rate": 1.2888952178016075e-05, "loss": 0.9505, "step": 2635 }, { "epoch": 0.42, "grad_norm": 3.8632470602869704, "learning_rate": 1.2883954879240428e-05, "loss": 0.8987, "step": 2636 }, { "epoch": 0.42, "grad_norm": 3.870216616696829, "learning_rate": 1.2878956794795978e-05, "loss": 0.9167, "step": 2637 }, { "epoch": 0.43, "grad_norm": 3.1433503581368436, "learning_rate": 1.2873957926044336e-05, "loss": 0.9682, "step": 2638 }, { "epoch": 0.43, "grad_norm": 2.2252803925848443, "learning_rate": 1.2868958274347334e-05, "loss": 0.3348, "step": 2639 }, { "epoch": 0.43, "grad_norm": 4.121333718205951, "learning_rate": 1.2863957841067018e-05, "loss": 0.8625, "step": 2640 }, { "epoch": 0.43, "grad_norm": 1.288416521455346, "learning_rate": 1.2858956627565635e-05, "loss": 0.9292, "step": 2641 }, { "epoch": 0.43, "grad_norm": 2.1244557749651354, "learning_rate": 1.2853954635205665e-05, "loss": 0.8954, "step": 2642 }, { "epoch": 0.43, "grad_norm": 2.1365167776886786, "learning_rate": 1.2848951865349778e-05, "loss": 0.8755, "step": 2643 }, { "epoch": 0.43, "grad_norm": 3.0044776561539153, "learning_rate": 1.2843948319360875e-05, "loss": 0.9452, "step": 2644 }, { "epoch": 0.43, "grad_norm": 3.3141187635193368, "learning_rate": 1.2838943998602054e-05, "loss": 0.9048, "step": 2645 }, { "epoch": 0.43, "grad_norm": 2.431807301641998, "learning_rate": 1.2833938904436634e-05, "loss": 0.9232, "step": 2646 }, { "epoch": 0.43, "grad_norm": 2.949192718997289, "learning_rate": 1.2828933038228136e-05, "loss": 0.8806, "step": 2647 }, { "epoch": 0.43, "grad_norm": 3.569627959155501, "learning_rate": 1.2823926401340296e-05, "loss": 0.8856, "step": 2648 }, { "epoch": 0.43, "grad_norm": 3.7559549733104687, "learning_rate": 1.2818918995137066e-05, "loss": 0.976, "step": 2649 }, { "epoch": 0.43, "grad_norm": 1.8861019160827317, "learning_rate": 1.2813910820982596e-05, "loss": 0.9007, "step": 2650 }, { "epoch": 0.43, "grad_norm": 3.7316928313205393, "learning_rate": 1.2808901880241257e-05, "loss": 0.7672, "step": 2651 }, { "epoch": 0.43, "grad_norm": 1.9973931012058264, "learning_rate": 1.2803892174277611e-05, "loss": 0.8379, "step": 2652 }, { "epoch": 0.43, "grad_norm": 3.8391229335203163, "learning_rate": 1.2798881704456452e-05, "loss": 0.8937, "step": 2653 }, { "epoch": 0.43, "grad_norm": 2.7570050059685767, "learning_rate": 1.2793870472142762e-05, "loss": 0.8706, "step": 2654 }, { "epoch": 0.43, "grad_norm": 2.7956270811711526, "learning_rate": 1.2788858478701745e-05, "loss": 0.9001, "step": 2655 }, { "epoch": 0.43, "grad_norm": 2.4663884903875863, "learning_rate": 1.27838457254988e-05, "loss": 0.8921, "step": 2656 }, { "epoch": 0.43, "grad_norm": 3.127518211451992, "learning_rate": 1.277883221389954e-05, "loss": 0.8882, "step": 2657 }, { "epoch": 0.43, "grad_norm": 2.440070435335963, "learning_rate": 1.2773817945269793e-05, "loss": 0.895, "step": 2658 }, { "epoch": 0.43, "grad_norm": 3.326597337732974, "learning_rate": 1.2768802920975574e-05, "loss": 0.935, "step": 2659 }, { "epoch": 0.43, "grad_norm": 3.0442728521744447, "learning_rate": 1.2763787142383115e-05, "loss": 0.8859, "step": 2660 }, { "epoch": 0.43, "grad_norm": 1.2638671818407365, "learning_rate": 1.2758770610858855e-05, "loss": 0.868, "step": 2661 }, { "epoch": 0.43, "grad_norm": 3.219262387870931, "learning_rate": 1.2753753327769437e-05, "loss": 0.9373, "step": 2662 }, { "epoch": 0.43, "grad_norm": 2.6708240784610204, "learning_rate": 1.2748735294481702e-05, "loss": 0.907, "step": 2663 }, { "epoch": 0.43, "grad_norm": 3.4115841597243115, "learning_rate": 1.2743716512362705e-05, "loss": 0.9393, "step": 2664 }, { "epoch": 0.43, "grad_norm": 2.6224668178150075, "learning_rate": 1.2738696982779699e-05, "loss": 0.9896, "step": 2665 }, { "epoch": 0.43, "grad_norm": 3.346804213287302, "learning_rate": 1.273367670710014e-05, "loss": 0.9549, "step": 2666 }, { "epoch": 0.43, "grad_norm": 3.0649013835730234, "learning_rate": 1.2728655686691693e-05, "loss": 0.9627, "step": 2667 }, { "epoch": 0.43, "grad_norm": 4.23527624244907, "learning_rate": 1.2723633922922222e-05, "loss": 0.8813, "step": 2668 }, { "epoch": 0.43, "grad_norm": 4.31396658465054, "learning_rate": 1.2718611417159793e-05, "loss": 0.8385, "step": 2669 }, { "epoch": 0.43, "grad_norm": 2.0456076622498323, "learning_rate": 1.2713588170772674e-05, "loss": 0.9724, "step": 2670 }, { "epoch": 0.43, "grad_norm": 3.189332323186878, "learning_rate": 1.2708564185129339e-05, "loss": 0.8242, "step": 2671 }, { "epoch": 0.43, "grad_norm": 3.8047461534557505, "learning_rate": 1.2703539461598455e-05, "loss": 0.9674, "step": 2672 }, { "epoch": 0.43, "grad_norm": 2.8417878140963797, "learning_rate": 1.2698514001548904e-05, "loss": 0.8815, "step": 2673 }, { "epoch": 0.43, "grad_norm": 2.196500626377891, "learning_rate": 1.2693487806349744e-05, "loss": 0.8767, "step": 2674 }, { "epoch": 0.43, "grad_norm": 1.8353716039014984, "learning_rate": 1.2688460877370269e-05, "loss": 0.9644, "step": 2675 }, { "epoch": 0.43, "grad_norm": 3.230110489930746, "learning_rate": 1.2683433215979937e-05, "loss": 0.9802, "step": 2676 }, { "epoch": 0.43, "grad_norm": 4.202481018422936, "learning_rate": 1.267840482354843e-05, "loss": 0.9141, "step": 2677 }, { "epoch": 0.43, "grad_norm": 1.9286396100339342, "learning_rate": 1.2673375701445618e-05, "loss": 0.8626, "step": 2678 }, { "epoch": 0.43, "grad_norm": 2.9156566687659966, "learning_rate": 1.266834585104157e-05, "loss": 0.9149, "step": 2679 }, { "epoch": 0.43, "grad_norm": 3.7439964557207075, "learning_rate": 1.2663315273706563e-05, "loss": 0.9256, "step": 2680 }, { "epoch": 0.43, "grad_norm": 3.971845846121675, "learning_rate": 1.2658283970811058e-05, "loss": 0.9459, "step": 2681 }, { "epoch": 0.43, "grad_norm": 3.786618795584812, "learning_rate": 1.2653251943725726e-05, "loss": 0.9605, "step": 2682 }, { "epoch": 0.43, "grad_norm": 2.2040339549542027, "learning_rate": 1.2648219193821425e-05, "loss": 0.9902, "step": 2683 }, { "epoch": 0.43, "grad_norm": 2.401480964860009, "learning_rate": 1.264318572246922e-05, "loss": 0.9343, "step": 2684 }, { "epoch": 0.43, "grad_norm": 3.543074533719162, "learning_rate": 1.2638151531040359e-05, "loss": 0.9725, "step": 2685 }, { "epoch": 0.43, "grad_norm": 3.232047897786545, "learning_rate": 1.2633116620906306e-05, "loss": 0.9504, "step": 2686 }, { "epoch": 0.43, "grad_norm": 2.7803651923852275, "learning_rate": 1.2628080993438698e-05, "loss": 0.8974, "step": 2687 }, { "epoch": 0.43, "grad_norm": 2.416466167507523, "learning_rate": 1.2623044650009387e-05, "loss": 0.8938, "step": 2688 }, { "epoch": 0.43, "grad_norm": 3.005243170066354, "learning_rate": 1.261800759199041e-05, "loss": 0.8881, "step": 2689 }, { "epoch": 0.43, "grad_norm": 3.4605583172643546, "learning_rate": 1.2612969820753995e-05, "loss": 0.9231, "step": 2690 }, { "epoch": 0.43, "grad_norm": 4.495222310476477, "learning_rate": 1.2607931337672576e-05, "loss": 0.9219, "step": 2691 }, { "epoch": 0.43, "grad_norm": 2.4856001993754453, "learning_rate": 1.2602892144118772e-05, "loss": 0.9034, "step": 2692 }, { "epoch": 0.43, "grad_norm": 3.093821515114085, "learning_rate": 1.25978522414654e-05, "loss": 0.9383, "step": 2693 }, { "epoch": 0.43, "grad_norm": 3.150884165124718, "learning_rate": 1.2592811631085464e-05, "loss": 0.9211, "step": 2694 }, { "epoch": 0.43, "grad_norm": 2.2297572723143477, "learning_rate": 1.2587770314352175e-05, "loss": 0.9183, "step": 2695 }, { "epoch": 0.43, "grad_norm": 2.592131143961022, "learning_rate": 1.2582728292638913e-05, "loss": 0.9391, "step": 2696 }, { "epoch": 0.43, "grad_norm": 2.9308692278131403, "learning_rate": 1.2577685567319275e-05, "loss": 0.9494, "step": 2697 }, { "epoch": 0.43, "grad_norm": 2.934040061027101, "learning_rate": 1.2572642139767033e-05, "loss": 0.9511, "step": 2698 }, { "epoch": 0.43, "grad_norm": 3.146717444134089, "learning_rate": 1.2567598011356155e-05, "loss": 0.9484, "step": 2699 }, { "epoch": 0.44, "grad_norm": 1.9125593238555623, "learning_rate": 1.2562553183460806e-05, "loss": 1.0073, "step": 2700 }, { "epoch": 0.44, "grad_norm": 4.686572580781358, "learning_rate": 1.2557507657455327e-05, "loss": 0.9375, "step": 2701 }, { "epoch": 0.44, "grad_norm": 1.7641479255258634, "learning_rate": 1.2552461434714272e-05, "loss": 0.943, "step": 2702 }, { "epoch": 0.44, "grad_norm": 3.2594252146348386, "learning_rate": 1.2547414516612357e-05, "loss": 0.9573, "step": 2703 }, { "epoch": 0.44, "grad_norm": 1.9597440332535274, "learning_rate": 1.254236690452451e-05, "loss": 0.9838, "step": 2704 }, { "epoch": 0.44, "grad_norm": 2.90288028404229, "learning_rate": 1.2537318599825836e-05, "loss": 0.95, "step": 2705 }, { "epoch": 0.44, "grad_norm": 2.2826127462963233, "learning_rate": 1.2532269603891639e-05, "loss": 0.9785, "step": 2706 }, { "epoch": 0.44, "grad_norm": 2.9154918120976934, "learning_rate": 1.2527219918097392e-05, "loss": 0.8949, "step": 2707 }, { "epoch": 0.44, "grad_norm": 1.7956056838518593, "learning_rate": 1.2522169543818781e-05, "loss": 0.9469, "step": 2708 }, { "epoch": 0.44, "grad_norm": 2.4656002377039, "learning_rate": 1.251711848243166e-05, "loss": 0.9295, "step": 2709 }, { "epoch": 0.44, "grad_norm": 3.011425988678873, "learning_rate": 1.251206673531208e-05, "loss": 0.9385, "step": 2710 }, { "epoch": 0.44, "grad_norm": 3.9600624457165585, "learning_rate": 1.2507014303836277e-05, "loss": 0.8915, "step": 2711 }, { "epoch": 0.44, "grad_norm": 3.0899206024141836, "learning_rate": 1.250196118938067e-05, "loss": 0.9281, "step": 2712 }, { "epoch": 0.44, "grad_norm": 3.8093543911909205, "learning_rate": 1.2496907393321868e-05, "loss": 0.9115, "step": 2713 }, { "epoch": 0.44, "grad_norm": 2.0955929971646747, "learning_rate": 1.2491852917036666e-05, "loss": 0.9518, "step": 2714 }, { "epoch": 0.44, "grad_norm": 3.7296217031632115, "learning_rate": 1.2486797761902039e-05, "loss": 0.8427, "step": 2715 }, { "epoch": 0.44, "grad_norm": 3.0702141898919635, "learning_rate": 1.2481741929295154e-05, "loss": 0.9806, "step": 2716 }, { "epoch": 0.44, "grad_norm": 2.164056660888727, "learning_rate": 1.2476685420593357e-05, "loss": 0.9531, "step": 2717 }, { "epoch": 0.44, "grad_norm": 2.9700012782444114, "learning_rate": 1.2471628237174183e-05, "loss": 0.8743, "step": 2718 }, { "epoch": 0.44, "grad_norm": 2.6106367886573687, "learning_rate": 1.2466570380415346e-05, "loss": 0.9167, "step": 2719 }, { "epoch": 0.44, "grad_norm": 2.135501940505518, "learning_rate": 1.2461511851694743e-05, "loss": 0.9169, "step": 2720 }, { "epoch": 0.44, "grad_norm": 2.828595707116237, "learning_rate": 1.2456452652390463e-05, "loss": 0.9877, "step": 2721 }, { "epoch": 0.44, "grad_norm": 1.9020632433239448, "learning_rate": 1.2451392783880767e-05, "loss": 0.8888, "step": 2722 }, { "epoch": 0.44, "grad_norm": 3.070042411436692, "learning_rate": 1.2446332247544102e-05, "loss": 0.9028, "step": 2723 }, { "epoch": 0.44, "grad_norm": 3.4123168952081158, "learning_rate": 1.2441271044759103e-05, "loss": 0.9592, "step": 2724 }, { "epoch": 0.44, "grad_norm": 2.7964388544582257, "learning_rate": 1.2436209176904575e-05, "loss": 0.3326, "step": 2725 }, { "epoch": 0.44, "grad_norm": 2.6436910491936128, "learning_rate": 1.2431146645359511e-05, "loss": 0.9156, "step": 2726 }, { "epoch": 0.44, "grad_norm": 3.3264392290617777, "learning_rate": 1.2426083451503086e-05, "loss": 0.8664, "step": 2727 }, { "epoch": 0.44, "grad_norm": 3.5324533488330925, "learning_rate": 1.2421019596714656e-05, "loss": 0.9054, "step": 2728 }, { "epoch": 0.44, "grad_norm": 2.0292785467471157, "learning_rate": 1.2415955082373752e-05, "loss": 0.9157, "step": 2729 }, { "epoch": 0.44, "grad_norm": 2.5598072697689207, "learning_rate": 1.2410889909860086e-05, "loss": 0.8918, "step": 2730 }, { "epoch": 0.44, "grad_norm": 2.2619260913636743, "learning_rate": 1.2405824080553553e-05, "loss": 0.8407, "step": 2731 }, { "epoch": 0.44, "grad_norm": 3.38333273889195, "learning_rate": 1.2400757595834221e-05, "loss": 0.8914, "step": 2732 }, { "epoch": 0.44, "grad_norm": 3.8019291397948223, "learning_rate": 1.2395690457082348e-05, "loss": 0.8828, "step": 2733 }, { "epoch": 0.44, "grad_norm": 2.9320698321614693, "learning_rate": 1.239062266567835e-05, "loss": 0.8287, "step": 2734 }, { "epoch": 0.44, "grad_norm": 3.3800858219243812, "learning_rate": 1.2385554223002844e-05, "loss": 0.9734, "step": 2735 }, { "epoch": 0.44, "grad_norm": 2.7809361484334403, "learning_rate": 1.238048513043661e-05, "loss": 0.9037, "step": 2736 }, { "epoch": 0.44, "grad_norm": 3.768661983847654, "learning_rate": 1.2375415389360605e-05, "loss": 0.9211, "step": 2737 }, { "epoch": 0.44, "grad_norm": 2.338755970900889, "learning_rate": 1.2370345001155972e-05, "loss": 0.92, "step": 2738 }, { "epoch": 0.44, "grad_norm": 2.4245823648016733, "learning_rate": 1.2365273967204018e-05, "loss": 0.9257, "step": 2739 }, { "epoch": 0.44, "grad_norm": 3.286477130689029, "learning_rate": 1.2360202288886243e-05, "loss": 0.8833, "step": 2740 }, { "epoch": 0.44, "grad_norm": 2.8481414758282297, "learning_rate": 1.23551299675843e-05, "loss": 0.8642, "step": 2741 }, { "epoch": 0.44, "grad_norm": 2.709626422202956, "learning_rate": 1.2350057004680036e-05, "loss": 0.9543, "step": 2742 }, { "epoch": 0.44, "grad_norm": 3.0115157676206725, "learning_rate": 1.2344983401555464e-05, "loss": 0.9861, "step": 2743 }, { "epoch": 0.44, "grad_norm": 2.7042175907302903, "learning_rate": 1.2339909159592774e-05, "loss": 0.9383, "step": 2744 }, { "epoch": 0.44, "grad_norm": 3.740901080836868, "learning_rate": 1.2334834280174325e-05, "loss": 0.876, "step": 2745 }, { "epoch": 0.44, "grad_norm": 3.12135926837187, "learning_rate": 1.2329758764682663e-05, "loss": 0.9193, "step": 2746 }, { "epoch": 0.44, "grad_norm": 2.3586455979765693, "learning_rate": 1.2324682614500492e-05, "loss": 0.9008, "step": 2747 }, { "epoch": 0.44, "grad_norm": 3.529975053304644, "learning_rate": 1.2319605831010694e-05, "loss": 0.9207, "step": 2748 }, { "epoch": 0.44, "grad_norm": 3.508649220746931, "learning_rate": 1.231452841559633e-05, "loss": 0.9361, "step": 2749 }, { "epoch": 0.44, "grad_norm": 2.970439711108701, "learning_rate": 1.2309450369640622e-05, "loss": 0.9104, "step": 2750 }, { "epoch": 0.44, "grad_norm": 2.4528430363618976, "learning_rate": 1.230437169452698e-05, "loss": 0.9601, "step": 2751 }, { "epoch": 0.44, "grad_norm": 2.011141855427555, "learning_rate": 1.2299292391638962e-05, "loss": 0.8699, "step": 2752 }, { "epoch": 0.44, "grad_norm": 1.8275471613652436, "learning_rate": 1.2294212462360318e-05, "loss": 0.8842, "step": 2753 }, { "epoch": 0.44, "grad_norm": 2.424245351189956, "learning_rate": 1.2289131908074958e-05, "loss": 0.8786, "step": 2754 }, { "epoch": 0.44, "grad_norm": 1.9119494549562484, "learning_rate": 1.2284050730166968e-05, "loss": 0.8669, "step": 2755 }, { "epoch": 0.44, "grad_norm": 3.3353365283101613, "learning_rate": 1.2278968930020597e-05, "loss": 0.9247, "step": 2756 }, { "epoch": 0.44, "grad_norm": 3.4826064475414147, "learning_rate": 1.227388650902027e-05, "loss": 0.9035, "step": 2757 }, { "epoch": 0.44, "grad_norm": 3.8391516247015733, "learning_rate": 1.2268803468550576e-05, "loss": 0.9467, "step": 2758 }, { "epoch": 0.44, "grad_norm": 2.393623914460702, "learning_rate": 1.226371980999628e-05, "loss": 0.8961, "step": 2759 }, { "epoch": 0.44, "grad_norm": 1.908601404987463, "learning_rate": 1.2258635534742307e-05, "loss": 0.9342, "step": 2760 }, { "epoch": 0.44, "grad_norm": 2.518269067479074, "learning_rate": 1.2253550644173753e-05, "loss": 0.9299, "step": 2761 }, { "epoch": 0.45, "grad_norm": 3.0724198968935137, "learning_rate": 1.2248465139675886e-05, "loss": 0.8663, "step": 2762 }, { "epoch": 0.45, "grad_norm": 4.233836234751565, "learning_rate": 1.2243379022634136e-05, "loss": 0.929, "step": 2763 }, { "epoch": 0.45, "grad_norm": 3.496433348118753, "learning_rate": 1.22382922944341e-05, "loss": 0.8639, "step": 2764 }, { "epoch": 0.45, "grad_norm": 3.980171769044169, "learning_rate": 1.2233204956461545e-05, "loss": 0.8857, "step": 2765 }, { "epoch": 0.45, "grad_norm": 2.00604276923761, "learning_rate": 1.22281170101024e-05, "loss": 0.881, "step": 2766 }, { "epoch": 0.45, "grad_norm": 1.7149969566918777, "learning_rate": 1.2223028456742762e-05, "loss": 0.8724, "step": 2767 }, { "epoch": 0.45, "grad_norm": 2.408607319049062, "learning_rate": 1.2217939297768898e-05, "loss": 0.3107, "step": 2768 }, { "epoch": 0.45, "grad_norm": 3.011675368020077, "learning_rate": 1.221284953456723e-05, "loss": 0.8122, "step": 2769 }, { "epoch": 0.45, "grad_norm": 1.990347517426935, "learning_rate": 1.2207759168524352e-05, "loss": 0.3535, "step": 2770 }, { "epoch": 0.45, "grad_norm": 2.699110545130148, "learning_rate": 1.2202668201027016e-05, "loss": 0.8992, "step": 2771 }, { "epoch": 0.45, "grad_norm": 2.9267850205713137, "learning_rate": 1.2197576633462147e-05, "loss": 0.8577, "step": 2772 }, { "epoch": 0.45, "grad_norm": 2.0570241891034935, "learning_rate": 1.2192484467216828e-05, "loss": 0.9555, "step": 2773 }, { "epoch": 0.45, "grad_norm": 3.1090897472405783, "learning_rate": 1.2187391703678301e-05, "loss": 0.9126, "step": 2774 }, { "epoch": 0.45, "grad_norm": 1.3186547520667855, "learning_rate": 1.2182298344233981e-05, "loss": 0.3509, "step": 2775 }, { "epoch": 0.45, "grad_norm": 4.045031506919047, "learning_rate": 1.2177204390271434e-05, "loss": 0.9108, "step": 2776 }, { "epoch": 0.45, "grad_norm": 2.683943857512586, "learning_rate": 1.2172109843178396e-05, "loss": 0.9175, "step": 2777 }, { "epoch": 0.45, "grad_norm": 3.020935916255032, "learning_rate": 1.2167014704342763e-05, "loss": 0.9089, "step": 2778 }, { "epoch": 0.45, "grad_norm": 1.529736452030512, "learning_rate": 1.2161918975152593e-05, "loss": 0.8548, "step": 2779 }, { "epoch": 0.45, "grad_norm": 2.2470361474070475, "learning_rate": 1.2156822656996098e-05, "loss": 0.8563, "step": 2780 }, { "epoch": 0.45, "grad_norm": 2.04186958809688, "learning_rate": 1.2151725751261659e-05, "loss": 0.8914, "step": 2781 }, { "epoch": 0.45, "grad_norm": 3.7322522119340125, "learning_rate": 1.2146628259337814e-05, "loss": 0.9232, "step": 2782 }, { "epoch": 0.45, "grad_norm": 3.469814790075862, "learning_rate": 1.2141530182613255e-05, "loss": 0.8484, "step": 2783 }, { "epoch": 0.45, "grad_norm": 2.6875290979983624, "learning_rate": 1.2136431522476847e-05, "loss": 0.9115, "step": 2784 }, { "epoch": 0.45, "grad_norm": 1.887106908829376, "learning_rate": 1.2131332280317598e-05, "loss": 0.9355, "step": 2785 }, { "epoch": 0.45, "grad_norm": 2.364795647274652, "learning_rate": 1.2126232457524686e-05, "loss": 0.99, "step": 2786 }, { "epoch": 0.45, "grad_norm": 2.112622622029105, "learning_rate": 1.2121132055487442e-05, "loss": 0.3602, "step": 2787 }, { "epoch": 0.45, "grad_norm": 2.3780692496262734, "learning_rate": 1.2116031075595356e-05, "loss": 0.9306, "step": 2788 }, { "epoch": 0.45, "grad_norm": 2.8008226208045466, "learning_rate": 1.2110929519238077e-05, "loss": 0.874, "step": 2789 }, { "epoch": 0.45, "grad_norm": 2.7962336310980596, "learning_rate": 1.2105827387805413e-05, "loss": 0.8763, "step": 2790 }, { "epoch": 0.45, "grad_norm": 2.2160574465442626, "learning_rate": 1.2100724682687317e-05, "loss": 0.3208, "step": 2791 }, { "epoch": 0.45, "grad_norm": 4.370453352119407, "learning_rate": 1.2095621405273912e-05, "loss": 0.8469, "step": 2792 }, { "epoch": 0.45, "grad_norm": 2.5816818208621144, "learning_rate": 1.209051755695547e-05, "loss": 0.9492, "step": 2793 }, { "epoch": 0.45, "grad_norm": 2.592736931093041, "learning_rate": 1.2085413139122417e-05, "loss": 0.8542, "step": 2794 }, { "epoch": 0.45, "grad_norm": 4.307518113198391, "learning_rate": 1.2080308153165345e-05, "loss": 0.9295, "step": 2795 }, { "epoch": 0.45, "grad_norm": 3.389530690883822, "learning_rate": 1.2075202600474985e-05, "loss": 0.8923, "step": 2796 }, { "epoch": 0.45, "grad_norm": 3.353102821354963, "learning_rate": 1.2070096482442235e-05, "loss": 0.9599, "step": 2797 }, { "epoch": 0.45, "grad_norm": 1.9009873961623318, "learning_rate": 1.2064989800458138e-05, "loss": 0.9056, "step": 2798 }, { "epoch": 0.45, "grad_norm": 2.953598065304768, "learning_rate": 1.20598825559139e-05, "loss": 0.9396, "step": 2799 }, { "epoch": 0.45, "grad_norm": 2.082284650716051, "learning_rate": 1.205477475020087e-05, "loss": 0.3178, "step": 2800 }, { "epoch": 0.45, "grad_norm": 3.371764433482485, "learning_rate": 1.2049666384710563e-05, "loss": 0.9038, "step": 2801 }, { "epoch": 0.45, "grad_norm": 3.6674900575384735, "learning_rate": 1.204455746083463e-05, "loss": 0.891, "step": 2802 }, { "epoch": 0.45, "grad_norm": 2.388385596474382, "learning_rate": 1.2039447979964887e-05, "loss": 0.8716, "step": 2803 }, { "epoch": 0.45, "grad_norm": 2.96793137356368, "learning_rate": 1.2034337943493297e-05, "loss": 0.881, "step": 2804 }, { "epoch": 0.45, "grad_norm": 2.3717485557496265, "learning_rate": 1.202922735281197e-05, "loss": 0.9091, "step": 2805 }, { "epoch": 0.45, "grad_norm": 2.6648302510148056, "learning_rate": 1.202411620931318e-05, "loss": 0.9016, "step": 2806 }, { "epoch": 0.45, "grad_norm": 3.533914961417268, "learning_rate": 1.2019004514389338e-05, "loss": 0.9772, "step": 2807 }, { "epoch": 0.45, "grad_norm": 1.5837957643853302, "learning_rate": 1.2013892269433017e-05, "loss": 0.8669, "step": 2808 }, { "epoch": 0.45, "grad_norm": 2.223404528761432, "learning_rate": 1.2008779475836923e-05, "loss": 0.9501, "step": 2809 }, { "epoch": 0.45, "grad_norm": 2.2093581095134844, "learning_rate": 1.2003666134993928e-05, "loss": 0.328, "step": 2810 }, { "epoch": 0.45, "grad_norm": 4.136002173240578, "learning_rate": 1.199855224829705e-05, "loss": 0.9479, "step": 2811 }, { "epoch": 0.45, "grad_norm": 1.9810453098590035, "learning_rate": 1.1993437817139446e-05, "loss": 0.8942, "step": 2812 }, { "epoch": 0.45, "grad_norm": 2.03990103481567, "learning_rate": 1.1988322842914431e-05, "loss": 0.3025, "step": 2813 }, { "epoch": 0.45, "grad_norm": 2.828454435772169, "learning_rate": 1.1983207327015465e-05, "loss": 0.9148, "step": 2814 }, { "epoch": 0.45, "grad_norm": 2.2243408898495285, "learning_rate": 1.197809127083616e-05, "loss": 0.3428, "step": 2815 }, { "epoch": 0.45, "grad_norm": 3.073980804502558, "learning_rate": 1.1972974675770259e-05, "loss": 0.8834, "step": 2816 }, { "epoch": 0.45, "grad_norm": 3.1985770339979984, "learning_rate": 1.1967857543211679e-05, "loss": 0.8917, "step": 2817 }, { "epoch": 0.45, "grad_norm": 3.007134537026785, "learning_rate": 1.1962739874554452e-05, "loss": 0.8782, "step": 2818 }, { "epoch": 0.45, "grad_norm": 3.5565570986983, "learning_rate": 1.1957621671192785e-05, "loss": 0.8163, "step": 2819 }, { "epoch": 0.45, "grad_norm": 3.8073414156995016, "learning_rate": 1.195250293452101e-05, "loss": 0.8883, "step": 2820 }, { "epoch": 0.45, "grad_norm": 2.1920654610932067, "learning_rate": 1.1947383665933619e-05, "loss": 0.9422, "step": 2821 }, { "epoch": 0.45, "grad_norm": 2.7249670744140424, "learning_rate": 1.1942263866825234e-05, "loss": 0.8848, "step": 2822 }, { "epoch": 0.45, "grad_norm": 3.285576864887353, "learning_rate": 1.1937143538590635e-05, "loss": 1.0254, "step": 2823 }, { "epoch": 0.46, "grad_norm": 3.4226588914656837, "learning_rate": 1.1932022682624735e-05, "loss": 0.96, "step": 2824 }, { "epoch": 0.46, "grad_norm": 3.2436765629388047, "learning_rate": 1.1926901300322601e-05, "loss": 0.9613, "step": 2825 }, { "epoch": 0.46, "grad_norm": 3.8803502955058735, "learning_rate": 1.1921779393079438e-05, "loss": 0.9146, "step": 2826 }, { "epoch": 0.46, "grad_norm": 3.649017875955859, "learning_rate": 1.1916656962290594e-05, "loss": 0.8511, "step": 2827 }, { "epoch": 0.46, "grad_norm": 2.381527513568147, "learning_rate": 1.1911534009351561e-05, "loss": 0.8761, "step": 2828 }, { "epoch": 0.46, "grad_norm": 2.2978174261369513, "learning_rate": 1.190641053565797e-05, "loss": 0.8631, "step": 2829 }, { "epoch": 0.46, "grad_norm": 2.663210735467396, "learning_rate": 1.1901286542605604e-05, "loss": 0.9817, "step": 2830 }, { "epoch": 0.46, "grad_norm": 3.912363380264962, "learning_rate": 1.1896162031590367e-05, "loss": 0.9102, "step": 2831 }, { "epoch": 0.46, "grad_norm": 3.5758294483792827, "learning_rate": 1.189103700400833e-05, "loss": 0.9432, "step": 2832 }, { "epoch": 0.46, "grad_norm": 2.203050760952746, "learning_rate": 1.1885911461255685e-05, "loss": 0.9167, "step": 2833 }, { "epoch": 0.46, "grad_norm": 1.272460562763759, "learning_rate": 1.1880785404728773e-05, "loss": 0.3428, "step": 2834 }, { "epoch": 0.46, "grad_norm": 2.8813873324485075, "learning_rate": 1.1875658835824071e-05, "loss": 0.9145, "step": 2835 }, { "epoch": 0.46, "grad_norm": 3.440625521099387, "learning_rate": 1.1870531755938202e-05, "loss": 0.96, "step": 2836 }, { "epoch": 0.46, "grad_norm": 2.3911574026426297, "learning_rate": 1.1865404166467918e-05, "loss": 0.2986, "step": 2837 }, { "epoch": 0.46, "grad_norm": 4.042839247246562, "learning_rate": 1.1860276068810119e-05, "loss": 0.8879, "step": 2838 }, { "epoch": 0.46, "grad_norm": 1.5435695589071503, "learning_rate": 1.1855147464361845e-05, "loss": 0.903, "step": 2839 }, { "epoch": 0.46, "grad_norm": 2.683465724151157, "learning_rate": 1.1850018354520256e-05, "loss": 0.883, "step": 2840 }, { "epoch": 0.46, "grad_norm": 2.6243161718575463, "learning_rate": 1.1844888740682678e-05, "loss": 0.9228, "step": 2841 }, { "epoch": 0.46, "grad_norm": 3.463863745989621, "learning_rate": 1.1839758624246551e-05, "loss": 0.9612, "step": 2842 }, { "epoch": 0.46, "grad_norm": 3.7305155326745885, "learning_rate": 1.183462800660946e-05, "loss": 0.9417, "step": 2843 }, { "epoch": 0.46, "grad_norm": 2.3426408304813666, "learning_rate": 1.182949688916913e-05, "loss": 0.3502, "step": 2844 }, { "epoch": 0.46, "grad_norm": 2.2731476169622105, "learning_rate": 1.1824365273323414e-05, "loss": 0.9419, "step": 2845 }, { "epoch": 0.46, "grad_norm": 1.8862989760323232, "learning_rate": 1.1819233160470311e-05, "loss": 0.9733, "step": 2846 }, { "epoch": 0.46, "grad_norm": 3.268981376905306, "learning_rate": 1.1814100552007947e-05, "loss": 0.9327, "step": 2847 }, { "epoch": 0.46, "grad_norm": 2.912785718271079, "learning_rate": 1.1808967449334588e-05, "loss": 0.9682, "step": 2848 }, { "epoch": 0.46, "grad_norm": 3.1866863932723883, "learning_rate": 1.1803833853848627e-05, "loss": 0.9241, "step": 2849 }, { "epoch": 0.46, "grad_norm": 2.903722012237574, "learning_rate": 1.1798699766948606e-05, "loss": 0.9532, "step": 2850 }, { "epoch": 0.46, "grad_norm": 3.0924624451413107, "learning_rate": 1.1793565190033188e-05, "loss": 0.959, "step": 2851 }, { "epoch": 0.46, "grad_norm": 2.2539856895053423, "learning_rate": 1.1788430124501167e-05, "loss": 0.863, "step": 2852 }, { "epoch": 0.46, "grad_norm": 1.4895084157746314, "learning_rate": 1.1783294571751484e-05, "loss": 0.944, "step": 2853 }, { "epoch": 0.46, "grad_norm": 3.1080834855069224, "learning_rate": 1.1778158533183203e-05, "loss": 0.8587, "step": 2854 }, { "epoch": 0.46, "grad_norm": 2.2593224568146946, "learning_rate": 1.1773022010195525e-05, "loss": 0.3406, "step": 2855 }, { "epoch": 0.46, "grad_norm": 3.1773352017649947, "learning_rate": 1.1767885004187773e-05, "loss": 0.9756, "step": 2856 }, { "epoch": 0.46, "grad_norm": 2.5211583287940598, "learning_rate": 1.1762747516559418e-05, "loss": 0.9111, "step": 2857 }, { "epoch": 0.46, "grad_norm": 4.00502556762916, "learning_rate": 1.1757609548710048e-05, "loss": 0.8542, "step": 2858 }, { "epoch": 0.46, "grad_norm": 2.574962641620924, "learning_rate": 1.1752471102039385e-05, "loss": 0.8472, "step": 2859 }, { "epoch": 0.46, "grad_norm": 2.9498485978924887, "learning_rate": 1.1747332177947288e-05, "loss": 0.8784, "step": 2860 }, { "epoch": 0.46, "grad_norm": 4.490076567542859, "learning_rate": 1.1742192777833746e-05, "loss": 0.8849, "step": 2861 }, { "epoch": 0.46, "grad_norm": 1.9926653718945222, "learning_rate": 1.173705290309886e-05, "loss": 0.9685, "step": 2862 }, { "epoch": 0.46, "grad_norm": 1.7761988714753405, "learning_rate": 1.1731912555142883e-05, "loss": 0.8926, "step": 2863 }, { "epoch": 0.46, "grad_norm": 1.9987211907405769, "learning_rate": 1.1726771735366186e-05, "loss": 0.3336, "step": 2864 }, { "epoch": 0.46, "grad_norm": 3.664324069904602, "learning_rate": 1.172163044516927e-05, "loss": 0.9159, "step": 2865 }, { "epoch": 0.46, "grad_norm": 2.00343230891087, "learning_rate": 1.1716488685952765e-05, "loss": 0.8466, "step": 2866 }, { "epoch": 0.46, "grad_norm": 2.3749520146391125, "learning_rate": 1.1711346459117423e-05, "loss": 0.9051, "step": 2867 }, { "epoch": 0.46, "grad_norm": 2.804344645081488, "learning_rate": 1.1706203766064137e-05, "loss": 0.9146, "step": 2868 }, { "epoch": 0.46, "grad_norm": 1.311790456169436, "learning_rate": 1.1701060608193911e-05, "loss": 0.869, "step": 2869 }, { "epoch": 0.46, "grad_norm": 3.598914374797492, "learning_rate": 1.1695916986907882e-05, "loss": 0.9176, "step": 2870 }, { "epoch": 0.46, "grad_norm": 1.7925104623486565, "learning_rate": 1.1690772903607321e-05, "loss": 0.9531, "step": 2871 }, { "epoch": 0.46, "grad_norm": 2.583641485067976, "learning_rate": 1.1685628359693617e-05, "loss": 0.8839, "step": 2872 }, { "epoch": 0.46, "grad_norm": 1.3840737060175914, "learning_rate": 1.168048335656828e-05, "loss": 0.927, "step": 2873 }, { "epoch": 0.46, "grad_norm": 2.7399454107206678, "learning_rate": 1.1675337895632953e-05, "loss": 0.9052, "step": 2874 }, { "epoch": 0.46, "grad_norm": 2.532756121865163, "learning_rate": 1.1670191978289402e-05, "loss": 0.8606, "step": 2875 }, { "epoch": 0.46, "grad_norm": 2.180913764812433, "learning_rate": 1.166504560593952e-05, "loss": 0.9235, "step": 2876 }, { "epoch": 0.46, "grad_norm": 2.302404896183388, "learning_rate": 1.1659898779985317e-05, "loss": 0.9025, "step": 2877 }, { "epoch": 0.46, "grad_norm": 3.0349540495811214, "learning_rate": 1.1654751501828927e-05, "loss": 0.9091, "step": 2878 }, { "epoch": 0.46, "grad_norm": 2.02094884588178, "learning_rate": 1.164960377287262e-05, "loss": 0.8263, "step": 2879 }, { "epoch": 0.46, "grad_norm": 1.8479923644217748, "learning_rate": 1.164445559451877e-05, "loss": 0.8748, "step": 2880 }, { "epoch": 0.46, "grad_norm": 2.2686966692748847, "learning_rate": 1.1639306968169887e-05, "loss": 0.957, "step": 2881 }, { "epoch": 0.46, "grad_norm": 2.6183342542187362, "learning_rate": 1.1634157895228599e-05, "loss": 0.9785, "step": 2882 }, { "epoch": 0.46, "grad_norm": 3.9059771633232616, "learning_rate": 1.1629008377097655e-05, "loss": 0.9369, "step": 2883 }, { "epoch": 0.46, "grad_norm": 3.875360164516857, "learning_rate": 1.1623858415179925e-05, "loss": 0.9953, "step": 2884 }, { "epoch": 0.46, "grad_norm": 3.998241991910808, "learning_rate": 1.16187080108784e-05, "loss": 0.9168, "step": 2885 }, { "epoch": 0.46, "grad_norm": 3.8134959436527343, "learning_rate": 1.1613557165596192e-05, "loss": 0.8944, "step": 2886 }, { "epoch": 0.47, "grad_norm": 1.570144416581519, "learning_rate": 1.1608405880736535e-05, "loss": 0.9097, "step": 2887 }, { "epoch": 0.47, "grad_norm": 3.1346881987614954, "learning_rate": 1.1603254157702782e-05, "loss": 0.8089, "step": 2888 }, { "epoch": 0.47, "grad_norm": 2.2518509562889815, "learning_rate": 1.1598101997898398e-05, "loss": 0.3281, "step": 2889 }, { "epoch": 0.47, "grad_norm": 2.7430229109955646, "learning_rate": 1.1592949402726983e-05, "loss": 0.9615, "step": 2890 }, { "epoch": 0.47, "grad_norm": 2.6190628848105737, "learning_rate": 1.1587796373592237e-05, "loss": 0.8373, "step": 2891 }, { "epoch": 0.47, "grad_norm": 2.096194534068392, "learning_rate": 1.1582642911897991e-05, "loss": 0.8211, "step": 2892 }, { "epoch": 0.47, "grad_norm": 1.3867048235986446, "learning_rate": 1.1577489019048191e-05, "loss": 0.9745, "step": 2893 }, { "epoch": 0.47, "grad_norm": 3.3188448904951144, "learning_rate": 1.15723346964469e-05, "loss": 0.8765, "step": 2894 }, { "epoch": 0.47, "grad_norm": 2.335727643949308, "learning_rate": 1.1567179945498297e-05, "loss": 0.9249, "step": 2895 }, { "epoch": 0.47, "grad_norm": 2.323663109790818, "learning_rate": 1.1562024767606674e-05, "loss": 0.3441, "step": 2896 }, { "epoch": 0.47, "grad_norm": 2.7702623701643025, "learning_rate": 1.1556869164176447e-05, "loss": 0.953, "step": 2897 }, { "epoch": 0.47, "grad_norm": 2.8110735348772984, "learning_rate": 1.1551713136612146e-05, "loss": 0.9554, "step": 2898 }, { "epoch": 0.47, "grad_norm": 2.1732929458649015, "learning_rate": 1.1546556686318414e-05, "loss": 0.8913, "step": 2899 }, { "epoch": 0.47, "grad_norm": 3.3145584332489713, "learning_rate": 1.1541399814700006e-05, "loss": 0.9466, "step": 2900 }, { "epoch": 0.47, "grad_norm": 2.489772284026423, "learning_rate": 1.1536242523161802e-05, "loss": 0.9174, "step": 2901 }, { "epoch": 0.47, "grad_norm": 3.182465560498999, "learning_rate": 1.1531084813108788e-05, "loss": 0.8976, "step": 2902 }, { "epoch": 0.47, "grad_norm": 2.078667132571896, "learning_rate": 1.1525926685946067e-05, "loss": 0.2941, "step": 2903 }, { "epoch": 0.47, "grad_norm": 2.069650911390501, "learning_rate": 1.1520768143078853e-05, "loss": 0.8642, "step": 2904 }, { "epoch": 0.47, "grad_norm": 3.4441591438625414, "learning_rate": 1.1515609185912475e-05, "loss": 0.9046, "step": 2905 }, { "epoch": 0.47, "grad_norm": 3.8103527838912132, "learning_rate": 1.151044981585238e-05, "loss": 0.8508, "step": 2906 }, { "epoch": 0.47, "grad_norm": 3.08832189454542, "learning_rate": 1.1505290034304117e-05, "loss": 0.895, "step": 2907 }, { "epoch": 0.47, "grad_norm": 3.250029930563801, "learning_rate": 1.1500129842673358e-05, "loss": 0.9324, "step": 2908 }, { "epoch": 0.47, "grad_norm": 3.255394786326806, "learning_rate": 1.1494969242365875e-05, "loss": 0.8805, "step": 2909 }, { "epoch": 0.47, "grad_norm": 3.57455586162101, "learning_rate": 1.1489808234787565e-05, "loss": 0.8911, "step": 2910 }, { "epoch": 0.47, "grad_norm": 3.842136943991809, "learning_rate": 1.1484646821344421e-05, "loss": 0.8443, "step": 2911 }, { "epoch": 0.47, "grad_norm": 2.6924989915312825, "learning_rate": 1.1479485003442564e-05, "loss": 0.8905, "step": 2912 }, { "epoch": 0.47, "grad_norm": 3.3273055092986987, "learning_rate": 1.1474322782488203e-05, "loss": 0.8969, "step": 2913 }, { "epoch": 0.47, "grad_norm": 3.113272826064905, "learning_rate": 1.146916015988768e-05, "loss": 0.8483, "step": 2914 }, { "epoch": 0.47, "grad_norm": 2.7123029716411007, "learning_rate": 1.1463997137047431e-05, "loss": 0.919, "step": 2915 }, { "epoch": 0.47, "grad_norm": 2.9447950318385514, "learning_rate": 1.1458833715374005e-05, "loss": 0.9195, "step": 2916 }, { "epoch": 0.47, "grad_norm": 2.3824379548422865, "learning_rate": 1.1453669896274066e-05, "loss": 0.9117, "step": 2917 }, { "epoch": 0.47, "grad_norm": 1.6288135236881296, "learning_rate": 1.1448505681154373e-05, "loss": 0.3565, "step": 2918 }, { "epoch": 0.47, "grad_norm": 2.2079144086488474, "learning_rate": 1.1443341071421804e-05, "loss": 0.885, "step": 2919 }, { "epoch": 0.47, "grad_norm": 4.269025516136192, "learning_rate": 1.1438176068483345e-05, "loss": 0.9214, "step": 2920 }, { "epoch": 0.47, "grad_norm": 3.7923960700281993, "learning_rate": 1.1433010673746079e-05, "loss": 0.9158, "step": 2921 }, { "epoch": 0.47, "grad_norm": 2.810434897222036, "learning_rate": 1.1427844888617203e-05, "loss": 0.9366, "step": 2922 }, { "epoch": 0.47, "grad_norm": 4.090897359734871, "learning_rate": 1.1422678714504022e-05, "loss": 0.8834, "step": 2923 }, { "epoch": 0.47, "grad_norm": 2.6960710867783435, "learning_rate": 1.1417512152813944e-05, "loss": 0.9403, "step": 2924 }, { "epoch": 0.47, "grad_norm": 6.207104384205713, "learning_rate": 1.1412345204954477e-05, "loss": 0.9197, "step": 2925 }, { "epoch": 0.47, "grad_norm": 2.511975597105127, "learning_rate": 1.140717787233325e-05, "loss": 0.916, "step": 2926 }, { "epoch": 0.47, "grad_norm": 3.7683766548628124, "learning_rate": 1.1402010156357978e-05, "loss": 0.9649, "step": 2927 }, { "epoch": 0.47, "grad_norm": 2.5939452316161233, "learning_rate": 1.1396842058436497e-05, "loss": 0.9822, "step": 2928 }, { "epoch": 0.47, "grad_norm": 2.2261479577437897, "learning_rate": 1.139167357997673e-05, "loss": 0.3575, "step": 2929 }, { "epoch": 0.47, "grad_norm": 2.8261933869635243, "learning_rate": 1.1386504722386719e-05, "loss": 0.8776, "step": 2930 }, { "epoch": 0.47, "grad_norm": 2.3061500176154106, "learning_rate": 1.1381335487074605e-05, "loss": 0.9594, "step": 2931 }, { "epoch": 0.47, "grad_norm": 1.0696185013017614, "learning_rate": 1.1376165875448628e-05, "loss": 0.9046, "step": 2932 }, { "epoch": 0.47, "grad_norm": 2.697134447050029, "learning_rate": 1.1370995888917129e-05, "loss": 0.8753, "step": 2933 }, { "epoch": 0.47, "grad_norm": 3.88326391286291, "learning_rate": 1.136582552888856e-05, "loss": 0.9731, "step": 2934 }, { "epoch": 0.47, "grad_norm": 3.7272432932991975, "learning_rate": 1.136065479677147e-05, "loss": 0.9413, "step": 2935 }, { "epoch": 0.47, "grad_norm": 3.2361539656858143, "learning_rate": 1.1355483693974502e-05, "loss": 0.882, "step": 2936 }, { "epoch": 0.47, "grad_norm": 2.0614055851039135, "learning_rate": 1.1350312221906416e-05, "loss": 0.9071, "step": 2937 }, { "epoch": 0.47, "grad_norm": 3.405865288803789, "learning_rate": 1.1345140381976054e-05, "loss": 0.9026, "step": 2938 }, { "epoch": 0.47, "grad_norm": 2.2614582550831317, "learning_rate": 1.1339968175592377e-05, "loss": 0.931, "step": 2939 }, { "epoch": 0.47, "grad_norm": 2.640518908654909, "learning_rate": 1.1334795604164432e-05, "loss": 0.8459, "step": 2940 }, { "epoch": 0.47, "grad_norm": 2.094049460787895, "learning_rate": 1.132962266910137e-05, "loss": 0.3314, "step": 2941 }, { "epoch": 0.47, "grad_norm": 2.8839210095110306, "learning_rate": 1.1324449371812441e-05, "loss": 0.8482, "step": 2942 }, { "epoch": 0.47, "grad_norm": 2.9344789919235286, "learning_rate": 1.1319275713706997e-05, "loss": 0.9003, "step": 2943 }, { "epoch": 0.47, "grad_norm": 3.357380687556411, "learning_rate": 1.1314101696194479e-05, "loss": 0.9012, "step": 2944 }, { "epoch": 0.47, "grad_norm": 1.418379290710667, "learning_rate": 1.1308927320684438e-05, "loss": 0.918, "step": 2945 }, { "epoch": 0.47, "grad_norm": 2.9431058271994255, "learning_rate": 1.1303752588586512e-05, "loss": 0.8563, "step": 2946 }, { "epoch": 0.47, "grad_norm": 3.6603629950531134, "learning_rate": 1.1298577501310444e-05, "loss": 0.8811, "step": 2947 }, { "epoch": 0.47, "grad_norm": 2.666911551039767, "learning_rate": 1.1293402060266072e-05, "loss": 0.9443, "step": 2948 }, { "epoch": 0.48, "grad_norm": 3.604797234619746, "learning_rate": 1.128822626686332e-05, "loss": 0.9211, "step": 2949 }, { "epoch": 0.48, "grad_norm": 3.537754111716678, "learning_rate": 1.1283050122512227e-05, "loss": 0.9335, "step": 2950 }, { "epoch": 0.48, "grad_norm": 2.305080781129316, "learning_rate": 1.127787362862291e-05, "loss": 0.9096, "step": 2951 }, { "epoch": 0.48, "grad_norm": 2.6057443544498127, "learning_rate": 1.1272696786605595e-05, "loss": 0.9868, "step": 2952 }, { "epoch": 0.48, "grad_norm": 4.278394651866415, "learning_rate": 1.1267519597870591e-05, "loss": 0.8904, "step": 2953 }, { "epoch": 0.48, "grad_norm": 3.8170073083733143, "learning_rate": 1.126234206382831e-05, "loss": 0.9669, "step": 2954 }, { "epoch": 0.48, "grad_norm": 3.5708153034646752, "learning_rate": 1.1257164185889255e-05, "loss": 0.8667, "step": 2955 }, { "epoch": 0.48, "grad_norm": 1.0839368655129176, "learning_rate": 1.1251985965464022e-05, "loss": 0.9107, "step": 2956 }, { "epoch": 0.48, "grad_norm": 3.2028906759755285, "learning_rate": 1.12468074039633e-05, "loss": 0.9291, "step": 2957 }, { "epoch": 0.48, "grad_norm": 3.849688029344561, "learning_rate": 1.1241628502797872e-05, "loss": 0.8976, "step": 2958 }, { "epoch": 0.48, "grad_norm": 2.4539255184569635, "learning_rate": 1.1236449263378617e-05, "loss": 0.8958, "step": 2959 }, { "epoch": 0.48, "grad_norm": 3.1662465034281824, "learning_rate": 1.1231269687116495e-05, "loss": 0.9264, "step": 2960 }, { "epoch": 0.48, "grad_norm": 2.616520400930191, "learning_rate": 1.1226089775422575e-05, "loss": 0.9388, "step": 2961 }, { "epoch": 0.48, "grad_norm": 2.885816874562262, "learning_rate": 1.1220909529707999e-05, "loss": 1.0058, "step": 2962 }, { "epoch": 0.48, "grad_norm": 3.3544643262125353, "learning_rate": 1.1215728951384013e-05, "loss": 0.9019, "step": 2963 }, { "epoch": 0.48, "grad_norm": 3.5284853214857548, "learning_rate": 1.121054804186195e-05, "loss": 0.9327, "step": 2964 }, { "epoch": 0.48, "grad_norm": 3.3347975058325847, "learning_rate": 1.1205366802553231e-05, "loss": 0.8877, "step": 2965 }, { "epoch": 0.48, "grad_norm": 4.369733419776555, "learning_rate": 1.1200185234869372e-05, "loss": 0.9169, "step": 2966 }, { "epoch": 0.48, "grad_norm": 3.367868447541241, "learning_rate": 1.1195003340221968e-05, "loss": 0.9076, "step": 2967 }, { "epoch": 0.48, "grad_norm": 4.199108374276141, "learning_rate": 1.1189821120022712e-05, "loss": 0.8983, "step": 2968 }, { "epoch": 0.48, "grad_norm": 2.693397704274267, "learning_rate": 1.1184638575683388e-05, "loss": 0.8877, "step": 2969 }, { "epoch": 0.48, "grad_norm": 2.454315382678059, "learning_rate": 1.1179455708615863e-05, "loss": 0.9074, "step": 2970 }, { "epoch": 0.48, "grad_norm": 3.0366227818098097, "learning_rate": 1.1174272520232087e-05, "loss": 0.914, "step": 2971 }, { "epoch": 0.48, "grad_norm": 3.95971684955964, "learning_rate": 1.1169089011944113e-05, "loss": 0.9007, "step": 2972 }, { "epoch": 0.48, "grad_norm": 3.059025876442391, "learning_rate": 1.116390518516406e-05, "loss": 0.894, "step": 2973 }, { "epoch": 0.48, "grad_norm": 3.301413932523067, "learning_rate": 1.1158721041304155e-05, "loss": 0.9318, "step": 2974 }, { "epoch": 0.48, "grad_norm": 2.4818761002003313, "learning_rate": 1.1153536581776697e-05, "loss": 0.9027, "step": 2975 }, { "epoch": 0.48, "grad_norm": 3.620357039321272, "learning_rate": 1.1148351807994075e-05, "loss": 0.9185, "step": 2976 }, { "epoch": 0.48, "grad_norm": 3.5359699134841267, "learning_rate": 1.114316672136877e-05, "loss": 0.8811, "step": 2977 }, { "epoch": 0.48, "grad_norm": 3.6448652598904596, "learning_rate": 1.1137981323313335e-05, "loss": 0.8785, "step": 2978 }, { "epoch": 0.48, "grad_norm": 2.835961712190031, "learning_rate": 1.1132795615240419e-05, "loss": 0.9676, "step": 2979 }, { "epoch": 0.48, "grad_norm": 2.7276106516685896, "learning_rate": 1.1127609598562754e-05, "loss": 0.8345, "step": 2980 }, { "epoch": 0.48, "grad_norm": 3.902955519919015, "learning_rate": 1.1122423274693152e-05, "loss": 0.8811, "step": 2981 }, { "epoch": 0.48, "grad_norm": 3.88693245918433, "learning_rate": 1.1117236645044506e-05, "loss": 0.9054, "step": 2982 }, { "epoch": 0.48, "grad_norm": 2.3077885858307883, "learning_rate": 1.1112049711029807e-05, "loss": 0.9785, "step": 2983 }, { "epoch": 0.48, "grad_norm": 3.087205843484355, "learning_rate": 1.1106862474062107e-05, "loss": 0.9645, "step": 2984 }, { "epoch": 0.48, "grad_norm": 1.848477590324471, "learning_rate": 1.1101674935554561e-05, "loss": 0.3427, "step": 2985 }, { "epoch": 0.48, "grad_norm": 3.473118960402933, "learning_rate": 1.1096487096920394e-05, "loss": 0.9109, "step": 2986 }, { "epoch": 0.48, "grad_norm": 2.4382998792431057, "learning_rate": 1.1091298959572919e-05, "loss": 0.7724, "step": 2987 }, { "epoch": 0.48, "grad_norm": 4.505258454795555, "learning_rate": 1.1086110524925524e-05, "loss": 0.9547, "step": 2988 }, { "epoch": 0.48, "grad_norm": 2.7130631350107657, "learning_rate": 1.1080921794391684e-05, "loss": 0.94, "step": 2989 }, { "epoch": 0.48, "grad_norm": 2.456063618274021, "learning_rate": 1.1075732769384948e-05, "loss": 0.8863, "step": 2990 }, { "epoch": 0.48, "grad_norm": 2.349683448086865, "learning_rate": 1.1070543451318956e-05, "loss": 0.9123, "step": 2991 }, { "epoch": 0.48, "grad_norm": 3.1663172847015666, "learning_rate": 1.106535384160742e-05, "loss": 0.925, "step": 2992 }, { "epoch": 0.48, "grad_norm": 2.960538892663487, "learning_rate": 1.1060163941664125e-05, "loss": 0.8769, "step": 2993 }, { "epoch": 0.48, "grad_norm": 1.992276417860934, "learning_rate": 1.1054973752902956e-05, "loss": 0.3342, "step": 2994 }, { "epoch": 0.48, "grad_norm": 2.952720331793732, "learning_rate": 1.1049783276737852e-05, "loss": 0.8851, "step": 2995 }, { "epoch": 0.48, "grad_norm": 2.3511040833221344, "learning_rate": 1.1044592514582843e-05, "loss": 0.8977, "step": 2996 }, { "epoch": 0.48, "grad_norm": 4.170592150096469, "learning_rate": 1.1039401467852044e-05, "loss": 0.9076, "step": 2997 }, { "epoch": 0.48, "grad_norm": 1.9094187555122573, "learning_rate": 1.1034210137959632e-05, "loss": 0.3286, "step": 2998 }, { "epoch": 0.48, "grad_norm": 2.801055256356757, "learning_rate": 1.102901852631987e-05, "loss": 0.932, "step": 2999 }, { "epoch": 0.48, "grad_norm": 1.8158076130465077, "learning_rate": 1.1023826634347096e-05, "loss": 0.9147, "step": 3000 }, { "epoch": 0.48, "grad_norm": 3.036822829880836, "learning_rate": 1.1018634463455727e-05, "loss": 0.8471, "step": 3001 }, { "epoch": 0.48, "grad_norm": 3.2209228292634258, "learning_rate": 1.101344201506025e-05, "loss": 0.9252, "step": 3002 }, { "epoch": 0.48, "grad_norm": 1.4223551515870057, "learning_rate": 1.1008249290575232e-05, "loss": 0.894, "step": 3003 }, { "epoch": 0.48, "grad_norm": 4.119950643077123, "learning_rate": 1.1003056291415313e-05, "loss": 0.9095, "step": 3004 }, { "epoch": 0.48, "grad_norm": 3.569452428417125, "learning_rate": 1.0997863018995214e-05, "loss": 0.954, "step": 3005 }, { "epoch": 0.48, "grad_norm": 2.8546041074535236, "learning_rate": 1.0992669474729723e-05, "loss": 0.8162, "step": 3006 }, { "epoch": 0.48, "grad_norm": 3.677207548440675, "learning_rate": 1.09874756600337e-05, "loss": 0.945, "step": 3007 }, { "epoch": 0.48, "grad_norm": 2.3765235832484053, "learning_rate": 1.0982281576322091e-05, "loss": 0.938, "step": 3008 }, { "epoch": 0.48, "grad_norm": 2.771600679157629, "learning_rate": 1.09770872250099e-05, "loss": 0.8787, "step": 3009 }, { "epoch": 0.48, "grad_norm": 2.374099460327658, "learning_rate": 1.097189260751222e-05, "loss": 0.9326, "step": 3010 }, { "epoch": 0.49, "grad_norm": 3.018173010991879, "learning_rate": 1.09666977252442e-05, "loss": 0.861, "step": 3011 }, { "epoch": 0.49, "grad_norm": 1.6267923593946207, "learning_rate": 1.0961502579621073e-05, "loss": 0.3502, "step": 3012 }, { "epoch": 0.49, "grad_norm": 1.6625448120502482, "learning_rate": 1.0956307172058138e-05, "loss": 0.9549, "step": 3013 }, { "epoch": 0.49, "grad_norm": 2.35188995818836, "learning_rate": 1.0951111503970771e-05, "loss": 0.8634, "step": 3014 }, { "epoch": 0.49, "grad_norm": 2.4808691467563615, "learning_rate": 1.0945915576774411e-05, "loss": 0.8968, "step": 3015 }, { "epoch": 0.49, "grad_norm": 2.670923253141116, "learning_rate": 1.0940719391884578e-05, "loss": 0.9026, "step": 3016 }, { "epoch": 0.49, "grad_norm": 2.8967424973489213, "learning_rate": 1.0935522950716847e-05, "loss": 0.9507, "step": 3017 }, { "epoch": 0.49, "grad_norm": 2.964520302319377, "learning_rate": 1.0930326254686877e-05, "loss": 0.8619, "step": 3018 }, { "epoch": 0.49, "grad_norm": 2.374462066765091, "learning_rate": 1.0925129305210393e-05, "loss": 0.9073, "step": 3019 }, { "epoch": 0.49, "grad_norm": 3.979920653697634, "learning_rate": 1.0919932103703178e-05, "loss": 0.875, "step": 3020 }, { "epoch": 0.49, "grad_norm": 2.9872761151686054, "learning_rate": 1.091473465158111e-05, "loss": 0.9421, "step": 3021 }, { "epoch": 0.49, "grad_norm": 3.2126700745919603, "learning_rate": 1.0909536950260102e-05, "loss": 0.9769, "step": 3022 }, { "epoch": 0.49, "grad_norm": 4.089398816234809, "learning_rate": 1.0904339001156158e-05, "loss": 0.9741, "step": 3023 }, { "epoch": 0.49, "grad_norm": 2.0396725264604987, "learning_rate": 1.0899140805685342e-05, "loss": 0.9061, "step": 3024 }, { "epoch": 0.49, "grad_norm": 2.5630615363441653, "learning_rate": 1.0893942365263786e-05, "loss": 0.9424, "step": 3025 }, { "epoch": 0.49, "grad_norm": 4.87373844353753, "learning_rate": 1.0888743681307686e-05, "loss": 0.97, "step": 3026 }, { "epoch": 0.49, "grad_norm": 3.316568862648704, "learning_rate": 1.0883544755233313e-05, "loss": 0.8521, "step": 3027 }, { "epoch": 0.49, "grad_norm": 3.63242999288331, "learning_rate": 1.087834558845699e-05, "loss": 0.9201, "step": 3028 }, { "epoch": 0.49, "grad_norm": 3.680608613853364, "learning_rate": 1.0873146182395118e-05, "loss": 0.8963, "step": 3029 }, { "epoch": 0.49, "grad_norm": 3.643943485730434, "learning_rate": 1.086794653846416e-05, "loss": 0.8958, "step": 3030 }, { "epoch": 0.49, "grad_norm": 2.5954485927040407, "learning_rate": 1.0862746658080635e-05, "loss": 0.94, "step": 3031 }, { "epoch": 0.49, "grad_norm": 2.6213889943510083, "learning_rate": 1.085754654266114e-05, "loss": 0.8172, "step": 3032 }, { "epoch": 0.49, "grad_norm": 2.6655263151302053, "learning_rate": 1.0852346193622332e-05, "loss": 0.8953, "step": 3033 }, { "epoch": 0.49, "grad_norm": 3.095875789989556, "learning_rate": 1.0847145612380922e-05, "loss": 0.9679, "step": 3034 }, { "epoch": 0.49, "grad_norm": 2.8340047340003554, "learning_rate": 1.0841944800353696e-05, "loss": 0.9244, "step": 3035 }, { "epoch": 0.49, "grad_norm": 2.5569119815767536, "learning_rate": 1.08367437589575e-05, "loss": 0.8609, "step": 3036 }, { "epoch": 0.49, "grad_norm": 1.6466415529356624, "learning_rate": 1.083154248960924e-05, "loss": 0.357, "step": 3037 }, { "epoch": 0.49, "grad_norm": 3.918608623673998, "learning_rate": 1.0826340993725882e-05, "loss": 0.9129, "step": 3038 }, { "epoch": 0.49, "grad_norm": 2.892876320561751, "learning_rate": 1.0821139272724457e-05, "loss": 0.8673, "step": 3039 }, { "epoch": 0.49, "grad_norm": 2.9811818880821583, "learning_rate": 1.0815937328022061e-05, "loss": 0.9756, "step": 3040 }, { "epoch": 0.49, "grad_norm": 1.7948307516484634, "learning_rate": 1.0810735161035848e-05, "loss": 0.9022, "step": 3041 }, { "epoch": 0.49, "grad_norm": 4.4008535250866485, "learning_rate": 1.0805532773183022e-05, "loss": 0.876, "step": 3042 }, { "epoch": 0.49, "grad_norm": 2.2643920700689106, "learning_rate": 1.0800330165880872e-05, "loss": 0.8915, "step": 3043 }, { "epoch": 0.49, "grad_norm": 2.9129273195271552, "learning_rate": 1.0795127340546718e-05, "loss": 0.9425, "step": 3044 }, { "epoch": 0.49, "grad_norm": 4.290523981848646, "learning_rate": 1.0789924298597963e-05, "loss": 0.9427, "step": 3045 }, { "epoch": 0.49, "grad_norm": 3.165578521357183, "learning_rate": 1.0784721041452054e-05, "loss": 0.9231, "step": 3046 }, { "epoch": 0.49, "grad_norm": 2.3976751352251933, "learning_rate": 1.07795175705265e-05, "loss": 0.8806, "step": 3047 }, { "epoch": 0.49, "grad_norm": 2.769100096464243, "learning_rate": 1.0774313887238874e-05, "loss": 0.8773, "step": 3048 }, { "epoch": 0.49, "grad_norm": 4.134361049083605, "learning_rate": 1.0769109993006802e-05, "loss": 0.8377, "step": 3049 }, { "epoch": 0.49, "grad_norm": 2.0186324046934154, "learning_rate": 1.0763905889247964e-05, "loss": 0.8844, "step": 3050 }, { "epoch": 0.49, "grad_norm": 2.18854061987911, "learning_rate": 1.0758701577380107e-05, "loss": 0.875, "step": 3051 }, { "epoch": 0.49, "grad_norm": 3.5841228738992084, "learning_rate": 1.0753497058821027e-05, "loss": 0.8782, "step": 3052 }, { "epoch": 0.49, "grad_norm": 2.9131658161400047, "learning_rate": 1.0748292334988573e-05, "loss": 0.9009, "step": 3053 }, { "epoch": 0.49, "grad_norm": 1.9010466553783716, "learning_rate": 1.0743087407300665e-05, "loss": 0.8968, "step": 3054 }, { "epoch": 0.49, "grad_norm": 3.034216775903836, "learning_rate": 1.0737882277175262e-05, "loss": 0.8316, "step": 3055 }, { "epoch": 0.49, "grad_norm": 2.9677694207051624, "learning_rate": 1.073267694603039e-05, "loss": 0.9709, "step": 3056 }, { "epoch": 0.49, "grad_norm": 3.6297534163192475, "learning_rate": 1.0727471415284119e-05, "loss": 0.9363, "step": 3057 }, { "epoch": 0.49, "grad_norm": 2.7093693927852067, "learning_rate": 1.072226568635458e-05, "loss": 0.9279, "step": 3058 }, { "epoch": 0.49, "grad_norm": 3.274040075663892, "learning_rate": 1.0717059760659963e-05, "loss": 0.9317, "step": 3059 }, { "epoch": 0.49, "grad_norm": 4.361847425910144, "learning_rate": 1.0711853639618497e-05, "loss": 0.9289, "step": 3060 }, { "epoch": 0.49, "grad_norm": 3.806090548578298, "learning_rate": 1.0706647324648481e-05, "loss": 0.8576, "step": 3061 }, { "epoch": 0.49, "grad_norm": 3.9243147962642118, "learning_rate": 1.0701440817168251e-05, "loss": 0.9194, "step": 3062 }, { "epoch": 0.49, "grad_norm": 1.6067593253129047, "learning_rate": 1.069623411859621e-05, "loss": 0.9121, "step": 3063 }, { "epoch": 0.49, "grad_norm": 1.763400000890572, "learning_rate": 1.0691027230350802e-05, "loss": 0.9429, "step": 3064 }, { "epoch": 0.49, "grad_norm": 2.8838032826435716, "learning_rate": 1.0685820153850528e-05, "loss": 0.9089, "step": 3065 }, { "epoch": 0.49, "grad_norm": 3.1661228583012098, "learning_rate": 1.0680612890513937e-05, "loss": 1.0123, "step": 3066 }, { "epoch": 0.49, "grad_norm": 2.745824678568497, "learning_rate": 1.0675405441759639e-05, "loss": 0.9, "step": 3067 }, { "epoch": 0.49, "grad_norm": 2.1049019186769145, "learning_rate": 1.0670197809006277e-05, "loss": 0.3376, "step": 3068 }, { "epoch": 0.49, "grad_norm": 2.809494447131354, "learning_rate": 1.0664989993672559e-05, "loss": 0.88, "step": 3069 }, { "epoch": 0.49, "grad_norm": 3.6822285878319496, "learning_rate": 1.0659781997177239e-05, "loss": 0.9044, "step": 3070 }, { "epoch": 0.49, "grad_norm": 2.9095042067866275, "learning_rate": 1.0654573820939112e-05, "loss": 0.9135, "step": 3071 }, { "epoch": 0.49, "grad_norm": 2.889585235822437, "learning_rate": 1.0649365466377033e-05, "loss": 0.8906, "step": 3072 }, { "epoch": 0.5, "grad_norm": 3.89082746476835, "learning_rate": 1.0644156934909902e-05, "loss": 0.9476, "step": 3073 }, { "epoch": 0.5, "grad_norm": 2.3167676981386527, "learning_rate": 1.0638948227956665e-05, "loss": 0.9581, "step": 3074 }, { "epoch": 0.5, "grad_norm": 1.7008430522296498, "learning_rate": 1.0633739346936318e-05, "loss": 0.8964, "step": 3075 }, { "epoch": 0.5, "grad_norm": 2.983542600854032, "learning_rate": 1.0628530293267908e-05, "loss": 0.9413, "step": 3076 }, { "epoch": 0.5, "grad_norm": 3.210740132078544, "learning_rate": 1.0623321068370515e-05, "loss": 0.9071, "step": 3077 }, { "epoch": 0.5, "grad_norm": 2.2637166371828106, "learning_rate": 1.0618111673663283e-05, "loss": 0.9773, "step": 3078 }, { "epoch": 0.5, "grad_norm": 3.1444596750394522, "learning_rate": 1.0612902110565393e-05, "loss": 0.9537, "step": 3079 }, { "epoch": 0.5, "grad_norm": 4.247909031527949, "learning_rate": 1.060769238049607e-05, "loss": 0.9697, "step": 3080 }, { "epoch": 0.5, "grad_norm": 3.4540158326670207, "learning_rate": 1.0602482484874598e-05, "loss": 0.968, "step": 3081 }, { "epoch": 0.5, "grad_norm": 2.359724814648246, "learning_rate": 1.0597272425120286e-05, "loss": 0.9533, "step": 3082 }, { "epoch": 0.5, "grad_norm": 3.0121389212214806, "learning_rate": 1.05920622026525e-05, "loss": 0.9321, "step": 3083 }, { "epoch": 0.5, "grad_norm": 3.8352695357437656, "learning_rate": 1.0586851818890651e-05, "loss": 0.9415, "step": 3084 }, { "epoch": 0.5, "grad_norm": 3.411854603977737, "learning_rate": 1.058164127525419e-05, "loss": 0.945, "step": 3085 }, { "epoch": 0.5, "grad_norm": 2.7092685454155294, "learning_rate": 1.0576430573162612e-05, "loss": 0.9159, "step": 3086 }, { "epoch": 0.5, "grad_norm": 2.524004704176302, "learning_rate": 1.057121971403546e-05, "loss": 0.9004, "step": 3087 }, { "epoch": 0.5, "grad_norm": 2.685446287178729, "learning_rate": 1.0566008699292307e-05, "loss": 0.9394, "step": 3088 }, { "epoch": 0.5, "grad_norm": 1.8212132660585167, "learning_rate": 1.0560797530352784e-05, "loss": 0.8773, "step": 3089 }, { "epoch": 0.5, "grad_norm": 1.8076274754062989, "learning_rate": 1.0555586208636557e-05, "loss": 0.9187, "step": 3090 }, { "epoch": 0.5, "grad_norm": 3.2084844644463764, "learning_rate": 1.0550374735563329e-05, "loss": 0.8653, "step": 3091 }, { "epoch": 0.5, "grad_norm": 2.466384140365048, "learning_rate": 1.0545163112552856e-05, "loss": 0.9198, "step": 3092 }, { "epoch": 0.5, "grad_norm": 2.7806122295063234, "learning_rate": 1.053995134102492e-05, "loss": 0.9093, "step": 3093 }, { "epoch": 0.5, "grad_norm": 1.7432965909442382, "learning_rate": 1.0534739422399357e-05, "loss": 0.9566, "step": 3094 }, { "epoch": 0.5, "grad_norm": 2.2391935024018648, "learning_rate": 1.0529527358096035e-05, "loss": 0.8936, "step": 3095 }, { "epoch": 0.5, "grad_norm": 1.9329937861422122, "learning_rate": 1.052431514953486e-05, "loss": 0.9544, "step": 3096 }, { "epoch": 0.5, "grad_norm": 4.453802974020796, "learning_rate": 1.051910279813579e-05, "loss": 0.9115, "step": 3097 }, { "epoch": 0.5, "grad_norm": 3.3388066337920805, "learning_rate": 1.0513890305318808e-05, "loss": 0.8637, "step": 3098 }, { "epoch": 0.5, "grad_norm": 3.583970138978879, "learning_rate": 1.0508677672503942e-05, "loss": 0.8559, "step": 3099 }, { "epoch": 0.5, "grad_norm": 1.9377262844720706, "learning_rate": 1.0503464901111253e-05, "loss": 0.9243, "step": 3100 }, { "epoch": 0.5, "grad_norm": 4.190248185411776, "learning_rate": 1.0498251992560851e-05, "loss": 0.9026, "step": 3101 }, { "epoch": 0.5, "grad_norm": 2.0327885596282553, "learning_rate": 1.0493038948272866e-05, "loss": 0.9621, "step": 3102 }, { "epoch": 0.5, "grad_norm": 2.9416053167615144, "learning_rate": 1.0487825769667489e-05, "loss": 0.9196, "step": 3103 }, { "epoch": 0.5, "grad_norm": 3.1504266480232794, "learning_rate": 1.0482612458164918e-05, "loss": 0.8997, "step": 3104 }, { "epoch": 0.5, "grad_norm": 3.012553810493782, "learning_rate": 1.0477399015185415e-05, "loss": 0.8918, "step": 3105 }, { "epoch": 0.5, "grad_norm": 1.206715959276279, "learning_rate": 1.0472185442149257e-05, "loss": 0.3452, "step": 3106 }, { "epoch": 0.5, "grad_norm": 3.4290189678015737, "learning_rate": 1.046697174047677e-05, "loss": 0.8983, "step": 3107 }, { "epoch": 0.5, "grad_norm": 3.041937480697887, "learning_rate": 1.0461757911588309e-05, "loss": 0.8726, "step": 3108 }, { "epoch": 0.5, "grad_norm": 3.133373181786346, "learning_rate": 1.0456543956904266e-05, "loss": 0.9391, "step": 3109 }, { "epoch": 0.5, "grad_norm": 3.9827038420517353, "learning_rate": 1.0451329877845062e-05, "loss": 0.8647, "step": 3110 }, { "epoch": 0.5, "grad_norm": 3.440538623914, "learning_rate": 1.0446115675831159e-05, "loss": 0.8551, "step": 3111 }, { "epoch": 0.5, "grad_norm": 3.5144863064768206, "learning_rate": 1.044090135228305e-05, "loss": 0.878, "step": 3112 }, { "epoch": 0.5, "grad_norm": 2.896824143507542, "learning_rate": 1.0435686908621256e-05, "loss": 0.8958, "step": 3113 }, { "epoch": 0.5, "grad_norm": 1.7172723140024861, "learning_rate": 1.0430472346266343e-05, "loss": 0.8713, "step": 3114 }, { "epoch": 0.5, "grad_norm": 2.1710882065336077, "learning_rate": 1.0425257666638891e-05, "loss": 0.9509, "step": 3115 }, { "epoch": 0.5, "grad_norm": 2.143361702054448, "learning_rate": 1.0420042871159532e-05, "loss": 0.8686, "step": 3116 }, { "epoch": 0.5, "grad_norm": 3.8050539439484194, "learning_rate": 1.0414827961248917e-05, "loss": 0.9564, "step": 3117 }, { "epoch": 0.5, "grad_norm": 2.322309052467944, "learning_rate": 1.040961293832773e-05, "loss": 0.9298, "step": 3118 }, { "epoch": 0.5, "grad_norm": 2.946937659073786, "learning_rate": 1.0404397803816686e-05, "loss": 0.9383, "step": 3119 }, { "epoch": 0.5, "grad_norm": 2.4761391647929707, "learning_rate": 1.0399182559136536e-05, "loss": 0.9183, "step": 3120 }, { "epoch": 0.5, "grad_norm": 1.782669773122401, "learning_rate": 1.0393967205708049e-05, "loss": 0.7782, "step": 3121 }, { "epoch": 0.5, "grad_norm": 4.434262236222599, "learning_rate": 1.0388751744952037e-05, "loss": 0.8863, "step": 3122 }, { "epoch": 0.5, "grad_norm": 2.175249291526294, "learning_rate": 1.0383536178289336e-05, "loss": 0.8967, "step": 3123 }, { "epoch": 0.5, "grad_norm": 3.5426307861909723, "learning_rate": 1.0378320507140804e-05, "loss": 0.8286, "step": 3124 }, { "epoch": 0.5, "grad_norm": 1.7459051043097975, "learning_rate": 1.0373104732927341e-05, "loss": 0.323, "step": 3125 }, { "epoch": 0.5, "grad_norm": 1.5598965126337558, "learning_rate": 1.036788885706986e-05, "loss": 0.3392, "step": 3126 }, { "epoch": 0.5, "grad_norm": 5.388939989422291, "learning_rate": 1.0362672880989317e-05, "loss": 0.9711, "step": 3127 }, { "epoch": 0.5, "grad_norm": 3.3247828649308797, "learning_rate": 1.0357456806106684e-05, "loss": 1.0002, "step": 3128 }, { "epoch": 0.5, "grad_norm": 3.927440084076862, "learning_rate": 1.0352240633842962e-05, "loss": 0.8943, "step": 3129 }, { "epoch": 0.5, "grad_norm": 2.3846142570076445, "learning_rate": 1.0347024365619183e-05, "loss": 0.9334, "step": 3130 }, { "epoch": 0.5, "grad_norm": 3.1343842556665593, "learning_rate": 1.0341808002856402e-05, "loss": 0.8898, "step": 3131 }, { "epoch": 0.5, "grad_norm": 2.6536676757233018, "learning_rate": 1.0336591546975695e-05, "loss": 0.901, "step": 3132 }, { "epoch": 0.5, "grad_norm": 1.72224217150356, "learning_rate": 1.0331374999398175e-05, "loss": 0.868, "step": 3133 }, { "epoch": 0.5, "grad_norm": 2.471479235192024, "learning_rate": 1.0326158361544971e-05, "loss": 0.8637, "step": 3134 }, { "epoch": 0.51, "grad_norm": 3.8608390718369816, "learning_rate": 1.0320941634837238e-05, "loss": 0.8414, "step": 3135 }, { "epoch": 0.51, "grad_norm": 2.1248370837422548, "learning_rate": 1.031572482069616e-05, "loss": 0.9576, "step": 3136 }, { "epoch": 0.51, "grad_norm": 3.3658036576848867, "learning_rate": 1.0310507920542932e-05, "loss": 0.915, "step": 3137 }, { "epoch": 0.51, "grad_norm": 2.4603301736959273, "learning_rate": 1.0305290935798794e-05, "loss": 0.828, "step": 3138 }, { "epoch": 0.51, "grad_norm": 2.9170765361716686, "learning_rate": 1.0300073867884983e-05, "loss": 1.0173, "step": 3139 }, { "epoch": 0.51, "grad_norm": 1.9786928178497738, "learning_rate": 1.0294856718222782e-05, "loss": 0.9384, "step": 3140 }, { "epoch": 0.51, "grad_norm": 1.0572395249211126, "learning_rate": 1.0289639488233484e-05, "loss": 0.9156, "step": 3141 }, { "epoch": 0.51, "grad_norm": 2.497862569707304, "learning_rate": 1.0284422179338404e-05, "loss": 0.846, "step": 3142 }, { "epoch": 0.51, "grad_norm": 2.510346365507536, "learning_rate": 1.0279204792958883e-05, "loss": 0.9142, "step": 3143 }, { "epoch": 0.51, "grad_norm": 3.469084440037719, "learning_rate": 1.0273987330516279e-05, "loss": 0.8441, "step": 3144 }, { "epoch": 0.51, "grad_norm": 2.4291700198465, "learning_rate": 1.0268769793431975e-05, "loss": 0.3349, "step": 3145 }, { "epoch": 0.51, "grad_norm": 3.41114665153398, "learning_rate": 1.026355218312737e-05, "loss": 0.8991, "step": 3146 }, { "epoch": 0.51, "grad_norm": 1.9982484061441086, "learning_rate": 1.0258334501023886e-05, "loss": 0.8842, "step": 3147 }, { "epoch": 0.51, "grad_norm": 2.5868096393550126, "learning_rate": 1.025311674854296e-05, "loss": 0.8652, "step": 3148 }, { "epoch": 0.51, "grad_norm": 3.734634071206038, "learning_rate": 1.0247898927106058e-05, "loss": 0.8443, "step": 3149 }, { "epoch": 0.51, "grad_norm": 3.5702103280905173, "learning_rate": 1.0242681038134654e-05, "loss": 0.9376, "step": 3150 }, { "epoch": 0.51, "grad_norm": 3.843759552238194, "learning_rate": 1.0237463083050246e-05, "loss": 0.8907, "step": 3151 }, { "epoch": 0.51, "grad_norm": 3.1107486010325043, "learning_rate": 1.0232245063274349e-05, "loss": 0.9253, "step": 3152 }, { "epoch": 0.51, "grad_norm": 3.4859273557288804, "learning_rate": 1.0227026980228492e-05, "loss": 0.9448, "step": 3153 }, { "epoch": 0.51, "grad_norm": 1.949108846925927, "learning_rate": 1.022180883533423e-05, "loss": 0.8851, "step": 3154 }, { "epoch": 0.51, "grad_norm": 3.01584652101898, "learning_rate": 1.0216590630013125e-05, "loss": 0.9153, "step": 3155 }, { "epoch": 0.51, "grad_norm": 2.915593049588805, "learning_rate": 1.0211372365686763e-05, "loss": 0.8538, "step": 3156 }, { "epoch": 0.51, "grad_norm": 1.8756394249661055, "learning_rate": 1.0206154043776741e-05, "loss": 0.9055, "step": 3157 }, { "epoch": 0.51, "grad_norm": 3.1372481833998416, "learning_rate": 1.0200935665704679e-05, "loss": 0.8949, "step": 3158 }, { "epoch": 0.51, "grad_norm": 3.052858863874453, "learning_rate": 1.0195717232892198e-05, "loss": 0.9226, "step": 3159 }, { "epoch": 0.51, "grad_norm": 2.843318864607436, "learning_rate": 1.0190498746760951e-05, "loss": 0.9083, "step": 3160 }, { "epoch": 0.51, "grad_norm": 2.909248200141551, "learning_rate": 1.0185280208732594e-05, "loss": 0.883, "step": 3161 }, { "epoch": 0.51, "grad_norm": 3.374852212566506, "learning_rate": 1.0180061620228799e-05, "loss": 0.938, "step": 3162 }, { "epoch": 0.51, "grad_norm": 2.628254643892888, "learning_rate": 1.0174842982671258e-05, "loss": 0.9346, "step": 3163 }, { "epoch": 0.51, "grad_norm": 3.6293045006091855, "learning_rate": 1.0169624297481663e-05, "loss": 0.9132, "step": 3164 }, { "epoch": 0.51, "grad_norm": 1.4844965834766208, "learning_rate": 1.016440556608174e-05, "loss": 0.9078, "step": 3165 }, { "epoch": 0.51, "grad_norm": 4.021960296556294, "learning_rate": 1.0159186789893207e-05, "loss": 0.9003, "step": 3166 }, { "epoch": 0.51, "grad_norm": 2.8485700399644394, "learning_rate": 1.0153967970337803e-05, "loss": 0.9239, "step": 3167 }, { "epoch": 0.51, "grad_norm": 3.533358189357444, "learning_rate": 1.0148749108837282e-05, "loss": 0.9271, "step": 3168 }, { "epoch": 0.51, "grad_norm": 3.4586899048851283, "learning_rate": 1.0143530206813403e-05, "loss": 0.9326, "step": 3169 }, { "epoch": 0.51, "grad_norm": 2.3985966433930765, "learning_rate": 1.013831126568794e-05, "loss": 0.9234, "step": 3170 }, { "epoch": 0.51, "grad_norm": 2.5006535629475595, "learning_rate": 1.0133092286882672e-05, "loss": 0.9094, "step": 3171 }, { "epoch": 0.51, "grad_norm": 2.089105950005677, "learning_rate": 1.01278732718194e-05, "loss": 0.9141, "step": 3172 }, { "epoch": 0.51, "grad_norm": 3.6040884167476888, "learning_rate": 1.0122654221919921e-05, "loss": 0.9069, "step": 3173 }, { "epoch": 0.51, "grad_norm": 2.9890869650111243, "learning_rate": 1.0117435138606054e-05, "loss": 0.921, "step": 3174 }, { "epoch": 0.51, "grad_norm": 3.8698838207144717, "learning_rate": 1.0112216023299615e-05, "loss": 0.9352, "step": 3175 }, { "epoch": 0.51, "grad_norm": 1.295864768837779, "learning_rate": 1.0106996877422442e-05, "loss": 0.9337, "step": 3176 }, { "epoch": 0.51, "grad_norm": 2.862112100630356, "learning_rate": 1.0101777702396367e-05, "loss": 0.8299, "step": 3177 }, { "epoch": 0.51, "grad_norm": 2.1163024178427547, "learning_rate": 1.0096558499643239e-05, "loss": 0.933, "step": 3178 }, { "epoch": 0.51, "grad_norm": 3.1302947393711142, "learning_rate": 1.0091339270584917e-05, "loss": 0.9667, "step": 3179 }, { "epoch": 0.51, "grad_norm": 3.6451800097061904, "learning_rate": 1.008612001664326e-05, "loss": 0.959, "step": 3180 }, { "epoch": 0.51, "grad_norm": 2.437139729015622, "learning_rate": 1.0080900739240136e-05, "loss": 0.9183, "step": 3181 }, { "epoch": 0.51, "grad_norm": 3.638513697791464, "learning_rate": 1.007568143979742e-05, "loss": 0.8955, "step": 3182 }, { "epoch": 0.51, "grad_norm": 4.138798609580329, "learning_rate": 1.0070462119736993e-05, "loss": 0.8334, "step": 3183 }, { "epoch": 0.51, "grad_norm": 3.3903472364715235, "learning_rate": 1.0065242780480742e-05, "loss": 0.9013, "step": 3184 }, { "epoch": 0.51, "grad_norm": 2.8142866605600783, "learning_rate": 1.0060023423450562e-05, "loss": 0.8919, "step": 3185 }, { "epoch": 0.51, "grad_norm": 2.914168368709839, "learning_rate": 1.0054804050068343e-05, "loss": 0.9443, "step": 3186 }, { "epoch": 0.51, "grad_norm": 2.415820916485948, "learning_rate": 1.0049584661755993e-05, "loss": 0.9818, "step": 3187 }, { "epoch": 0.51, "grad_norm": 2.06456860929359, "learning_rate": 1.0044365259935413e-05, "loss": 0.8778, "step": 3188 }, { "epoch": 0.51, "grad_norm": 3.1968357773627907, "learning_rate": 1.0039145846028515e-05, "loss": 0.9406, "step": 3189 }, { "epoch": 0.51, "grad_norm": 3.2352368961720606, "learning_rate": 1.0033926421457208e-05, "loss": 0.8987, "step": 3190 }, { "epoch": 0.51, "grad_norm": 3.516269200961145, "learning_rate": 1.002870698764341e-05, "loss": 0.8195, "step": 3191 }, { "epoch": 0.51, "grad_norm": 2.603943614308225, "learning_rate": 1.0023487546009036e-05, "loss": 0.9121, "step": 3192 }, { "epoch": 0.51, "grad_norm": 3.2734634289135673, "learning_rate": 1.0018268097976007e-05, "loss": 0.8573, "step": 3193 }, { "epoch": 0.51, "grad_norm": 3.6676815680224504, "learning_rate": 1.0013048644966246e-05, "loss": 0.866, "step": 3194 }, { "epoch": 0.51, "grad_norm": 2.061889558100697, "learning_rate": 1.0007829188401673e-05, "loss": 0.8798, "step": 3195 }, { "epoch": 0.51, "grad_norm": 1.994629205075712, "learning_rate": 1.0002609729704213e-05, "loss": 0.9653, "step": 3196 }, { "epoch": 0.52, "grad_norm": 3.6500794598016917, "learning_rate": 9.997390270295792e-06, "loss": 0.9236, "step": 3197 }, { "epoch": 0.52, "grad_norm": 2.9691595497727694, "learning_rate": 9.992170811598332e-06, "loss": 0.9412, "step": 3198 }, { "epoch": 0.52, "grad_norm": 2.516320647732194, "learning_rate": 9.986951355033755e-06, "loss": 0.9409, "step": 3199 }, { "epoch": 0.52, "grad_norm": 3.6896140294204476, "learning_rate": 9.981731902023998e-06, "loss": 0.9024, "step": 3200 }, { "epoch": 0.52, "grad_norm": 2.5435747165882874, "learning_rate": 9.976512453990967e-06, "loss": 0.8467, "step": 3201 }, { "epoch": 0.52, "grad_norm": 3.3022874995671354, "learning_rate": 9.971293012356593e-06, "loss": 0.8723, "step": 3202 }, { "epoch": 0.52, "grad_norm": 2.2603468876863437, "learning_rate": 9.966073578542795e-06, "loss": 0.8605, "step": 3203 }, { "epoch": 0.52, "grad_norm": 2.9147984288765914, "learning_rate": 9.960854153971488e-06, "loss": 0.911, "step": 3204 }, { "epoch": 0.52, "grad_norm": 2.1327947259518663, "learning_rate": 9.955634740064588e-06, "loss": 0.8892, "step": 3205 }, { "epoch": 0.52, "grad_norm": 3.561835645333411, "learning_rate": 9.95041533824401e-06, "loss": 0.9421, "step": 3206 }, { "epoch": 0.52, "grad_norm": 2.815231437993863, "learning_rate": 9.94519594993166e-06, "loss": 0.8584, "step": 3207 }, { "epoch": 0.52, "grad_norm": 2.371625812292123, "learning_rate": 9.939976576549441e-06, "loss": 0.8813, "step": 3208 }, { "epoch": 0.52, "grad_norm": 1.8381372138392267, "learning_rate": 9.934757219519258e-06, "loss": 0.9093, "step": 3209 }, { "epoch": 0.52, "grad_norm": 2.538180435728793, "learning_rate": 9.92953788026301e-06, "loss": 0.9218, "step": 3210 }, { "epoch": 0.52, "grad_norm": 1.7533673178469893, "learning_rate": 9.924318560202584e-06, "loss": 0.3263, "step": 3211 }, { "epoch": 0.52, "grad_norm": 3.188304276068913, "learning_rate": 9.919099260759867e-06, "loss": 0.9103, "step": 3212 }, { "epoch": 0.52, "grad_norm": 2.883856194189837, "learning_rate": 9.913879983356745e-06, "loss": 0.8754, "step": 3213 }, { "epoch": 0.52, "grad_norm": 1.5855336544202763, "learning_rate": 9.908660729415087e-06, "loss": 0.3214, "step": 3214 }, { "epoch": 0.52, "grad_norm": 2.46253426232683, "learning_rate": 9.903441500356761e-06, "loss": 0.8664, "step": 3215 }, { "epoch": 0.52, "grad_norm": 2.253333589207712, "learning_rate": 9.898222297603638e-06, "loss": 0.9238, "step": 3216 }, { "epoch": 0.52, "grad_norm": 2.58370072818425, "learning_rate": 9.893003122577563e-06, "loss": 0.9104, "step": 3217 }, { "epoch": 0.52, "grad_norm": 2.548860863954011, "learning_rate": 9.887783976700387e-06, "loss": 0.9479, "step": 3218 }, { "epoch": 0.52, "grad_norm": 3.324522692617397, "learning_rate": 9.882564861393951e-06, "loss": 0.9483, "step": 3219 }, { "epoch": 0.52, "grad_norm": 3.961044279987905, "learning_rate": 9.877345778080082e-06, "loss": 0.8693, "step": 3220 }, { "epoch": 0.52, "grad_norm": 2.424851293035299, "learning_rate": 9.872126728180604e-06, "loss": 0.8618, "step": 3221 }, { "epoch": 0.52, "grad_norm": 2.4878157295481134, "learning_rate": 9.866907713117333e-06, "loss": 0.9416, "step": 3222 }, { "epoch": 0.52, "grad_norm": 2.994890152790702, "learning_rate": 9.861688734312064e-06, "loss": 0.9616, "step": 3223 }, { "epoch": 0.52, "grad_norm": 1.6856722646953093, "learning_rate": 9.8564697931866e-06, "loss": 0.343, "step": 3224 }, { "epoch": 0.52, "grad_norm": 2.1960214576506667, "learning_rate": 9.851250891162722e-06, "loss": 0.3334, "step": 3225 }, { "epoch": 0.52, "grad_norm": 3.147468360941835, "learning_rate": 9.846032029662199e-06, "loss": 0.8625, "step": 3226 }, { "epoch": 0.52, "grad_norm": 3.4626243045679908, "learning_rate": 9.840813210106795e-06, "loss": 0.7733, "step": 3227 }, { "epoch": 0.52, "grad_norm": 3.643109959289738, "learning_rate": 9.83559443391826e-06, "loss": 0.8306, "step": 3228 }, { "epoch": 0.52, "grad_norm": 3.7377119278547193, "learning_rate": 9.830375702518339e-06, "loss": 0.8168, "step": 3229 }, { "epoch": 0.52, "grad_norm": 2.0303162996392485, "learning_rate": 9.825157017328745e-06, "loss": 0.8751, "step": 3230 }, { "epoch": 0.52, "grad_norm": 2.6855032844473654, "learning_rate": 9.8199383797712e-06, "loss": 0.9451, "step": 3231 }, { "epoch": 0.52, "grad_norm": 1.4292213221887897, "learning_rate": 9.81471979126741e-06, "loss": 0.3351, "step": 3232 }, { "epoch": 0.52, "grad_norm": 3.3540772293835075, "learning_rate": 9.80950125323905e-06, "loss": 0.9159, "step": 3233 }, { "epoch": 0.52, "grad_norm": 3.6244178830744533, "learning_rate": 9.804282767107802e-06, "loss": 0.9017, "step": 3234 }, { "epoch": 0.52, "grad_norm": 3.243714343042219, "learning_rate": 9.799064334295324e-06, "loss": 0.9172, "step": 3235 }, { "epoch": 0.52, "grad_norm": 2.90493465724343, "learning_rate": 9.79384595622326e-06, "loss": 0.9286, "step": 3236 }, { "epoch": 0.52, "grad_norm": 4.011125351682999, "learning_rate": 9.788627634313237e-06, "loss": 0.8929, "step": 3237 }, { "epoch": 0.52, "grad_norm": 3.001093188426937, "learning_rate": 9.78340936998688e-06, "loss": 0.8667, "step": 3238 }, { "epoch": 0.52, "grad_norm": 2.2347454484209788, "learning_rate": 9.778191164665774e-06, "loss": 0.9059, "step": 3239 }, { "epoch": 0.52, "grad_norm": 2.3639285375505223, "learning_rate": 9.77297301977151e-06, "loss": 0.843, "step": 3240 }, { "epoch": 0.52, "grad_norm": 3.30450482235564, "learning_rate": 9.767754936725656e-06, "loss": 0.9544, "step": 3241 }, { "epoch": 0.52, "grad_norm": 3.0842886467220976, "learning_rate": 9.76253691694976e-06, "loss": 0.9198, "step": 3242 }, { "epoch": 0.52, "grad_norm": 2.308054904694582, "learning_rate": 9.757318961865348e-06, "loss": 0.8478, "step": 3243 }, { "epoch": 0.52, "grad_norm": 3.4274141254421346, "learning_rate": 9.752101072893947e-06, "loss": 0.8814, "step": 3244 }, { "epoch": 0.52, "grad_norm": 3.0062962264791495, "learning_rate": 9.746883251457043e-06, "loss": 0.9588, "step": 3245 }, { "epoch": 0.52, "grad_norm": 3.39525039131528, "learning_rate": 9.741665498976116e-06, "loss": 0.8776, "step": 3246 }, { "epoch": 0.52, "grad_norm": 2.5785460157240356, "learning_rate": 9.736447816872632e-06, "loss": 0.9554, "step": 3247 }, { "epoch": 0.52, "grad_norm": 3.921085354160791, "learning_rate": 9.731230206568029e-06, "loss": 0.9581, "step": 3248 }, { "epoch": 0.52, "grad_norm": 3.185415895577124, "learning_rate": 9.726012669483723e-06, "loss": 0.8693, "step": 3249 }, { "epoch": 0.52, "grad_norm": 4.050751350314555, "learning_rate": 9.720795207041119e-06, "loss": 0.932, "step": 3250 }, { "epoch": 0.52, "grad_norm": 2.940841073378851, "learning_rate": 9.7155778206616e-06, "loss": 0.856, "step": 3251 }, { "epoch": 0.52, "grad_norm": 2.2641726343972253, "learning_rate": 9.710360511766517e-06, "loss": 0.8619, "step": 3252 }, { "epoch": 0.52, "grad_norm": 2.04794004264625, "learning_rate": 9.705143281777218e-06, "loss": 0.8384, "step": 3253 }, { "epoch": 0.52, "grad_norm": 3.476455652545243, "learning_rate": 9.69992613211502e-06, "loss": 0.9355, "step": 3254 }, { "epoch": 0.52, "grad_norm": 2.4682624432027866, "learning_rate": 9.694709064201211e-06, "loss": 0.9469, "step": 3255 }, { "epoch": 0.52, "grad_norm": 2.715909416660131, "learning_rate": 9.68949207945707e-06, "loss": 0.9148, "step": 3256 }, { "epoch": 0.52, "grad_norm": 3.0381678659396716, "learning_rate": 9.684275179303846e-06, "loss": 0.8845, "step": 3257 }, { "epoch": 0.52, "grad_norm": 1.2208412031282427, "learning_rate": 9.679058365162765e-06, "loss": 0.8626, "step": 3258 }, { "epoch": 0.53, "grad_norm": 2.7993174708783735, "learning_rate": 9.673841638455029e-06, "loss": 0.9389, "step": 3259 }, { "epoch": 0.53, "grad_norm": 3.3158556657492713, "learning_rate": 9.66862500060183e-06, "loss": 0.8284, "step": 3260 }, { "epoch": 0.53, "grad_norm": 3.1878295428894945, "learning_rate": 9.663408453024306e-06, "loss": 0.8758, "step": 3261 }, { "epoch": 0.53, "grad_norm": 3.4109993118647783, "learning_rate": 9.658191997143602e-06, "loss": 0.9266, "step": 3262 }, { "epoch": 0.53, "grad_norm": 2.0150337239133993, "learning_rate": 9.652975634380822e-06, "loss": 0.9202, "step": 3263 }, { "epoch": 0.53, "grad_norm": 2.911646437098847, "learning_rate": 9.647759366157041e-06, "loss": 0.9025, "step": 3264 }, { "epoch": 0.53, "grad_norm": 3.3635354472118886, "learning_rate": 9.642543193893318e-06, "loss": 0.904, "step": 3265 }, { "epoch": 0.53, "grad_norm": 3.6077235022959497, "learning_rate": 9.637327119010683e-06, "loss": 0.8587, "step": 3266 }, { "epoch": 0.53, "grad_norm": 2.685308583178039, "learning_rate": 9.632111142930143e-06, "loss": 0.9156, "step": 3267 }, { "epoch": 0.53, "grad_norm": 3.391800496478258, "learning_rate": 9.626895267072662e-06, "loss": 0.8832, "step": 3268 }, { "epoch": 0.53, "grad_norm": 2.065104487364184, "learning_rate": 9.621679492859196e-06, "loss": 0.3532, "step": 3269 }, { "epoch": 0.53, "grad_norm": 1.8147455314500285, "learning_rate": 9.61646382171067e-06, "loss": 0.926, "step": 3270 }, { "epoch": 0.53, "grad_norm": 2.8933834626503407, "learning_rate": 9.611248255047965e-06, "loss": 0.9214, "step": 3271 }, { "epoch": 0.53, "grad_norm": 3.988590897244421, "learning_rate": 9.606032794291953e-06, "loss": 0.8686, "step": 3272 }, { "epoch": 0.53, "grad_norm": 2.045560108697537, "learning_rate": 9.60081744086347e-06, "loss": 0.8604, "step": 3273 }, { "epoch": 0.53, "grad_norm": 1.3982172105212594, "learning_rate": 9.595602196183317e-06, "loss": 0.9232, "step": 3274 }, { "epoch": 0.53, "grad_norm": 2.5535001182698047, "learning_rate": 9.590387061672272e-06, "loss": 0.927, "step": 3275 }, { "epoch": 0.53, "grad_norm": 2.8906520532940125, "learning_rate": 9.585172038751086e-06, "loss": 0.8366, "step": 3276 }, { "epoch": 0.53, "grad_norm": 2.8129973925413325, "learning_rate": 9.57995712884047e-06, "loss": 0.837, "step": 3277 }, { "epoch": 0.53, "grad_norm": 3.597311229087422, "learning_rate": 9.57474233336111e-06, "loss": 0.9235, "step": 3278 }, { "epoch": 0.53, "grad_norm": 3.009205205926394, "learning_rate": 9.569527653733662e-06, "loss": 0.8649, "step": 3279 }, { "epoch": 0.53, "grad_norm": 3.1385266330349233, "learning_rate": 9.564313091378746e-06, "loss": 0.8772, "step": 3280 }, { "epoch": 0.53, "grad_norm": 2.5586982065092023, "learning_rate": 9.559098647716953e-06, "loss": 0.9317, "step": 3281 }, { "epoch": 0.53, "grad_norm": 2.109884243000043, "learning_rate": 9.553884324168846e-06, "loss": 0.7961, "step": 3282 }, { "epoch": 0.53, "grad_norm": 2.5671772952582192, "learning_rate": 9.54867012215494e-06, "loss": 0.9748, "step": 3283 }, { "epoch": 0.53, "grad_norm": 4.91938549235944, "learning_rate": 9.543456043095736e-06, "loss": 0.8793, "step": 3284 }, { "epoch": 0.53, "grad_norm": 2.656960336287913, "learning_rate": 9.538242088411694e-06, "loss": 0.8991, "step": 3285 }, { "epoch": 0.53, "grad_norm": 3.253738600498063, "learning_rate": 9.533028259523233e-06, "loss": 0.8518, "step": 3286 }, { "epoch": 0.53, "grad_norm": 3.4196577857949824, "learning_rate": 9.527814557850744e-06, "loss": 0.9246, "step": 3287 }, { "epoch": 0.53, "grad_norm": 2.8206761514060488, "learning_rate": 9.522600984814587e-06, "loss": 0.9228, "step": 3288 }, { "epoch": 0.53, "grad_norm": 2.7924543000309296, "learning_rate": 9.517387541835087e-06, "loss": 0.8832, "step": 3289 }, { "epoch": 0.53, "grad_norm": 2.526251390278399, "learning_rate": 9.512174230332515e-06, "loss": 0.8965, "step": 3290 }, { "epoch": 0.53, "grad_norm": 2.519517055737633, "learning_rate": 9.506961051727132e-06, "loss": 0.8702, "step": 3291 }, { "epoch": 0.53, "grad_norm": 3.463207593978342, "learning_rate": 9.501748007439154e-06, "loss": 0.9299, "step": 3292 }, { "epoch": 0.53, "grad_norm": 3.002761523492629, "learning_rate": 9.496535098888749e-06, "loss": 0.8883, "step": 3293 }, { "epoch": 0.53, "grad_norm": 2.577901842111576, "learning_rate": 9.491322327496062e-06, "loss": 0.8628, "step": 3294 }, { "epoch": 0.53, "grad_norm": 3.145868583296445, "learning_rate": 9.486109694681195e-06, "loss": 0.9044, "step": 3295 }, { "epoch": 0.53, "grad_norm": 2.850331832574259, "learning_rate": 9.480897201864214e-06, "loss": 0.8179, "step": 3296 }, { "epoch": 0.53, "grad_norm": 2.781903050936238, "learning_rate": 9.47568485046514e-06, "loss": 0.9396, "step": 3297 }, { "epoch": 0.53, "grad_norm": 2.7816784132276506, "learning_rate": 9.470472641903972e-06, "loss": 0.8749, "step": 3298 }, { "epoch": 0.53, "grad_norm": 3.187901340724163, "learning_rate": 9.465260577600645e-06, "loss": 0.9274, "step": 3299 }, { "epoch": 0.53, "grad_norm": 2.964691922166513, "learning_rate": 9.460048658975082e-06, "loss": 0.9447, "step": 3300 }, { "epoch": 0.53, "grad_norm": 1.8575180782911882, "learning_rate": 9.454836887447149e-06, "loss": 0.9376, "step": 3301 }, { "epoch": 0.53, "grad_norm": 3.544453874281162, "learning_rate": 9.449625264436673e-06, "loss": 0.901, "step": 3302 }, { "epoch": 0.53, "grad_norm": 3.26095919416785, "learning_rate": 9.444413791363446e-06, "loss": 0.892, "step": 3303 }, { "epoch": 0.53, "grad_norm": 1.5361178349506244, "learning_rate": 9.43920246964722e-06, "loss": 0.9214, "step": 3304 }, { "epoch": 0.53, "grad_norm": 1.7373377607605838, "learning_rate": 9.433991300707694e-06, "loss": 0.9538, "step": 3305 }, { "epoch": 0.53, "grad_norm": 2.5603664168746763, "learning_rate": 9.428780285964544e-06, "loss": 0.8803, "step": 3306 }, { "epoch": 0.53, "grad_norm": 1.8358863336453917, "learning_rate": 9.423569426837387e-06, "loss": 0.9692, "step": 3307 }, { "epoch": 0.53, "grad_norm": 3.4327523477568658, "learning_rate": 9.418358724745813e-06, "loss": 0.8571, "step": 3308 }, { "epoch": 0.53, "grad_norm": 1.8736087087708984, "learning_rate": 9.413148181109352e-06, "loss": 0.9202, "step": 3309 }, { "epoch": 0.53, "grad_norm": 3.858277570240508, "learning_rate": 9.4079377973475e-06, "loss": 0.8458, "step": 3310 }, { "epoch": 0.53, "grad_norm": 1.6305274854096357, "learning_rate": 9.40272757487972e-06, "loss": 0.9795, "step": 3311 }, { "epoch": 0.53, "grad_norm": 4.030229306193141, "learning_rate": 9.397517515125405e-06, "loss": 0.8373, "step": 3312 }, { "epoch": 0.53, "grad_norm": 2.2894981412394757, "learning_rate": 9.392307619503928e-06, "loss": 0.8306, "step": 3313 }, { "epoch": 0.53, "grad_norm": 1.9824754529205386, "learning_rate": 9.38709788943461e-06, "loss": 0.3417, "step": 3314 }, { "epoch": 0.53, "grad_norm": 2.5903013351591917, "learning_rate": 9.38188832633672e-06, "loss": 0.9072, "step": 3315 }, { "epoch": 0.53, "grad_norm": 3.7430287731551677, "learning_rate": 9.376678931629488e-06, "loss": 0.9103, "step": 3316 }, { "epoch": 0.53, "grad_norm": 3.626015093511612, "learning_rate": 9.371469706732097e-06, "loss": 0.9474, "step": 3317 }, { "epoch": 0.53, "grad_norm": 2.896099124118294, "learning_rate": 9.366260653063684e-06, "loss": 0.8384, "step": 3318 }, { "epoch": 0.53, "grad_norm": 3.538554376162543, "learning_rate": 9.361051772043335e-06, "loss": 0.8891, "step": 3319 }, { "epoch": 0.53, "grad_norm": 2.0074962559875313, "learning_rate": 9.355843065090103e-06, "loss": 0.8967, "step": 3320 }, { "epoch": 0.54, "grad_norm": 1.9638495840443637, "learning_rate": 9.35063453362297e-06, "loss": 0.8708, "step": 3321 }, { "epoch": 0.54, "grad_norm": 3.0297531679541203, "learning_rate": 9.34542617906089e-06, "loss": 0.9489, "step": 3322 }, { "epoch": 0.54, "grad_norm": 2.8417623091755204, "learning_rate": 9.340218002822766e-06, "loss": 0.9063, "step": 3323 }, { "epoch": 0.54, "grad_norm": 3.3599576223062466, "learning_rate": 9.335010006327443e-06, "loss": 0.9248, "step": 3324 }, { "epoch": 0.54, "grad_norm": 2.4223838025580817, "learning_rate": 9.329802190993726e-06, "loss": 0.9101, "step": 3325 }, { "epoch": 0.54, "grad_norm": 3.653310131802174, "learning_rate": 9.324594558240361e-06, "loss": 0.9058, "step": 3326 }, { "epoch": 0.54, "grad_norm": 2.6861089610639417, "learning_rate": 9.319387109486065e-06, "loss": 0.9055, "step": 3327 }, { "epoch": 0.54, "grad_norm": 3.64558393806132, "learning_rate": 9.314179846149474e-06, "loss": 0.8852, "step": 3328 }, { "epoch": 0.54, "grad_norm": 3.082542696097915, "learning_rate": 9.3089727696492e-06, "loss": 0.8356, "step": 3329 }, { "epoch": 0.54, "grad_norm": 3.6252993756025362, "learning_rate": 9.303765881403794e-06, "loss": 0.8569, "step": 3330 }, { "epoch": 0.54, "grad_norm": 2.4689894934268843, "learning_rate": 9.298559182831752e-06, "loss": 0.9377, "step": 3331 }, { "epoch": 0.54, "grad_norm": 2.5137965031386083, "learning_rate": 9.293352675351524e-06, "loss": 0.9273, "step": 3332 }, { "epoch": 0.54, "grad_norm": 2.3644497087228027, "learning_rate": 9.288146360381507e-06, "loss": 0.3195, "step": 3333 }, { "epoch": 0.54, "grad_norm": 2.724133302941922, "learning_rate": 9.28294023934004e-06, "loss": 0.8788, "step": 3334 }, { "epoch": 0.54, "grad_norm": 2.7998674463503748, "learning_rate": 9.277734313645422e-06, "loss": 0.8262, "step": 3335 }, { "epoch": 0.54, "grad_norm": 3.0703955874472473, "learning_rate": 9.272528584715886e-06, "loss": 0.9466, "step": 3336 }, { "epoch": 0.54, "grad_norm": 2.1158882461920183, "learning_rate": 9.267323053969615e-06, "loss": 0.9757, "step": 3337 }, { "epoch": 0.54, "grad_norm": 3.3847447750728286, "learning_rate": 9.26211772282474e-06, "loss": 0.8808, "step": 3338 }, { "epoch": 0.54, "grad_norm": 2.375284378694305, "learning_rate": 9.256912592699339e-06, "loss": 0.9039, "step": 3339 }, { "epoch": 0.54, "grad_norm": 2.5532292394941662, "learning_rate": 9.251707665011429e-06, "loss": 0.9109, "step": 3340 }, { "epoch": 0.54, "grad_norm": 1.2771330960127178, "learning_rate": 9.246502941178976e-06, "loss": 0.8709, "step": 3341 }, { "epoch": 0.54, "grad_norm": 3.259003126631598, "learning_rate": 9.2412984226199e-06, "loss": 0.8936, "step": 3342 }, { "epoch": 0.54, "grad_norm": 2.5564919727959667, "learning_rate": 9.236094110752038e-06, "loss": 0.8691, "step": 3343 }, { "epoch": 0.54, "grad_norm": 1.641599892292614, "learning_rate": 9.230890006993203e-06, "loss": 0.7875, "step": 3344 }, { "epoch": 0.54, "grad_norm": 4.0468309319059355, "learning_rate": 9.225686112761128e-06, "loss": 0.8981, "step": 3345 }, { "epoch": 0.54, "grad_norm": 3.797157685231314, "learning_rate": 9.220482429473502e-06, "loss": 0.8098, "step": 3346 }, { "epoch": 0.54, "grad_norm": 3.3242950609998343, "learning_rate": 9.215278958547951e-06, "loss": 0.8746, "step": 3347 }, { "epoch": 0.54, "grad_norm": 3.6970040641434205, "learning_rate": 9.210075701402037e-06, "loss": 0.9114, "step": 3348 }, { "epoch": 0.54, "grad_norm": 2.172830604709321, "learning_rate": 9.204872659453285e-06, "loss": 0.9253, "step": 3349 }, { "epoch": 0.54, "grad_norm": 1.698428687219295, "learning_rate": 9.19966983411913e-06, "loss": 0.8889, "step": 3350 }, { "epoch": 0.54, "grad_norm": 4.190288469312666, "learning_rate": 9.194467226816976e-06, "loss": 0.8453, "step": 3351 }, { "epoch": 0.54, "grad_norm": 2.9335783083086437, "learning_rate": 9.189264838964159e-06, "loss": 0.9105, "step": 3352 }, { "epoch": 0.54, "grad_norm": 1.7965118041145731, "learning_rate": 9.184062671977942e-06, "loss": 0.9554, "step": 3353 }, { "epoch": 0.54, "grad_norm": 2.4332197707166117, "learning_rate": 9.178860727275546e-06, "loss": 0.8884, "step": 3354 }, { "epoch": 0.54, "grad_norm": 2.13383464982767, "learning_rate": 9.173659006274125e-06, "loss": 0.8968, "step": 3355 }, { "epoch": 0.54, "grad_norm": 4.593031431296964, "learning_rate": 9.168457510390764e-06, "loss": 0.8858, "step": 3356 }, { "epoch": 0.54, "grad_norm": 3.6038231364386446, "learning_rate": 9.163256241042502e-06, "loss": 0.8894, "step": 3357 }, { "epoch": 0.54, "grad_norm": 3.5986137317167146, "learning_rate": 9.158055199646306e-06, "loss": 0.9727, "step": 3358 }, { "epoch": 0.54, "grad_norm": 3.559426839315352, "learning_rate": 9.152854387619081e-06, "loss": 0.8581, "step": 3359 }, { "epoch": 0.54, "grad_norm": 2.9152754235728886, "learning_rate": 9.147653806377672e-06, "loss": 0.9289, "step": 3360 }, { "epoch": 0.54, "grad_norm": 1.6880985893773757, "learning_rate": 9.142453457338864e-06, "loss": 0.3256, "step": 3361 }, { "epoch": 0.54, "grad_norm": 2.338797359180737, "learning_rate": 9.137253341919369e-06, "loss": 0.8835, "step": 3362 }, { "epoch": 0.54, "grad_norm": 2.370197258639213, "learning_rate": 9.132053461535844e-06, "loss": 0.9543, "step": 3363 }, { "epoch": 0.54, "grad_norm": 2.341672052488095, "learning_rate": 9.126853817604887e-06, "loss": 0.9761, "step": 3364 }, { "epoch": 0.54, "grad_norm": 3.7062014906283416, "learning_rate": 9.121654411543013e-06, "loss": 0.872, "step": 3365 }, { "epoch": 0.54, "grad_norm": 2.8523483238716407, "learning_rate": 9.11645524476669e-06, "loss": 0.8784, "step": 3366 }, { "epoch": 0.54, "grad_norm": 2.627404882449753, "learning_rate": 9.111256318692316e-06, "loss": 0.8947, "step": 3367 }, { "epoch": 0.54, "grad_norm": 3.458011259838369, "learning_rate": 9.10605763473622e-06, "loss": 0.9205, "step": 3368 }, { "epoch": 0.54, "grad_norm": 3.4181890275001128, "learning_rate": 9.100859194314661e-06, "loss": 0.8983, "step": 3369 }, { "epoch": 0.54, "grad_norm": 3.122300011588528, "learning_rate": 9.095660998843842e-06, "loss": 0.8118, "step": 3370 }, { "epoch": 0.54, "grad_norm": 2.493273172706716, "learning_rate": 9.090463049739903e-06, "loss": 0.8499, "step": 3371 }, { "epoch": 0.54, "grad_norm": 2.454267102330657, "learning_rate": 9.085265348418894e-06, "loss": 0.8449, "step": 3372 }, { "epoch": 0.54, "grad_norm": 2.380577264709283, "learning_rate": 9.08006789629682e-06, "loss": 0.7983, "step": 3373 }, { "epoch": 0.54, "grad_norm": 2.499994087212246, "learning_rate": 9.074870694789613e-06, "loss": 0.8458, "step": 3374 }, { "epoch": 0.54, "grad_norm": 3.831353394947417, "learning_rate": 9.069673745313127e-06, "loss": 0.8798, "step": 3375 }, { "epoch": 0.54, "grad_norm": 2.211454620813412, "learning_rate": 9.064477049283157e-06, "loss": 0.8704, "step": 3376 }, { "epoch": 0.54, "grad_norm": 2.8411175627078427, "learning_rate": 9.059280608115427e-06, "loss": 0.8823, "step": 3377 }, { "epoch": 0.54, "grad_norm": 2.79025610592607, "learning_rate": 9.054084423225592e-06, "loss": 0.9124, "step": 3378 }, { "epoch": 0.54, "grad_norm": 2.337631886887354, "learning_rate": 9.04888849602923e-06, "loss": 0.9553, "step": 3379 }, { "epoch": 0.54, "grad_norm": 2.5619557430724034, "learning_rate": 9.043692827941864e-06, "loss": 0.9127, "step": 3380 }, { "epoch": 0.54, "grad_norm": 3.111653628806404, "learning_rate": 9.03849742037893e-06, "loss": 0.967, "step": 3381 }, { "epoch": 0.54, "grad_norm": 1.523927502201848, "learning_rate": 9.033302274755803e-06, "loss": 0.3273, "step": 3382 }, { "epoch": 0.55, "grad_norm": 2.453032570816692, "learning_rate": 9.028107392487787e-06, "loss": 0.8621, "step": 3383 }, { "epoch": 0.55, "grad_norm": 2.045190015699613, "learning_rate": 9.022912774990104e-06, "loss": 0.8957, "step": 3384 }, { "epoch": 0.55, "grad_norm": 2.7122204298105257, "learning_rate": 9.01771842367791e-06, "loss": 0.8711, "step": 3385 }, { "epoch": 0.55, "grad_norm": 3.106421980519892, "learning_rate": 9.0125243399663e-06, "loss": 0.9349, "step": 3386 }, { "epoch": 0.55, "grad_norm": 3.0448158544222848, "learning_rate": 9.007330525270282e-06, "loss": 0.804, "step": 3387 }, { "epoch": 0.55, "grad_norm": 3.1711797844943983, "learning_rate": 9.002136981004787e-06, "loss": 0.8783, "step": 3388 }, { "epoch": 0.55, "grad_norm": 2.5155613103350687, "learning_rate": 8.996943708584688e-06, "loss": 0.853, "step": 3389 }, { "epoch": 0.55, "grad_norm": 2.8109428333615676, "learning_rate": 8.991750709424772e-06, "loss": 0.903, "step": 3390 }, { "epoch": 0.55, "grad_norm": 2.9850306564622633, "learning_rate": 8.986557984939754e-06, "loss": 0.9683, "step": 3391 }, { "epoch": 0.55, "grad_norm": 2.047618235992406, "learning_rate": 8.981365536544275e-06, "loss": 0.8947, "step": 3392 }, { "epoch": 0.55, "grad_norm": 3.0150356210910267, "learning_rate": 8.976173365652909e-06, "loss": 0.9699, "step": 3393 }, { "epoch": 0.55, "grad_norm": 2.7756964282178074, "learning_rate": 8.970981473680132e-06, "loss": 0.8859, "step": 3394 }, { "epoch": 0.55, "grad_norm": 1.7117988125468602, "learning_rate": 8.96578986204037e-06, "loss": 0.3253, "step": 3395 }, { "epoch": 0.55, "grad_norm": 2.369253132311586, "learning_rate": 8.96059853214796e-06, "loss": 0.9636, "step": 3396 }, { "epoch": 0.55, "grad_norm": 2.327743908714901, "learning_rate": 8.955407485417158e-06, "loss": 0.9297, "step": 3397 }, { "epoch": 0.55, "grad_norm": 3.290558173650368, "learning_rate": 8.950216723262152e-06, "loss": 0.8833, "step": 3398 }, { "epoch": 0.55, "grad_norm": 2.3697315809296877, "learning_rate": 8.94502624709705e-06, "loss": 0.8403, "step": 3399 }, { "epoch": 0.55, "grad_norm": 1.9319381870388137, "learning_rate": 8.939836058335878e-06, "loss": 0.9364, "step": 3400 }, { "epoch": 0.55, "grad_norm": 1.3363909035627428, "learning_rate": 8.934646158392584e-06, "loss": 0.9075, "step": 3401 }, { "epoch": 0.55, "grad_norm": 2.8735694643369296, "learning_rate": 8.92945654868105e-06, "loss": 0.8387, "step": 3402 }, { "epoch": 0.55, "grad_norm": 2.339246058985929, "learning_rate": 8.924267230615054e-06, "loss": 0.923, "step": 3403 }, { "epoch": 0.55, "grad_norm": 4.407917329501068, "learning_rate": 8.91907820560832e-06, "loss": 0.8673, "step": 3404 }, { "epoch": 0.55, "grad_norm": 3.7351550959362108, "learning_rate": 8.913889475074479e-06, "loss": 0.8887, "step": 3405 }, { "epoch": 0.55, "grad_norm": 2.9982310483943135, "learning_rate": 8.908701040427086e-06, "loss": 0.8971, "step": 3406 }, { "epoch": 0.55, "grad_norm": 2.180934644927618, "learning_rate": 8.903512903079608e-06, "loss": 0.3368, "step": 3407 }, { "epoch": 0.55, "grad_norm": 2.7081505249144633, "learning_rate": 8.89832506444544e-06, "loss": 0.8336, "step": 3408 }, { "epoch": 0.55, "grad_norm": 4.012364826718222, "learning_rate": 8.893137525937894e-06, "loss": 0.9269, "step": 3409 }, { "epoch": 0.55, "grad_norm": 2.707976029130783, "learning_rate": 8.887950288970198e-06, "loss": 0.9057, "step": 3410 }, { "epoch": 0.55, "grad_norm": 3.032172780320138, "learning_rate": 8.882763354955495e-06, "loss": 0.932, "step": 3411 }, { "epoch": 0.55, "grad_norm": 3.582961100089, "learning_rate": 8.877576725306853e-06, "loss": 0.9374, "step": 3412 }, { "epoch": 0.55, "grad_norm": 3.1708289615502383, "learning_rate": 8.87239040143725e-06, "loss": 0.8562, "step": 3413 }, { "epoch": 0.55, "grad_norm": 3.2743329482664887, "learning_rate": 8.86720438475958e-06, "loss": 0.929, "step": 3414 }, { "epoch": 0.55, "grad_norm": 2.395382504270739, "learning_rate": 8.86201867668667e-06, "loss": 0.9123, "step": 3415 }, { "epoch": 0.55, "grad_norm": 2.3965457575739615, "learning_rate": 8.856833278631232e-06, "loss": 0.9506, "step": 3416 }, { "epoch": 0.55, "grad_norm": 3.088579268760001, "learning_rate": 8.851648192005925e-06, "loss": 0.8546, "step": 3417 }, { "epoch": 0.55, "grad_norm": 3.2757280661561823, "learning_rate": 8.846463418223307e-06, "loss": 0.8952, "step": 3418 }, { "epoch": 0.55, "grad_norm": 2.4497925300725303, "learning_rate": 8.841278958695848e-06, "loss": 0.8602, "step": 3419 }, { "epoch": 0.55, "grad_norm": 1.7034105533980157, "learning_rate": 8.836094814835941e-06, "loss": 0.9323, "step": 3420 }, { "epoch": 0.55, "grad_norm": 3.171457648420148, "learning_rate": 8.830910988055892e-06, "loss": 0.8838, "step": 3421 }, { "epoch": 0.55, "grad_norm": 5.156877144330071, "learning_rate": 8.825727479767916e-06, "loss": 0.952, "step": 3422 }, { "epoch": 0.55, "grad_norm": 2.970388341875569, "learning_rate": 8.820544291384138e-06, "loss": 0.8758, "step": 3423 }, { "epoch": 0.55, "grad_norm": 1.928809580151544, "learning_rate": 8.815361424316617e-06, "loss": 0.9853, "step": 3424 }, { "epoch": 0.55, "grad_norm": 1.7248124449119941, "learning_rate": 8.81017887997729e-06, "loss": 0.3336, "step": 3425 }, { "epoch": 0.55, "grad_norm": 3.051378413916332, "learning_rate": 8.804996659778036e-06, "loss": 0.8619, "step": 3426 }, { "epoch": 0.55, "grad_norm": 2.344247383746778, "learning_rate": 8.799814765130631e-06, "loss": 0.8901, "step": 3427 }, { "epoch": 0.55, "grad_norm": 2.7018952852538174, "learning_rate": 8.79463319744677e-06, "loss": 0.8666, "step": 3428 }, { "epoch": 0.55, "grad_norm": 3.2560254508296147, "learning_rate": 8.789451958138053e-06, "loss": 0.9619, "step": 3429 }, { "epoch": 0.55, "grad_norm": 1.5893359673822793, "learning_rate": 8.784271048615987e-06, "loss": 0.8548, "step": 3430 }, { "epoch": 0.55, "grad_norm": 1.7104653181559273, "learning_rate": 8.779090470292006e-06, "loss": 0.93, "step": 3431 }, { "epoch": 0.55, "grad_norm": 2.23331218064193, "learning_rate": 8.773910224577428e-06, "loss": 0.8901, "step": 3432 }, { "epoch": 0.55, "grad_norm": 3.1864659745240207, "learning_rate": 8.768730312883505e-06, "loss": 0.9275, "step": 3433 }, { "epoch": 0.55, "grad_norm": 3.7679810810302583, "learning_rate": 8.763550736621388e-06, "loss": 0.9035, "step": 3434 }, { "epoch": 0.55, "grad_norm": 4.137689204658284, "learning_rate": 8.758371497202131e-06, "loss": 0.9149, "step": 3435 }, { "epoch": 0.55, "grad_norm": 2.3606139713077194, "learning_rate": 8.753192596036703e-06, "loss": 0.951, "step": 3436 }, { "epoch": 0.55, "grad_norm": 2.7822891180116516, "learning_rate": 8.748014034535983e-06, "loss": 0.8783, "step": 3437 }, { "epoch": 0.55, "grad_norm": 2.529172445057066, "learning_rate": 8.742835814110746e-06, "loss": 0.8561, "step": 3438 }, { "epoch": 0.55, "grad_norm": 1.3939227317893546, "learning_rate": 8.737657936171691e-06, "loss": 0.941, "step": 3439 }, { "epoch": 0.55, "grad_norm": 4.0524726948650684, "learning_rate": 8.73248040212941e-06, "loss": 0.9215, "step": 3440 }, { "epoch": 0.55, "grad_norm": 3.0505441336610053, "learning_rate": 8.727303213394408e-06, "loss": 0.8602, "step": 3441 }, { "epoch": 0.55, "grad_norm": 4.201873025594392, "learning_rate": 8.722126371377091e-06, "loss": 0.8675, "step": 3442 }, { "epoch": 0.55, "grad_norm": 3.3991264960242535, "learning_rate": 8.716949877487778e-06, "loss": 0.9032, "step": 3443 }, { "epoch": 0.55, "grad_norm": 2.5119363029082096, "learning_rate": 8.711773733136684e-06, "loss": 0.9547, "step": 3444 }, { "epoch": 0.56, "grad_norm": 3.3395523390320263, "learning_rate": 8.706597939733931e-06, "loss": 0.8295, "step": 3445 }, { "epoch": 0.56, "grad_norm": 2.750457032112672, "learning_rate": 8.701422498689556e-06, "loss": 0.8469, "step": 3446 }, { "epoch": 0.56, "grad_norm": 3.362025860679602, "learning_rate": 8.69624741141349e-06, "loss": 0.8928, "step": 3447 }, { "epoch": 0.56, "grad_norm": 2.866719470166183, "learning_rate": 8.691072679315566e-06, "loss": 0.9053, "step": 3448 }, { "epoch": 0.56, "grad_norm": 2.1152985088456564, "learning_rate": 8.685898303805523e-06, "loss": 0.8516, "step": 3449 }, { "epoch": 0.56, "grad_norm": 3.6755167331846565, "learning_rate": 8.680724286293008e-06, "loss": 0.8786, "step": 3450 }, { "epoch": 0.56, "grad_norm": 2.7867812970721584, "learning_rate": 8.675550628187562e-06, "loss": 0.9461, "step": 3451 }, { "epoch": 0.56, "grad_norm": 2.943508577267238, "learning_rate": 8.670377330898631e-06, "loss": 0.9529, "step": 3452 }, { "epoch": 0.56, "grad_norm": 3.1468620050004774, "learning_rate": 8.665204395835573e-06, "loss": 0.8602, "step": 3453 }, { "epoch": 0.56, "grad_norm": 3.6430534154979646, "learning_rate": 8.660031824407625e-06, "loss": 0.8416, "step": 3454 }, { "epoch": 0.56, "grad_norm": 3.153386091418673, "learning_rate": 8.654859618023946e-06, "loss": 0.9585, "step": 3455 }, { "epoch": 0.56, "grad_norm": 3.8280049207888935, "learning_rate": 8.64968777809359e-06, "loss": 0.9327, "step": 3456 }, { "epoch": 0.56, "grad_norm": 3.970259492018118, "learning_rate": 8.644516306025501e-06, "loss": 0.9218, "step": 3457 }, { "epoch": 0.56, "grad_norm": 3.129650622725276, "learning_rate": 8.639345203228536e-06, "loss": 0.9523, "step": 3458 }, { "epoch": 0.56, "grad_norm": 2.766696442210208, "learning_rate": 8.634174471111445e-06, "loss": 0.9185, "step": 3459 }, { "epoch": 0.56, "grad_norm": 3.62308056756571, "learning_rate": 8.629004111082875e-06, "loss": 0.8642, "step": 3460 }, { "epoch": 0.56, "grad_norm": 4.113024349467969, "learning_rate": 8.623834124551375e-06, "loss": 0.9192, "step": 3461 }, { "epoch": 0.56, "grad_norm": 2.3394239045804133, "learning_rate": 8.618664512925398e-06, "loss": 0.8993, "step": 3462 }, { "epoch": 0.56, "grad_norm": 3.0904862884775235, "learning_rate": 8.613495277613283e-06, "loss": 0.9088, "step": 3463 }, { "epoch": 0.56, "grad_norm": 3.2598238437543783, "learning_rate": 8.608326420023272e-06, "loss": 0.9404, "step": 3464 }, { "epoch": 0.56, "grad_norm": 3.5531556570161458, "learning_rate": 8.603157941563506e-06, "loss": 0.922, "step": 3465 }, { "epoch": 0.56, "grad_norm": 4.288758758740555, "learning_rate": 8.597989843642025e-06, "loss": 0.8348, "step": 3466 }, { "epoch": 0.56, "grad_norm": 3.061968076015025, "learning_rate": 8.592822127666751e-06, "loss": 0.8659, "step": 3467 }, { "epoch": 0.56, "grad_norm": 3.03142775181556, "learning_rate": 8.58765479504552e-06, "loss": 0.9478, "step": 3468 }, { "epoch": 0.56, "grad_norm": 3.1249414057006732, "learning_rate": 8.582487847186061e-06, "loss": 0.8549, "step": 3469 }, { "epoch": 0.56, "grad_norm": 2.665901958571632, "learning_rate": 8.577321285495981e-06, "loss": 0.9709, "step": 3470 }, { "epoch": 0.56, "grad_norm": 4.269986895288731, "learning_rate": 8.572155111382799e-06, "loss": 0.9101, "step": 3471 }, { "epoch": 0.56, "grad_norm": 3.405641413925809, "learning_rate": 8.566989326253924e-06, "loss": 0.9195, "step": 3472 }, { "epoch": 0.56, "grad_norm": 3.428656142754981, "learning_rate": 8.56182393151666e-06, "loss": 0.8811, "step": 3473 }, { "epoch": 0.56, "grad_norm": 2.451124602694063, "learning_rate": 8.556658928578196e-06, "loss": 0.9159, "step": 3474 }, { "epoch": 0.56, "grad_norm": 2.9028917824616878, "learning_rate": 8.55149431884563e-06, "loss": 0.8252, "step": 3475 }, { "epoch": 0.56, "grad_norm": 2.5940318126860067, "learning_rate": 8.546330103725937e-06, "loss": 0.8819, "step": 3476 }, { "epoch": 0.56, "grad_norm": 2.7979708607855756, "learning_rate": 8.541166284625995e-06, "loss": 0.9594, "step": 3477 }, { "epoch": 0.56, "grad_norm": 2.8929514827516853, "learning_rate": 8.536002862952572e-06, "loss": 0.9197, "step": 3478 }, { "epoch": 0.56, "grad_norm": 1.3074807059503766, "learning_rate": 8.530839840112324e-06, "loss": 0.8972, "step": 3479 }, { "epoch": 0.56, "grad_norm": 2.6830604611926896, "learning_rate": 8.525677217511799e-06, "loss": 0.8718, "step": 3480 }, { "epoch": 0.56, "grad_norm": 4.057254397703041, "learning_rate": 8.520514996557443e-06, "loss": 0.9049, "step": 3481 }, { "epoch": 0.56, "grad_norm": 3.410176385249834, "learning_rate": 8.515353178655582e-06, "loss": 0.7943, "step": 3482 }, { "epoch": 0.56, "grad_norm": 3.0451573934733474, "learning_rate": 8.510191765212438e-06, "loss": 0.8914, "step": 3483 }, { "epoch": 0.56, "grad_norm": 3.1919246705423663, "learning_rate": 8.505030757634125e-06, "loss": 0.8884, "step": 3484 }, { "epoch": 0.56, "grad_norm": 3.6048103301671075, "learning_rate": 8.499870157326647e-06, "loss": 0.9324, "step": 3485 }, { "epoch": 0.56, "grad_norm": 2.506688897189861, "learning_rate": 8.494709965695885e-06, "loss": 0.9334, "step": 3486 }, { "epoch": 0.56, "grad_norm": 3.969184791597775, "learning_rate": 8.489550184147621e-06, "loss": 0.7678, "step": 3487 }, { "epoch": 0.56, "grad_norm": 1.797871189124267, "learning_rate": 8.484390814087527e-06, "loss": 0.9564, "step": 3488 }, { "epoch": 0.56, "grad_norm": 3.0543548324790275, "learning_rate": 8.479231856921149e-06, "loss": 0.8227, "step": 3489 }, { "epoch": 0.56, "grad_norm": 1.8006063446699214, "learning_rate": 8.474073314053935e-06, "loss": 0.3667, "step": 3490 }, { "epoch": 0.56, "grad_norm": 3.246331859169046, "learning_rate": 8.468915186891215e-06, "loss": 0.8784, "step": 3491 }, { "epoch": 0.56, "grad_norm": 2.36837305778818, "learning_rate": 8.4637574768382e-06, "loss": 0.9335, "step": 3492 }, { "epoch": 0.56, "grad_norm": 1.3549235453977804, "learning_rate": 8.458600185299994e-06, "loss": 0.9196, "step": 3493 }, { "epoch": 0.56, "grad_norm": 2.360912807090581, "learning_rate": 8.453443313681591e-06, "loss": 0.8862, "step": 3494 }, { "epoch": 0.56, "grad_norm": 1.7361047041033035, "learning_rate": 8.448286863387858e-06, "loss": 0.3337, "step": 3495 }, { "epoch": 0.56, "grad_norm": 1.969301661146096, "learning_rate": 8.443130835823553e-06, "loss": 0.9314, "step": 3496 }, { "epoch": 0.56, "grad_norm": 3.480689182001412, "learning_rate": 8.437975232393331e-06, "loss": 0.9117, "step": 3497 }, { "epoch": 0.56, "grad_norm": 3.3645570043393853, "learning_rate": 8.432820054501706e-06, "loss": 0.9064, "step": 3498 }, { "epoch": 0.56, "grad_norm": 2.0728548382559153, "learning_rate": 8.427665303553101e-06, "loss": 0.9506, "step": 3499 }, { "epoch": 0.56, "grad_norm": 1.6827179296869372, "learning_rate": 8.42251098095181e-06, "loss": 0.9296, "step": 3500 }, { "epoch": 0.56, "grad_norm": 3.138766217905772, "learning_rate": 8.41735708810201e-06, "loss": 0.9744, "step": 3501 }, { "epoch": 0.56, "grad_norm": 3.152290201973171, "learning_rate": 8.412203626407765e-06, "loss": 0.8934, "step": 3502 }, { "epoch": 0.56, "grad_norm": 3.4013109709053038, "learning_rate": 8.407050597273024e-06, "loss": 0.8904, "step": 3503 }, { "epoch": 0.56, "grad_norm": 3.533469928644264, "learning_rate": 8.401898002101605e-06, "loss": 0.8853, "step": 3504 }, { "epoch": 0.56, "grad_norm": 2.6574853156357845, "learning_rate": 8.39674584229722e-06, "loss": 0.9307, "step": 3505 }, { "epoch": 0.56, "grad_norm": 2.656275221761001, "learning_rate": 8.391594119263467e-06, "loss": 0.8452, "step": 3506 }, { "epoch": 0.57, "grad_norm": 2.7173971559212746, "learning_rate": 8.386442834403811e-06, "loss": 0.9516, "step": 3507 }, { "epoch": 0.57, "grad_norm": 2.7782624733105163, "learning_rate": 8.381291989121604e-06, "loss": 0.9557, "step": 3508 }, { "epoch": 0.57, "grad_norm": 2.5909910120629642, "learning_rate": 8.376141584820078e-06, "loss": 0.8832, "step": 3509 }, { "epoch": 0.57, "grad_norm": 3.2994801140683245, "learning_rate": 8.37099162290235e-06, "loss": 0.9583, "step": 3510 }, { "epoch": 0.57, "grad_norm": 2.496507780454504, "learning_rate": 8.365842104771405e-06, "loss": 0.8217, "step": 3511 }, { "epoch": 0.57, "grad_norm": 3.806269949123176, "learning_rate": 8.360693031830114e-06, "loss": 0.8402, "step": 3512 }, { "epoch": 0.57, "grad_norm": 2.6121395793324833, "learning_rate": 8.355544405481233e-06, "loss": 0.8742, "step": 3513 }, { "epoch": 0.57, "grad_norm": 2.27315800053134, "learning_rate": 8.350396227127383e-06, "loss": 0.9293, "step": 3514 }, { "epoch": 0.57, "grad_norm": 3.9388517451242513, "learning_rate": 8.345248498171073e-06, "loss": 0.911, "step": 3515 }, { "epoch": 0.57, "grad_norm": 3.9975712078114927, "learning_rate": 8.340101220014688e-06, "loss": 0.8822, "step": 3516 }, { "epoch": 0.57, "grad_norm": 3.9114372296870914, "learning_rate": 8.334954394060484e-06, "loss": 0.8717, "step": 3517 }, { "epoch": 0.57, "grad_norm": 3.1789324939588544, "learning_rate": 8.329808021710598e-06, "loss": 0.8874, "step": 3518 }, { "epoch": 0.57, "grad_norm": 3.5475843619275182, "learning_rate": 8.324662104367052e-06, "loss": 0.8953, "step": 3519 }, { "epoch": 0.57, "grad_norm": 3.1002249728369953, "learning_rate": 8.319516643431723e-06, "loss": 0.8719, "step": 3520 }, { "epoch": 0.57, "grad_norm": 2.813151474840619, "learning_rate": 8.314371640306386e-06, "loss": 0.9289, "step": 3521 }, { "epoch": 0.57, "grad_norm": 1.9561456743886232, "learning_rate": 8.309227096392682e-06, "loss": 0.8475, "step": 3522 }, { "epoch": 0.57, "grad_norm": 3.498692677073595, "learning_rate": 8.30408301309212e-06, "loss": 0.8378, "step": 3523 }, { "epoch": 0.57, "grad_norm": 3.396386380397008, "learning_rate": 8.298939391806094e-06, "loss": 0.9237, "step": 3524 }, { "epoch": 0.57, "grad_norm": 3.407390018739729, "learning_rate": 8.293796233935864e-06, "loss": 0.8628, "step": 3525 }, { "epoch": 0.57, "grad_norm": 2.5190537111038878, "learning_rate": 8.288653540882579e-06, "loss": 0.9255, "step": 3526 }, { "epoch": 0.57, "grad_norm": 3.5632739648491496, "learning_rate": 8.283511314047236e-06, "loss": 0.8588, "step": 3527 }, { "epoch": 0.57, "grad_norm": 4.086072405307573, "learning_rate": 8.27836955483073e-06, "loss": 0.7749, "step": 3528 }, { "epoch": 0.57, "grad_norm": 3.166355987835104, "learning_rate": 8.273228264633815e-06, "loss": 0.8484, "step": 3529 }, { "epoch": 0.57, "grad_norm": 2.744934966215047, "learning_rate": 8.268087444857119e-06, "loss": 0.86, "step": 3530 }, { "epoch": 0.57, "grad_norm": 2.2273636296889388, "learning_rate": 8.262947096901142e-06, "loss": 0.8729, "step": 3531 }, { "epoch": 0.57, "grad_norm": 3.5651948340037096, "learning_rate": 8.25780722216626e-06, "loss": 0.8799, "step": 3532 }, { "epoch": 0.57, "grad_norm": 2.9687850146989385, "learning_rate": 8.252667822052714e-06, "loss": 0.9148, "step": 3533 }, { "epoch": 0.57, "grad_norm": 2.4132746426240885, "learning_rate": 8.247528897960615e-06, "loss": 0.8994, "step": 3534 }, { "epoch": 0.57, "grad_norm": 3.337861339424996, "learning_rate": 8.242390451289959e-06, "loss": 0.9525, "step": 3535 }, { "epoch": 0.57, "grad_norm": 3.6183937648492597, "learning_rate": 8.237252483440585e-06, "loss": 0.9142, "step": 3536 }, { "epoch": 0.57, "grad_norm": 2.9334690763392035, "learning_rate": 8.232114995812228e-06, "loss": 0.92, "step": 3537 }, { "epoch": 0.57, "grad_norm": 2.164373492538628, "learning_rate": 8.22697798980448e-06, "loss": 0.89, "step": 3538 }, { "epoch": 0.57, "grad_norm": 4.449702717998637, "learning_rate": 8.2218414668168e-06, "loss": 0.8745, "step": 3539 }, { "epoch": 0.57, "grad_norm": 2.942347321547903, "learning_rate": 8.216705428248519e-06, "loss": 0.8282, "step": 3540 }, { "epoch": 0.57, "grad_norm": 2.7307130543312703, "learning_rate": 8.211569875498838e-06, "loss": 0.8018, "step": 3541 }, { "epoch": 0.57, "grad_norm": 2.4450069151236447, "learning_rate": 8.206434809966817e-06, "loss": 0.3495, "step": 3542 }, { "epoch": 0.57, "grad_norm": 3.8173815633791763, "learning_rate": 8.201300233051395e-06, "loss": 0.8792, "step": 3543 }, { "epoch": 0.57, "grad_norm": 2.7780512728096167, "learning_rate": 8.196166146151373e-06, "loss": 0.9348, "step": 3544 }, { "epoch": 0.57, "grad_norm": 2.895596903930813, "learning_rate": 8.191032550665417e-06, "loss": 0.8264, "step": 3545 }, { "epoch": 0.57, "grad_norm": 3.223151595453647, "learning_rate": 8.185899447992056e-06, "loss": 0.9246, "step": 3546 }, { "epoch": 0.57, "grad_norm": 2.578548882057155, "learning_rate": 8.180766839529689e-06, "loss": 0.8831, "step": 3547 }, { "epoch": 0.57, "grad_norm": 2.9284416296236455, "learning_rate": 8.175634726676589e-06, "loss": 0.8633, "step": 3548 }, { "epoch": 0.57, "grad_norm": 2.1979191736384793, "learning_rate": 8.170503110830874e-06, "loss": 0.8682, "step": 3549 }, { "epoch": 0.57, "grad_norm": 1.912015856130445, "learning_rate": 8.16537199339054e-06, "loss": 0.9616, "step": 3550 }, { "epoch": 0.57, "grad_norm": 2.998733889117896, "learning_rate": 8.160241375753452e-06, "loss": 0.9388, "step": 3551 }, { "epoch": 0.57, "grad_norm": 1.940389263190842, "learning_rate": 8.155111259317323e-06, "loss": 0.8698, "step": 3552 }, { "epoch": 0.57, "grad_norm": 4.255873716416864, "learning_rate": 8.149981645479743e-06, "loss": 0.9117, "step": 3553 }, { "epoch": 0.57, "grad_norm": 2.0616176914369335, "learning_rate": 8.144852535638161e-06, "loss": 0.8323, "step": 3554 }, { "epoch": 0.57, "grad_norm": 1.624403550651895, "learning_rate": 8.139723931189883e-06, "loss": 0.8691, "step": 3555 }, { "epoch": 0.57, "grad_norm": 2.3942510493812836, "learning_rate": 8.134595833532084e-06, "loss": 0.8882, "step": 3556 }, { "epoch": 0.57, "grad_norm": 3.7187007612286447, "learning_rate": 8.129468244061805e-06, "loss": 0.9471, "step": 3557 }, { "epoch": 0.57, "grad_norm": 3.1622280501491193, "learning_rate": 8.12434116417593e-06, "loss": 0.8862, "step": 3558 }, { "epoch": 0.57, "grad_norm": 2.7487608979040785, "learning_rate": 8.11921459527123e-06, "loss": 0.896, "step": 3559 }, { "epoch": 0.57, "grad_norm": 3.621719454436059, "learning_rate": 8.114088538744318e-06, "loss": 0.9197, "step": 3560 }, { "epoch": 0.57, "grad_norm": 2.709118147382024, "learning_rate": 8.108962995991673e-06, "loss": 0.9256, "step": 3561 }, { "epoch": 0.57, "grad_norm": 2.528805152491504, "learning_rate": 8.103837968409634e-06, "loss": 0.8443, "step": 3562 }, { "epoch": 0.57, "grad_norm": 2.425593911030457, "learning_rate": 8.098713457394398e-06, "loss": 0.9206, "step": 3563 }, { "epoch": 0.57, "grad_norm": 2.0548331205452657, "learning_rate": 8.093589464342032e-06, "loss": 0.3079, "step": 3564 }, { "epoch": 0.57, "grad_norm": 2.2876058376821415, "learning_rate": 8.08846599064844e-06, "loss": 0.9182, "step": 3565 }, { "epoch": 0.57, "grad_norm": 2.507484676051833, "learning_rate": 8.083343037709407e-06, "loss": 0.8954, "step": 3566 }, { "epoch": 0.57, "grad_norm": 2.535310667063867, "learning_rate": 8.078220606920565e-06, "loss": 0.8497, "step": 3567 }, { "epoch": 0.57, "grad_norm": 3.3888562067120094, "learning_rate": 8.073098699677402e-06, "loss": 0.9339, "step": 3568 }, { "epoch": 0.58, "grad_norm": 3.242047336719918, "learning_rate": 8.067977317375268e-06, "loss": 0.826, "step": 3569 }, { "epoch": 0.58, "grad_norm": 3.197890754785256, "learning_rate": 8.062856461409372e-06, "loss": 0.9015, "step": 3570 }, { "epoch": 0.58, "grad_norm": 2.49088887308974, "learning_rate": 8.057736133174768e-06, "loss": 0.8732, "step": 3571 }, { "epoch": 0.58, "grad_norm": 3.548438849169644, "learning_rate": 8.052616334066383e-06, "loss": 0.8991, "step": 3572 }, { "epoch": 0.58, "grad_norm": 2.2219552542012098, "learning_rate": 8.047497065478991e-06, "loss": 0.2997, "step": 3573 }, { "epoch": 0.58, "grad_norm": 2.9014666170168297, "learning_rate": 8.042378328807217e-06, "loss": 1.0046, "step": 3574 }, { "epoch": 0.58, "grad_norm": 1.8157868672635178, "learning_rate": 8.037260125445548e-06, "loss": 0.3474, "step": 3575 }, { "epoch": 0.58, "grad_norm": 3.0177197074390483, "learning_rate": 8.032142456788328e-06, "loss": 0.8791, "step": 3576 }, { "epoch": 0.58, "grad_norm": 1.9124048645691718, "learning_rate": 8.027025324229743e-06, "loss": 0.3417, "step": 3577 }, { "epoch": 0.58, "grad_norm": 3.0931776654054617, "learning_rate": 8.021908729163842e-06, "loss": 0.9373, "step": 3578 }, { "epoch": 0.58, "grad_norm": 2.290502310221468, "learning_rate": 8.016792672984538e-06, "loss": 0.9294, "step": 3579 }, { "epoch": 0.58, "grad_norm": 2.7217746440198884, "learning_rate": 8.01167715708557e-06, "loss": 0.8312, "step": 3580 }, { "epoch": 0.58, "grad_norm": 3.2984220662517147, "learning_rate": 8.006562182860557e-06, "loss": 0.9461, "step": 3581 }, { "epoch": 0.58, "grad_norm": 3.2317936865987065, "learning_rate": 8.001447751702955e-06, "loss": 0.8811, "step": 3582 }, { "epoch": 0.58, "grad_norm": 3.066123998007058, "learning_rate": 7.996333865006074e-06, "loss": 0.9259, "step": 3583 }, { "epoch": 0.58, "grad_norm": 2.1749400733496898, "learning_rate": 7.99122052416308e-06, "loss": 0.8746, "step": 3584 }, { "epoch": 0.58, "grad_norm": 3.1497006591982215, "learning_rate": 7.986107730566985e-06, "loss": 0.9826, "step": 3585 }, { "epoch": 0.58, "grad_norm": 2.995114640221447, "learning_rate": 7.980995485610665e-06, "loss": 0.8426, "step": 3586 }, { "epoch": 0.58, "grad_norm": 3.2425822190071734, "learning_rate": 7.975883790686821e-06, "loss": 0.8999, "step": 3587 }, { "epoch": 0.58, "grad_norm": 2.6353655924515835, "learning_rate": 7.970772647188029e-06, "loss": 0.9042, "step": 3588 }, { "epoch": 0.58, "grad_norm": 3.8460954162854404, "learning_rate": 7.965662056506708e-06, "loss": 0.845, "step": 3589 }, { "epoch": 0.58, "grad_norm": 2.6105590692167127, "learning_rate": 7.960552020035118e-06, "loss": 0.8397, "step": 3590 }, { "epoch": 0.58, "grad_norm": 1.3882641415032884, "learning_rate": 7.955442539165372e-06, "loss": 0.8271, "step": 3591 }, { "epoch": 0.58, "grad_norm": 4.1518414284151195, "learning_rate": 7.950333615289442e-06, "loss": 0.9767, "step": 3592 }, { "epoch": 0.58, "grad_norm": 2.807617272418477, "learning_rate": 7.945225249799132e-06, "loss": 0.9079, "step": 3593 }, { "epoch": 0.58, "grad_norm": 1.4530749568730954, "learning_rate": 7.9401174440861e-06, "loss": 0.3115, "step": 3594 }, { "epoch": 0.58, "grad_norm": 3.7865641429167485, "learning_rate": 7.935010199541864e-06, "loss": 0.8396, "step": 3595 }, { "epoch": 0.58, "grad_norm": 2.623753206161132, "learning_rate": 7.92990351755777e-06, "loss": 0.9408, "step": 3596 }, { "epoch": 0.58, "grad_norm": 2.472989654963629, "learning_rate": 7.924797399525017e-06, "loss": 0.941, "step": 3597 }, { "epoch": 0.58, "grad_norm": 2.356092885898432, "learning_rate": 7.91969184683466e-06, "loss": 0.3157, "step": 3598 }, { "epoch": 0.58, "grad_norm": 2.9532304371575226, "learning_rate": 7.914586860877584e-06, "loss": 0.7946, "step": 3599 }, { "epoch": 0.58, "grad_norm": 3.2130320601943, "learning_rate": 7.909482443044532e-06, "loss": 0.8672, "step": 3600 }, { "epoch": 0.58, "grad_norm": 1.7572807355388875, "learning_rate": 7.904378594726095e-06, "loss": 0.9652, "step": 3601 }, { "epoch": 0.58, "grad_norm": 2.906665567199546, "learning_rate": 7.899275317312686e-06, "loss": 0.964, "step": 3602 }, { "epoch": 0.58, "grad_norm": 2.45689597917332, "learning_rate": 7.89417261219459e-06, "loss": 0.9712, "step": 3603 }, { "epoch": 0.58, "grad_norm": 2.8359327920501554, "learning_rate": 7.889070480761921e-06, "loss": 0.903, "step": 3604 }, { "epoch": 0.58, "grad_norm": 2.1047888739172556, "learning_rate": 7.883968924404645e-06, "loss": 0.9036, "step": 3605 }, { "epoch": 0.58, "grad_norm": 2.6537271523985733, "learning_rate": 7.878867944512561e-06, "loss": 0.9042, "step": 3606 }, { "epoch": 0.58, "grad_norm": 1.4690272292800468, "learning_rate": 7.873767542475316e-06, "loss": 0.8403, "step": 3607 }, { "epoch": 0.58, "grad_norm": 2.006399763465332, "learning_rate": 7.868667719682409e-06, "loss": 0.9297, "step": 3608 }, { "epoch": 0.58, "grad_norm": 2.4043378291914603, "learning_rate": 7.863568477523158e-06, "loss": 0.8875, "step": 3609 }, { "epoch": 0.58, "grad_norm": 2.777994372082716, "learning_rate": 7.858469817386746e-06, "loss": 0.8909, "step": 3610 }, { "epoch": 0.58, "grad_norm": 3.0765502309481905, "learning_rate": 7.853371740662193e-06, "loss": 0.8291, "step": 3611 }, { "epoch": 0.58, "grad_norm": 4.123468288062399, "learning_rate": 7.848274248738345e-06, "loss": 0.8846, "step": 3612 }, { "epoch": 0.58, "grad_norm": 2.8607461278839694, "learning_rate": 7.843177343003905e-06, "loss": 0.8889, "step": 3613 }, { "epoch": 0.58, "grad_norm": 3.783601266277314, "learning_rate": 7.838081024847412e-06, "loss": 0.9295, "step": 3614 }, { "epoch": 0.58, "grad_norm": 4.0066665885108605, "learning_rate": 7.83298529565724e-06, "loss": 0.9213, "step": 3615 }, { "epoch": 0.58, "grad_norm": 4.100144844287898, "learning_rate": 7.827890156821604e-06, "loss": 0.9417, "step": 3616 }, { "epoch": 0.58, "grad_norm": 3.327498399374424, "learning_rate": 7.82279560972857e-06, "loss": 0.9272, "step": 3617 }, { "epoch": 0.58, "grad_norm": 3.6444428035556644, "learning_rate": 7.817701655766024e-06, "loss": 0.9166, "step": 3618 }, { "epoch": 0.58, "grad_norm": 2.7274129246309755, "learning_rate": 7.8126082963217e-06, "loss": 0.932, "step": 3619 }, { "epoch": 0.58, "grad_norm": 1.7629138081989573, "learning_rate": 7.807515532783177e-06, "loss": 0.3369, "step": 3620 }, { "epoch": 0.58, "grad_norm": 3.2964971967697085, "learning_rate": 7.802423366537856e-06, "loss": 0.9569, "step": 3621 }, { "epoch": 0.58, "grad_norm": 2.262204765320336, "learning_rate": 7.797331798972986e-06, "loss": 0.8423, "step": 3622 }, { "epoch": 0.58, "grad_norm": 3.3416907128081674, "learning_rate": 7.79224083147565e-06, "loss": 0.9004, "step": 3623 }, { "epoch": 0.58, "grad_norm": 2.099030170836297, "learning_rate": 7.787150465432774e-06, "loss": 0.9295, "step": 3624 }, { "epoch": 0.58, "grad_norm": 2.6156627255798934, "learning_rate": 7.782060702231103e-06, "loss": 0.8772, "step": 3625 }, { "epoch": 0.58, "grad_norm": 1.419999698988117, "learning_rate": 7.776971543257236e-06, "loss": 0.9016, "step": 3626 }, { "epoch": 0.58, "grad_norm": 2.689542304882639, "learning_rate": 7.771882989897603e-06, "loss": 0.9256, "step": 3627 }, { "epoch": 0.58, "grad_norm": 2.251568883056911, "learning_rate": 7.766795043538457e-06, "loss": 0.8778, "step": 3628 }, { "epoch": 0.58, "grad_norm": 2.4129120390851004, "learning_rate": 7.7617077055659e-06, "loss": 0.9492, "step": 3629 }, { "epoch": 0.58, "grad_norm": 2.619885867013439, "learning_rate": 7.756620977365869e-06, "loss": 0.944, "step": 3630 }, { "epoch": 0.59, "grad_norm": 3.261131590115136, "learning_rate": 7.751534860324116e-06, "loss": 0.8902, "step": 3631 }, { "epoch": 0.59, "grad_norm": 3.0684773248235913, "learning_rate": 7.746449355826247e-06, "loss": 0.8748, "step": 3632 }, { "epoch": 0.59, "grad_norm": 2.7742537036220294, "learning_rate": 7.741364465257697e-06, "loss": 0.9009, "step": 3633 }, { "epoch": 0.59, "grad_norm": 2.41117691364336, "learning_rate": 7.736280190003723e-06, "loss": 0.909, "step": 3634 }, { "epoch": 0.59, "grad_norm": 4.2107952709630565, "learning_rate": 7.731196531449426e-06, "loss": 0.904, "step": 3635 }, { "epoch": 0.59, "grad_norm": 3.1624953922042147, "learning_rate": 7.726113490979735e-06, "loss": 0.8675, "step": 3636 }, { "epoch": 0.59, "grad_norm": 4.352205677543693, "learning_rate": 7.721031069979408e-06, "loss": 0.8423, "step": 3637 }, { "epoch": 0.59, "grad_norm": 4.067721488563408, "learning_rate": 7.715949269833034e-06, "loss": 0.8886, "step": 3638 }, { "epoch": 0.59, "grad_norm": 1.914042258642176, "learning_rate": 7.710868091925047e-06, "loss": 0.9498, "step": 3639 }, { "epoch": 0.59, "grad_norm": 3.1714197593183595, "learning_rate": 7.705787537639685e-06, "loss": 0.8805, "step": 3640 }, { "epoch": 0.59, "grad_norm": 2.827194144978098, "learning_rate": 7.70070760836104e-06, "loss": 0.901, "step": 3641 }, { "epoch": 0.59, "grad_norm": 4.247056951828548, "learning_rate": 7.695628305473025e-06, "loss": 0.8112, "step": 3642 }, { "epoch": 0.59, "grad_norm": 3.050539913229629, "learning_rate": 7.69054963035938e-06, "loss": 0.892, "step": 3643 }, { "epoch": 0.59, "grad_norm": 1.1757859138066764, "learning_rate": 7.685471584403674e-06, "loss": 0.8839, "step": 3644 }, { "epoch": 0.59, "grad_norm": 2.1861176619000044, "learning_rate": 7.680394168989306e-06, "loss": 0.8872, "step": 3645 }, { "epoch": 0.59, "grad_norm": 1.8778272294349068, "learning_rate": 7.675317385499513e-06, "loss": 0.951, "step": 3646 }, { "epoch": 0.59, "grad_norm": 2.5806027614663583, "learning_rate": 7.670241235317339e-06, "loss": 0.9048, "step": 3647 }, { "epoch": 0.59, "grad_norm": 1.7777616910736442, "learning_rate": 7.665165719825676e-06, "loss": 0.3581, "step": 3648 }, { "epoch": 0.59, "grad_norm": 3.6710327481082357, "learning_rate": 7.660090840407231e-06, "loss": 0.8907, "step": 3649 }, { "epoch": 0.59, "grad_norm": 3.4685376076498224, "learning_rate": 7.65501659844454e-06, "loss": 0.9291, "step": 3650 }, { "epoch": 0.59, "grad_norm": 2.6892639403890164, "learning_rate": 7.649942995319965e-06, "loss": 0.919, "step": 3651 }, { "epoch": 0.59, "grad_norm": 2.5544846947107502, "learning_rate": 7.644870032415705e-06, "loss": 0.8808, "step": 3652 }, { "epoch": 0.59, "grad_norm": 3.1566548607137856, "learning_rate": 7.63979771111376e-06, "loss": 0.8932, "step": 3653 }, { "epoch": 0.59, "grad_norm": 2.375388163919904, "learning_rate": 7.63472603279598e-06, "loss": 0.9276, "step": 3654 }, { "epoch": 0.59, "grad_norm": 3.790621784708872, "learning_rate": 7.629654998844031e-06, "loss": 0.8712, "step": 3655 }, { "epoch": 0.59, "grad_norm": 2.2011212916007454, "learning_rate": 7.624584610639397e-06, "loss": 0.8655, "step": 3656 }, { "epoch": 0.59, "grad_norm": 2.160108569207048, "learning_rate": 7.619514869563394e-06, "loss": 0.969, "step": 3657 }, { "epoch": 0.59, "grad_norm": 3.6294917197925893, "learning_rate": 7.6144457769971606e-06, "loss": 0.849, "step": 3658 }, { "epoch": 0.59, "grad_norm": 1.9898215212889, "learning_rate": 7.609377334321653e-06, "loss": 0.9077, "step": 3659 }, { "epoch": 0.59, "grad_norm": 2.80709590948873, "learning_rate": 7.604309542917656e-06, "loss": 0.8951, "step": 3660 }, { "epoch": 0.59, "grad_norm": 2.536008153884246, "learning_rate": 7.599242404165783e-06, "loss": 0.9368, "step": 3661 }, { "epoch": 0.59, "grad_norm": 2.550886867609054, "learning_rate": 7.59417591944645e-06, "loss": 0.8652, "step": 3662 }, { "epoch": 0.59, "grad_norm": 1.6133651157406506, "learning_rate": 7.589110090139917e-06, "loss": 0.9342, "step": 3663 }, { "epoch": 0.59, "grad_norm": 1.4295902740271484, "learning_rate": 7.584044917626251e-06, "loss": 0.3, "step": 3664 }, { "epoch": 0.59, "grad_norm": 3.415779153568355, "learning_rate": 7.5789804032853476e-06, "loss": 0.8514, "step": 3665 }, { "epoch": 0.59, "grad_norm": 2.597995087745962, "learning_rate": 7.573916548496916e-06, "loss": 0.9243, "step": 3666 }, { "epoch": 0.59, "grad_norm": 1.166407891366242, "learning_rate": 7.5688533546404895e-06, "loss": 0.839, "step": 3667 }, { "epoch": 0.59, "grad_norm": 4.013471092529668, "learning_rate": 7.5637908230954316e-06, "loss": 0.8715, "step": 3668 }, { "epoch": 0.59, "grad_norm": 2.1906112343958317, "learning_rate": 7.558728955240901e-06, "loss": 0.8191, "step": 3669 }, { "epoch": 0.59, "grad_norm": 1.9992141371774768, "learning_rate": 7.553667752455899e-06, "loss": 0.8676, "step": 3670 }, { "epoch": 0.59, "grad_norm": 1.081463496127831, "learning_rate": 7.548607216119237e-06, "loss": 0.8496, "step": 3671 }, { "epoch": 0.59, "grad_norm": 3.1422121203504614, "learning_rate": 7.54354734760954e-06, "loss": 0.9295, "step": 3672 }, { "epoch": 0.59, "grad_norm": 2.2620546819484173, "learning_rate": 7.5384881483052585e-06, "loss": 0.9608, "step": 3673 }, { "epoch": 0.59, "grad_norm": 2.6997535275177404, "learning_rate": 7.53342961958466e-06, "loss": 0.9072, "step": 3674 }, { "epoch": 0.59, "grad_norm": 3.905822608455005, "learning_rate": 7.528371762825819e-06, "loss": 0.9137, "step": 3675 }, { "epoch": 0.59, "grad_norm": 2.4901925356472954, "learning_rate": 7.5233145794066445e-06, "loss": 0.8956, "step": 3676 }, { "epoch": 0.59, "grad_norm": 2.9985090365481213, "learning_rate": 7.518258070704849e-06, "loss": 0.7952, "step": 3677 }, { "epoch": 0.59, "grad_norm": 2.7956652022706967, "learning_rate": 7.513202238097963e-06, "loss": 0.8435, "step": 3678 }, { "epoch": 0.59, "grad_norm": 2.840673941850541, "learning_rate": 7.508147082963337e-06, "loss": 0.8757, "step": 3679 }, { "epoch": 0.59, "grad_norm": 2.038641290532104, "learning_rate": 7.503092606678135e-06, "loss": 0.8427, "step": 3680 }, { "epoch": 0.59, "grad_norm": 3.1800325473583286, "learning_rate": 7.4980388106193336e-06, "loss": 0.8312, "step": 3681 }, { "epoch": 0.59, "grad_norm": 1.3847821534627127, "learning_rate": 7.4929856961637246e-06, "loss": 0.8227, "step": 3682 }, { "epoch": 0.59, "grad_norm": 2.654814298496822, "learning_rate": 7.487933264687921e-06, "loss": 0.9135, "step": 3683 }, { "epoch": 0.59, "grad_norm": 2.511995054149011, "learning_rate": 7.482881517568344e-06, "loss": 0.9011, "step": 3684 }, { "epoch": 0.59, "grad_norm": 3.352137945254657, "learning_rate": 7.477830456181222e-06, "loss": 0.9076, "step": 3685 }, { "epoch": 0.59, "grad_norm": 3.112813304161517, "learning_rate": 7.472780081902608e-06, "loss": 0.87, "step": 3686 }, { "epoch": 0.59, "grad_norm": 2.697733535344469, "learning_rate": 7.467730396108368e-06, "loss": 0.9676, "step": 3687 }, { "epoch": 0.59, "grad_norm": 2.6630635556104747, "learning_rate": 7.462681400174165e-06, "loss": 0.905, "step": 3688 }, { "epoch": 0.59, "grad_norm": 3.4512380562326577, "learning_rate": 7.45763309547549e-06, "loss": 0.9, "step": 3689 }, { "epoch": 0.59, "grad_norm": 2.898912087630303, "learning_rate": 7.452585483387647e-06, "loss": 0.8891, "step": 3690 }, { "epoch": 0.59, "grad_norm": 1.8007911135719903, "learning_rate": 7.4475385652857325e-06, "loss": 0.8829, "step": 3691 }, { "epoch": 0.59, "grad_norm": 2.360556603436432, "learning_rate": 7.442492342544672e-06, "loss": 0.9504, "step": 3692 }, { "epoch": 0.6, "grad_norm": 2.7172121104989913, "learning_rate": 7.437446816539198e-06, "loss": 0.8835, "step": 3693 }, { "epoch": 0.6, "grad_norm": 3.533276002046914, "learning_rate": 7.432401988643847e-06, "loss": 0.8497, "step": 3694 }, { "epoch": 0.6, "grad_norm": 3.135445494323889, "learning_rate": 7.427357860232971e-06, "loss": 0.8421, "step": 3695 }, { "epoch": 0.6, "grad_norm": 3.4660652011785356, "learning_rate": 7.422314432680731e-06, "loss": 0.8608, "step": 3696 }, { "epoch": 0.6, "grad_norm": 3.734922743972408, "learning_rate": 7.417271707361091e-06, "loss": 0.866, "step": 3697 }, { "epoch": 0.6, "grad_norm": 2.0663078299089697, "learning_rate": 7.412229685647829e-06, "loss": 0.8928, "step": 3698 }, { "epoch": 0.6, "grad_norm": 3.083226038810078, "learning_rate": 7.407188368914537e-06, "loss": 0.8108, "step": 3699 }, { "epoch": 0.6, "grad_norm": 2.127739542868529, "learning_rate": 7.402147758534604e-06, "loss": 0.9222, "step": 3700 }, { "epoch": 0.6, "grad_norm": 3.962734920625837, "learning_rate": 7.39710785588123e-06, "loss": 0.8955, "step": 3701 }, { "epoch": 0.6, "grad_norm": 2.8221694032310407, "learning_rate": 7.3920686623274265e-06, "loss": 0.921, "step": 3702 }, { "epoch": 0.6, "grad_norm": 3.7309012756455084, "learning_rate": 7.387030179246009e-06, "loss": 0.9722, "step": 3703 }, { "epoch": 0.6, "grad_norm": 3.0642802455318505, "learning_rate": 7.381992408009593e-06, "loss": 0.9466, "step": 3704 }, { "epoch": 0.6, "grad_norm": 3.3506660596384252, "learning_rate": 7.376955349990613e-06, "loss": 0.9394, "step": 3705 }, { "epoch": 0.6, "grad_norm": 2.194347345910764, "learning_rate": 7.3719190065613035e-06, "loss": 0.8832, "step": 3706 }, { "epoch": 0.6, "grad_norm": 2.351619681743516, "learning_rate": 7.366883379093698e-06, "loss": 0.9341, "step": 3707 }, { "epoch": 0.6, "grad_norm": 2.4858060350069064, "learning_rate": 7.361848468959641e-06, "loss": 0.8431, "step": 3708 }, { "epoch": 0.6, "grad_norm": 2.7466428378685674, "learning_rate": 7.356814277530785e-06, "loss": 0.9022, "step": 3709 }, { "epoch": 0.6, "grad_norm": 3.2397554766843215, "learning_rate": 7.351780806178578e-06, "loss": 0.8394, "step": 3710 }, { "epoch": 0.6, "grad_norm": 3.6175030762211837, "learning_rate": 7.346748056274275e-06, "loss": 0.945, "step": 3711 }, { "epoch": 0.6, "grad_norm": 2.259086381514502, "learning_rate": 7.341716029188946e-06, "loss": 0.8837, "step": 3712 }, { "epoch": 0.6, "grad_norm": 3.1938301509347706, "learning_rate": 7.33668472629344e-06, "loss": 0.8249, "step": 3713 }, { "epoch": 0.6, "grad_norm": 2.5565359911624337, "learning_rate": 7.33165414895843e-06, "loss": 0.9417, "step": 3714 }, { "epoch": 0.6, "grad_norm": 1.8470052011620073, "learning_rate": 7.326624298554387e-06, "loss": 0.8935, "step": 3715 }, { "epoch": 0.6, "grad_norm": 1.9231115565482526, "learning_rate": 7.321595176451575e-06, "loss": 0.3245, "step": 3716 }, { "epoch": 0.6, "grad_norm": 4.0710907702745285, "learning_rate": 7.316566784020067e-06, "loss": 0.9019, "step": 3717 }, { "epoch": 0.6, "grad_norm": 2.2148256654354275, "learning_rate": 7.311539122629738e-06, "loss": 0.8649, "step": 3718 }, { "epoch": 0.6, "grad_norm": 1.9892557749974225, "learning_rate": 7.306512193650258e-06, "loss": 0.8415, "step": 3719 }, { "epoch": 0.6, "grad_norm": 2.77656958583943, "learning_rate": 7.301485998451101e-06, "loss": 0.9241, "step": 3720 }, { "epoch": 0.6, "grad_norm": 3.690601660826658, "learning_rate": 7.29646053840155e-06, "loss": 0.889, "step": 3721 }, { "epoch": 0.6, "grad_norm": 3.7264948105012667, "learning_rate": 7.291435814870664e-06, "loss": 0.8824, "step": 3722 }, { "epoch": 0.6, "grad_norm": 3.503854672531831, "learning_rate": 7.2864118292273265e-06, "loss": 0.8734, "step": 3723 }, { "epoch": 0.6, "grad_norm": 3.39384834610719, "learning_rate": 7.281388582840209e-06, "loss": 0.8957, "step": 3724 }, { "epoch": 0.6, "grad_norm": 2.8237985638701613, "learning_rate": 7.276366077077781e-06, "loss": 0.9645, "step": 3725 }, { "epoch": 0.6, "grad_norm": 3.9845347652941117, "learning_rate": 7.271344313308308e-06, "loss": 0.9436, "step": 3726 }, { "epoch": 0.6, "grad_norm": 2.185745298430958, "learning_rate": 7.2663232928998594e-06, "loss": 0.8542, "step": 3727 }, { "epoch": 0.6, "grad_norm": 3.1740937388855457, "learning_rate": 7.2613030172203045e-06, "loss": 0.9345, "step": 3728 }, { "epoch": 0.6, "grad_norm": 3.3720853016970653, "learning_rate": 7.256283487637297e-06, "loss": 0.7983, "step": 3729 }, { "epoch": 0.6, "grad_norm": 2.6275326454755135, "learning_rate": 7.251264705518299e-06, "loss": 0.8286, "step": 3730 }, { "epoch": 0.6, "grad_norm": 2.677772037566783, "learning_rate": 7.246246672230568e-06, "loss": 0.8834, "step": 3731 }, { "epoch": 0.6, "grad_norm": 1.3612404662359752, "learning_rate": 7.241229389141147e-06, "loss": 0.909, "step": 3732 }, { "epoch": 0.6, "grad_norm": 3.704767048213796, "learning_rate": 7.236212857616885e-06, "loss": 0.9162, "step": 3733 }, { "epoch": 0.6, "grad_norm": 2.5529136915787793, "learning_rate": 7.231197079024431e-06, "loss": 0.8424, "step": 3734 }, { "epoch": 0.6, "grad_norm": 3.0298254067254864, "learning_rate": 7.226182054730208e-06, "loss": 0.8987, "step": 3735 }, { "epoch": 0.6, "grad_norm": 2.3833721566761743, "learning_rate": 7.221167786100458e-06, "loss": 0.8999, "step": 3736 }, { "epoch": 0.6, "grad_norm": 3.7568620840005136, "learning_rate": 7.216154274501203e-06, "loss": 0.9084, "step": 3737 }, { "epoch": 0.6, "grad_norm": 2.9187370218319266, "learning_rate": 7.211141521298259e-06, "loss": 0.8935, "step": 3738 }, { "epoch": 0.6, "grad_norm": 3.216090909460589, "learning_rate": 7.206129527857239e-06, "loss": 0.8635, "step": 3739 }, { "epoch": 0.6, "grad_norm": 3.05627088169071, "learning_rate": 7.201118295543553e-06, "loss": 0.9002, "step": 3740 }, { "epoch": 0.6, "grad_norm": 2.809927038962833, "learning_rate": 7.196107825722392e-06, "loss": 0.8716, "step": 3741 }, { "epoch": 0.6, "grad_norm": 3.9412391710359453, "learning_rate": 7.191098119758747e-06, "loss": 0.8548, "step": 3742 }, { "epoch": 0.6, "grad_norm": 2.577285352440205, "learning_rate": 7.1860891790174035e-06, "loss": 0.9485, "step": 3743 }, { "epoch": 0.6, "grad_norm": 2.5724675845818474, "learning_rate": 7.181081004862937e-06, "loss": 0.3391, "step": 3744 }, { "epoch": 0.6, "grad_norm": 3.72770331173038, "learning_rate": 7.176073598659706e-06, "loss": 0.8762, "step": 3745 }, { "epoch": 0.6, "grad_norm": 3.31581223630621, "learning_rate": 7.171066961771868e-06, "loss": 0.9148, "step": 3746 }, { "epoch": 0.6, "grad_norm": 3.723462877014996, "learning_rate": 7.1660610955633725e-06, "loss": 0.8609, "step": 3747 }, { "epoch": 0.6, "grad_norm": 1.0755212274262798, "learning_rate": 7.16105600139795e-06, "loss": 0.8837, "step": 3748 }, { "epoch": 0.6, "grad_norm": 1.9777678783516655, "learning_rate": 7.156051680639127e-06, "loss": 0.333, "step": 3749 }, { "epoch": 0.6, "grad_norm": 2.7965796117528687, "learning_rate": 7.151048134650225e-06, "loss": 0.8582, "step": 3750 }, { "epoch": 0.6, "grad_norm": 2.2422460887312123, "learning_rate": 7.146045364794339e-06, "loss": 0.8779, "step": 3751 }, { "epoch": 0.6, "grad_norm": 3.8619968913780434, "learning_rate": 7.1410433724343645e-06, "loss": 0.8948, "step": 3752 }, { "epoch": 0.6, "grad_norm": 3.1053999599299305, "learning_rate": 7.136042158932988e-06, "loss": 0.9372, "step": 3753 }, { "epoch": 0.6, "grad_norm": 2.617207996800569, "learning_rate": 7.131041725652669e-06, "loss": 0.9317, "step": 3754 }, { "epoch": 0.61, "grad_norm": 2.6931517855041895, "learning_rate": 7.126042073955668e-06, "loss": 0.8405, "step": 3755 }, { "epoch": 0.61, "grad_norm": 1.78488404183175, "learning_rate": 7.121043205204028e-06, "loss": 0.862, "step": 3756 }, { "epoch": 0.61, "grad_norm": 4.129770930035988, "learning_rate": 7.116045120759575e-06, "loss": 0.9278, "step": 3757 }, { "epoch": 0.61, "grad_norm": 3.35681494818742, "learning_rate": 7.1110478219839295e-06, "loss": 0.8926, "step": 3758 }, { "epoch": 0.61, "grad_norm": 3.3704836261228746, "learning_rate": 7.106051310238493e-06, "loss": 0.8618, "step": 3759 }, { "epoch": 0.61, "grad_norm": 2.646621216336492, "learning_rate": 7.101055586884451e-06, "loss": 0.8177, "step": 3760 }, { "epoch": 0.61, "grad_norm": 2.625293533625395, "learning_rate": 7.096060653282777e-06, "loss": 0.8208, "step": 3761 }, { "epoch": 0.61, "grad_norm": 1.774079597287277, "learning_rate": 7.091066510794224e-06, "loss": 0.7816, "step": 3762 }, { "epoch": 0.61, "grad_norm": 2.4502165251049672, "learning_rate": 7.086073160779347e-06, "loss": 0.9179, "step": 3763 }, { "epoch": 0.61, "grad_norm": 4.143887368251212, "learning_rate": 7.081080604598458e-06, "loss": 0.9367, "step": 3764 }, { "epoch": 0.61, "grad_norm": 3.144207784945147, "learning_rate": 7.076088843611675e-06, "loss": 0.9048, "step": 3765 }, { "epoch": 0.61, "grad_norm": 3.453160626672126, "learning_rate": 7.0710978791788895e-06, "loss": 0.9548, "step": 3766 }, { "epoch": 0.61, "grad_norm": 2.1912195462745365, "learning_rate": 7.066107712659778e-06, "loss": 0.8672, "step": 3767 }, { "epoch": 0.61, "grad_norm": 2.426853403707096, "learning_rate": 7.061118345413797e-06, "loss": 0.9399, "step": 3768 }, { "epoch": 0.61, "grad_norm": 3.5302011738368098, "learning_rate": 7.0561297788001915e-06, "loss": 0.8767, "step": 3769 }, { "epoch": 0.61, "grad_norm": 4.055957396876037, "learning_rate": 7.0511420141779805e-06, "loss": 0.9455, "step": 3770 }, { "epoch": 0.61, "grad_norm": 1.815688255902111, "learning_rate": 7.046155052905967e-06, "loss": 0.3483, "step": 3771 }, { "epoch": 0.61, "grad_norm": 3.838685335380453, "learning_rate": 7.04116889634275e-06, "loss": 0.8281, "step": 3772 }, { "epoch": 0.61, "grad_norm": 1.9058880696889677, "learning_rate": 7.036183545846678e-06, "loss": 0.3008, "step": 3773 }, { "epoch": 0.61, "grad_norm": 2.253120483732525, "learning_rate": 7.031199002775908e-06, "loss": 0.9197, "step": 3774 }, { "epoch": 0.61, "grad_norm": 1.6092744814397446, "learning_rate": 7.0262152684883675e-06, "loss": 0.8973, "step": 3775 }, { "epoch": 0.61, "grad_norm": 1.7974168043553787, "learning_rate": 7.021232344341759e-06, "loss": 0.3224, "step": 3776 }, { "epoch": 0.61, "grad_norm": 1.957115247678875, "learning_rate": 7.01625023169357e-06, "loss": 0.3353, "step": 3777 }, { "epoch": 0.61, "grad_norm": 3.1963882687304337, "learning_rate": 7.011268931901067e-06, "loss": 0.8711, "step": 3778 }, { "epoch": 0.61, "grad_norm": 3.492679160239261, "learning_rate": 7.006288446321288e-06, "loss": 0.8727, "step": 3779 }, { "epoch": 0.61, "grad_norm": 3.49137824664662, "learning_rate": 7.001308776311061e-06, "loss": 0.8329, "step": 3780 }, { "epoch": 0.61, "grad_norm": 2.9704138660743222, "learning_rate": 6.9963299232269824e-06, "loss": 0.9977, "step": 3781 }, { "epoch": 0.61, "grad_norm": 3.4906928428948745, "learning_rate": 6.99135188842543e-06, "loss": 0.8823, "step": 3782 }, { "epoch": 0.61, "grad_norm": 2.6378686728595695, "learning_rate": 6.986374673262557e-06, "loss": 0.9103, "step": 3783 }, { "epoch": 0.61, "grad_norm": 2.188213014202713, "learning_rate": 6.981398279094292e-06, "loss": 0.8259, "step": 3784 }, { "epoch": 0.61, "grad_norm": 4.1304791214612315, "learning_rate": 6.976422707276349e-06, "loss": 0.8657, "step": 3785 }, { "epoch": 0.61, "grad_norm": 1.5053083745144789, "learning_rate": 6.971447959164201e-06, "loss": 0.9253, "step": 3786 }, { "epoch": 0.61, "grad_norm": 4.012581350862564, "learning_rate": 6.966474036113112e-06, "loss": 0.7953, "step": 3787 }, { "epoch": 0.61, "grad_norm": 1.7256613058093275, "learning_rate": 6.961500939478118e-06, "loss": 0.9212, "step": 3788 }, { "epoch": 0.61, "grad_norm": 3.565614108872225, "learning_rate": 6.956528670614022e-06, "loss": 0.9835, "step": 3789 }, { "epoch": 0.61, "grad_norm": 2.773950491729513, "learning_rate": 6.951557230875412e-06, "loss": 0.9077, "step": 3790 }, { "epoch": 0.61, "grad_norm": 3.7122989124849854, "learning_rate": 6.9465866216166436e-06, "loss": 0.8667, "step": 3791 }, { "epoch": 0.61, "grad_norm": 2.0374830420788865, "learning_rate": 6.941616844191846e-06, "loss": 0.9619, "step": 3792 }, { "epoch": 0.61, "grad_norm": 2.848210452222828, "learning_rate": 6.936647899954921e-06, "loss": 0.8635, "step": 3793 }, { "epoch": 0.61, "grad_norm": 3.314108691867059, "learning_rate": 6.931679790259558e-06, "loss": 0.8034, "step": 3794 }, { "epoch": 0.61, "grad_norm": 1.0649236076788093, "learning_rate": 6.926712516459192e-06, "loss": 0.8775, "step": 3795 }, { "epoch": 0.61, "grad_norm": 4.228805651860929, "learning_rate": 6.921746079907055e-06, "loss": 0.8835, "step": 3796 }, { "epoch": 0.61, "grad_norm": 1.7645156886418598, "learning_rate": 6.916780481956139e-06, "loss": 0.8913, "step": 3797 }, { "epoch": 0.61, "grad_norm": 2.4193737280528675, "learning_rate": 6.911815723959207e-06, "loss": 0.8734, "step": 3798 }, { "epoch": 0.61, "grad_norm": 2.0929558229919354, "learning_rate": 6.906851807268799e-06, "loss": 0.3454, "step": 3799 }, { "epoch": 0.61, "grad_norm": 3.3368656198470132, "learning_rate": 6.901888733237226e-06, "loss": 0.8852, "step": 3800 }, { "epoch": 0.61, "grad_norm": 1.3468966347357452, "learning_rate": 6.896926503216557e-06, "loss": 0.895, "step": 3801 }, { "epoch": 0.61, "grad_norm": 3.7754717096343104, "learning_rate": 6.891965118558644e-06, "loss": 0.887, "step": 3802 }, { "epoch": 0.61, "grad_norm": 2.0767276052450208, "learning_rate": 6.887004580615107e-06, "loss": 0.9319, "step": 3803 }, { "epoch": 0.61, "grad_norm": 2.850090922193625, "learning_rate": 6.882044890737336e-06, "loss": 0.9351, "step": 3804 }, { "epoch": 0.61, "grad_norm": 1.9788364402605023, "learning_rate": 6.8770860502764815e-06, "loss": 0.884, "step": 3805 }, { "epoch": 0.61, "grad_norm": 3.5092894029920982, "learning_rate": 6.8721280605834706e-06, "loss": 0.8401, "step": 3806 }, { "epoch": 0.61, "grad_norm": 4.4477030623140275, "learning_rate": 6.867170923008998e-06, "loss": 0.8595, "step": 3807 }, { "epoch": 0.61, "grad_norm": 2.1820698506252754, "learning_rate": 6.862214638903519e-06, "loss": 0.9003, "step": 3808 }, { "epoch": 0.61, "grad_norm": 2.0171500653871797, "learning_rate": 6.857259209617269e-06, "loss": 0.8731, "step": 3809 }, { "epoch": 0.61, "grad_norm": 2.6856126589954283, "learning_rate": 6.85230463650024e-06, "loss": 0.9316, "step": 3810 }, { "epoch": 0.61, "grad_norm": 3.2461435472545457, "learning_rate": 6.847350920902194e-06, "loss": 0.8545, "step": 3811 }, { "epoch": 0.61, "grad_norm": 4.261382400008998, "learning_rate": 6.842398064172661e-06, "loss": 0.9172, "step": 3812 }, { "epoch": 0.61, "grad_norm": 3.255207731119736, "learning_rate": 6.837446067660937e-06, "loss": 0.8935, "step": 3813 }, { "epoch": 0.61, "grad_norm": 2.2427142119400743, "learning_rate": 6.832494932716078e-06, "loss": 0.9217, "step": 3814 }, { "epoch": 0.61, "grad_norm": 2.8905468079584913, "learning_rate": 6.82754466068691e-06, "loss": 0.8593, "step": 3815 }, { "epoch": 0.61, "grad_norm": 3.2168729179024282, "learning_rate": 6.822595252922033e-06, "loss": 0.8163, "step": 3816 }, { "epoch": 0.62, "grad_norm": 2.6494586067659305, "learning_rate": 6.817646710769788e-06, "loss": 0.9524, "step": 3817 }, { "epoch": 0.62, "grad_norm": 2.6203614077385446, "learning_rate": 6.812699035578302e-06, "loss": 0.917, "step": 3818 }, { "epoch": 0.62, "grad_norm": 2.611126066620475, "learning_rate": 6.807752228695459e-06, "loss": 0.9613, "step": 3819 }, { "epoch": 0.62, "grad_norm": 2.137487409231124, "learning_rate": 6.802806291468903e-06, "loss": 0.866, "step": 3820 }, { "epoch": 0.62, "grad_norm": 2.67750055250773, "learning_rate": 6.797861225246045e-06, "loss": 0.9184, "step": 3821 }, { "epoch": 0.62, "grad_norm": 2.563860462325491, "learning_rate": 6.792917031374053e-06, "loss": 0.9281, "step": 3822 }, { "epoch": 0.62, "grad_norm": 4.070355845273644, "learning_rate": 6.787973711199872e-06, "loss": 0.9213, "step": 3823 }, { "epoch": 0.62, "grad_norm": 2.7178225634483866, "learning_rate": 6.783031266070186e-06, "loss": 0.8914, "step": 3824 }, { "epoch": 0.62, "grad_norm": 3.9831758976127634, "learning_rate": 6.778089697331462e-06, "loss": 0.8634, "step": 3825 }, { "epoch": 0.62, "grad_norm": 2.6395146670865257, "learning_rate": 6.773149006329919e-06, "loss": 0.9052, "step": 3826 }, { "epoch": 0.62, "grad_norm": 1.7310710057020269, "learning_rate": 6.768209194411533e-06, "loss": 0.8963, "step": 3827 }, { "epoch": 0.62, "grad_norm": 2.1301977877056615, "learning_rate": 6.763270262922047e-06, "loss": 0.3294, "step": 3828 }, { "epoch": 0.62, "grad_norm": 3.0069188759773655, "learning_rate": 6.758332213206964e-06, "loss": 0.9416, "step": 3829 }, { "epoch": 0.62, "grad_norm": 2.3937659479710565, "learning_rate": 6.753395046611543e-06, "loss": 0.8697, "step": 3830 }, { "epoch": 0.62, "grad_norm": 2.531467758042706, "learning_rate": 6.748458764480801e-06, "loss": 0.9197, "step": 3831 }, { "epoch": 0.62, "grad_norm": 2.8055811231432246, "learning_rate": 6.743523368159526e-06, "loss": 0.8281, "step": 3832 }, { "epoch": 0.62, "grad_norm": 1.661415681520726, "learning_rate": 6.738588858992248e-06, "loss": 0.899, "step": 3833 }, { "epoch": 0.62, "grad_norm": 2.4640262656731657, "learning_rate": 6.733655238323264e-06, "loss": 0.8335, "step": 3834 }, { "epoch": 0.62, "grad_norm": 3.300104376558293, "learning_rate": 6.728722507496636e-06, "loss": 0.8961, "step": 3835 }, { "epoch": 0.62, "grad_norm": 2.9278675988076137, "learning_rate": 6.723790667856166e-06, "loss": 0.8581, "step": 3836 }, { "epoch": 0.62, "grad_norm": 3.068316793844419, "learning_rate": 6.718859720745425e-06, "loss": 0.8644, "step": 3837 }, { "epoch": 0.62, "grad_norm": 2.748676241535192, "learning_rate": 6.71392966750775e-06, "loss": 0.918, "step": 3838 }, { "epoch": 0.62, "grad_norm": 1.9063783195956454, "learning_rate": 6.709000509486207e-06, "loss": 0.9188, "step": 3839 }, { "epoch": 0.62, "grad_norm": 3.129789420652283, "learning_rate": 6.704072248023643e-06, "loss": 0.9081, "step": 3840 }, { "epoch": 0.62, "grad_norm": 3.4710519386742287, "learning_rate": 6.6991448844626525e-06, "loss": 0.8485, "step": 3841 }, { "epoch": 0.62, "grad_norm": 2.2757071475461914, "learning_rate": 6.694218420145586e-06, "loss": 0.8638, "step": 3842 }, { "epoch": 0.62, "grad_norm": 3.2575671792001395, "learning_rate": 6.689292856414545e-06, "loss": 0.8799, "step": 3843 }, { "epoch": 0.62, "grad_norm": 3.0081553870607545, "learning_rate": 6.684368194611387e-06, "loss": 0.8918, "step": 3844 }, { "epoch": 0.62, "grad_norm": 3.525122533521746, "learning_rate": 6.679444436077735e-06, "loss": 0.8931, "step": 3845 }, { "epoch": 0.62, "grad_norm": 2.440672936744142, "learning_rate": 6.674521582154945e-06, "loss": 0.9094, "step": 3846 }, { "epoch": 0.62, "grad_norm": 3.363830592431438, "learning_rate": 6.669599634184145e-06, "loss": 0.9413, "step": 3847 }, { "epoch": 0.62, "grad_norm": 2.603272114016412, "learning_rate": 6.6646785935062086e-06, "loss": 0.8902, "step": 3848 }, { "epoch": 0.62, "grad_norm": 3.4661495324317384, "learning_rate": 6.65975846146176e-06, "loss": 0.8916, "step": 3849 }, { "epoch": 0.62, "grad_norm": 3.6675811985338997, "learning_rate": 6.654839239391182e-06, "loss": 0.9386, "step": 3850 }, { "epoch": 0.62, "grad_norm": 2.4329382440684113, "learning_rate": 6.649920928634607e-06, "loss": 0.9285, "step": 3851 }, { "epoch": 0.62, "grad_norm": 3.437848437596226, "learning_rate": 6.645003530531915e-06, "loss": 0.8141, "step": 3852 }, { "epoch": 0.62, "grad_norm": 2.4820046303065757, "learning_rate": 6.640087046422739e-06, "loss": 0.9207, "step": 3853 }, { "epoch": 0.62, "grad_norm": 4.3612741141555995, "learning_rate": 6.635171477646475e-06, "loss": 0.8922, "step": 3854 }, { "epoch": 0.62, "grad_norm": 4.155804144098521, "learning_rate": 6.630256825542245e-06, "loss": 0.8511, "step": 3855 }, { "epoch": 0.62, "grad_norm": 3.2500356525519667, "learning_rate": 6.625343091448948e-06, "loss": 0.8582, "step": 3856 }, { "epoch": 0.62, "grad_norm": 4.739143815114899, "learning_rate": 6.620430276705215e-06, "loss": 0.9201, "step": 3857 }, { "epoch": 0.62, "grad_norm": 2.307232605556401, "learning_rate": 6.615518382649433e-06, "loss": 0.8688, "step": 3858 }, { "epoch": 0.62, "grad_norm": 3.54536493228155, "learning_rate": 6.610607410619736e-06, "loss": 0.8518, "step": 3859 }, { "epoch": 0.62, "grad_norm": 2.882026213643861, "learning_rate": 6.605697361954009e-06, "loss": 0.9098, "step": 3860 }, { "epoch": 0.62, "grad_norm": 2.3354984751202084, "learning_rate": 6.600788237989888e-06, "loss": 0.8312, "step": 3861 }, { "epoch": 0.62, "grad_norm": 3.260711331605655, "learning_rate": 6.595880040064748e-06, "loss": 0.9449, "step": 3862 }, { "epoch": 0.62, "grad_norm": 1.9570934415213355, "learning_rate": 6.590972769515722e-06, "loss": 0.3097, "step": 3863 }, { "epoch": 0.62, "grad_norm": 3.5336728856538455, "learning_rate": 6.5860664276796844e-06, "loss": 0.9095, "step": 3864 }, { "epoch": 0.62, "grad_norm": 3.130642793604448, "learning_rate": 6.581161015893257e-06, "loss": 0.8903, "step": 3865 }, { "epoch": 0.62, "grad_norm": 2.4076794428515664, "learning_rate": 6.576256535492807e-06, "loss": 0.858, "step": 3866 }, { "epoch": 0.62, "grad_norm": 1.4231086660996601, "learning_rate": 6.571352987814459e-06, "loss": 0.8691, "step": 3867 }, { "epoch": 0.62, "grad_norm": 2.2599316851663134, "learning_rate": 6.566450374194062e-06, "loss": 0.3339, "step": 3868 }, { "epoch": 0.62, "grad_norm": 3.4630284591241254, "learning_rate": 6.561548695967231e-06, "loss": 0.8741, "step": 3869 }, { "epoch": 0.62, "grad_norm": 3.7907666965301323, "learning_rate": 6.55664795446932e-06, "loss": 0.7955, "step": 3870 }, { "epoch": 0.62, "grad_norm": 3.458172867204266, "learning_rate": 6.55174815103542e-06, "loss": 0.939, "step": 3871 }, { "epoch": 0.62, "grad_norm": 2.9962891834483547, "learning_rate": 6.546849287000374e-06, "loss": 0.831, "step": 3872 }, { "epoch": 0.62, "grad_norm": 2.983700181651437, "learning_rate": 6.5419513636987735e-06, "loss": 0.9216, "step": 3873 }, { "epoch": 0.62, "grad_norm": 4.693086677340954, "learning_rate": 6.53705438246494e-06, "loss": 0.9095, "step": 3874 }, { "epoch": 0.62, "grad_norm": 2.9507756652051995, "learning_rate": 6.532158344632946e-06, "loss": 0.8461, "step": 3875 }, { "epoch": 0.62, "grad_norm": 3.5941988457391174, "learning_rate": 6.527263251536618e-06, "loss": 0.888, "step": 3876 }, { "epoch": 0.62, "grad_norm": 3.8151579799914552, "learning_rate": 6.522369104509501e-06, "loss": 0.796, "step": 3877 }, { "epoch": 0.62, "grad_norm": 2.811890006142973, "learning_rate": 6.517475904884903e-06, "loss": 0.896, "step": 3878 }, { "epoch": 0.62, "grad_norm": 2.6982496981493824, "learning_rate": 6.512583653995867e-06, "loss": 0.8976, "step": 3879 }, { "epoch": 0.63, "grad_norm": 2.9771561318518134, "learning_rate": 6.507692353175172e-06, "loss": 0.8673, "step": 3880 }, { "epoch": 0.63, "grad_norm": 2.673849079281251, "learning_rate": 6.502802003755346e-06, "loss": 0.9343, "step": 3881 }, { "epoch": 0.63, "grad_norm": 2.026845176117416, "learning_rate": 6.497912607068655e-06, "loss": 0.3209, "step": 3882 }, { "epoch": 0.63, "grad_norm": 3.424649024299604, "learning_rate": 6.49302416444711e-06, "loss": 0.9038, "step": 3883 }, { "epoch": 0.63, "grad_norm": 3.2471456464475326, "learning_rate": 6.4881366772224475e-06, "loss": 0.927, "step": 3884 }, { "epoch": 0.63, "grad_norm": 2.6789981411325483, "learning_rate": 6.48325014672616e-06, "loss": 0.8999, "step": 3885 }, { "epoch": 0.63, "grad_norm": 4.1170545408144665, "learning_rate": 6.478364574289475e-06, "loss": 0.8505, "step": 3886 }, { "epoch": 0.63, "grad_norm": 3.025946628218378, "learning_rate": 6.473479961243353e-06, "loss": 0.9779, "step": 3887 }, { "epoch": 0.63, "grad_norm": 1.1936821883349213, "learning_rate": 6.468596308918498e-06, "loss": 0.3092, "step": 3888 }, { "epoch": 0.63, "grad_norm": 2.4819529500923596, "learning_rate": 6.463713618645354e-06, "loss": 0.8405, "step": 3889 }, { "epoch": 0.63, "grad_norm": 3.4306316770550116, "learning_rate": 6.458831891754096e-06, "loss": 0.8531, "step": 3890 }, { "epoch": 0.63, "grad_norm": 4.215083132248708, "learning_rate": 6.453951129574644e-06, "loss": 0.8901, "step": 3891 }, { "epoch": 0.63, "grad_norm": 3.8049238628935784, "learning_rate": 6.449071333436654e-06, "loss": 0.9259, "step": 3892 }, { "epoch": 0.63, "grad_norm": 2.2852430539068895, "learning_rate": 6.4441925046695135e-06, "loss": 0.8345, "step": 3893 }, { "epoch": 0.63, "grad_norm": 3.078338615634629, "learning_rate": 6.43931464460235e-06, "loss": 0.9332, "step": 3894 }, { "epoch": 0.63, "grad_norm": 3.0006037740308247, "learning_rate": 6.434437754564031e-06, "loss": 0.9249, "step": 3895 }, { "epoch": 0.63, "grad_norm": 2.624066323035298, "learning_rate": 6.42956183588315e-06, "loss": 0.8295, "step": 3896 }, { "epoch": 0.63, "grad_norm": 2.955722534351498, "learning_rate": 6.424686889888044e-06, "loss": 0.9187, "step": 3897 }, { "epoch": 0.63, "grad_norm": 2.7705957602471023, "learning_rate": 6.419812917906788e-06, "loss": 0.9669, "step": 3898 }, { "epoch": 0.63, "grad_norm": 2.406425271524728, "learning_rate": 6.414939921267176e-06, "loss": 0.8918, "step": 3899 }, { "epoch": 0.63, "grad_norm": 3.2216481614572228, "learning_rate": 6.410067901296755e-06, "loss": 0.8082, "step": 3900 }, { "epoch": 0.63, "grad_norm": 3.1962229730316003, "learning_rate": 6.405196859322793e-06, "loss": 0.8141, "step": 3901 }, { "epoch": 0.63, "grad_norm": 1.928085094267705, "learning_rate": 6.400326796672302e-06, "loss": 0.9179, "step": 3902 }, { "epoch": 0.63, "grad_norm": 3.7477061567548127, "learning_rate": 6.395457714672013e-06, "loss": 0.8535, "step": 3903 }, { "epoch": 0.63, "grad_norm": 3.061539791373785, "learning_rate": 6.390589614648401e-06, "loss": 0.8474, "step": 3904 }, { "epoch": 0.63, "grad_norm": 2.524602379037382, "learning_rate": 6.38572249792768e-06, "loss": 0.8938, "step": 3905 }, { "epoch": 0.63, "grad_norm": 1.7852233606991759, "learning_rate": 6.38085636583577e-06, "loss": 0.8499, "step": 3906 }, { "epoch": 0.63, "grad_norm": 4.072937453949444, "learning_rate": 6.375991219698352e-06, "loss": 0.8745, "step": 3907 }, { "epoch": 0.63, "grad_norm": 3.8375783856366716, "learning_rate": 6.371127060840823e-06, "loss": 0.848, "step": 3908 }, { "epoch": 0.63, "grad_norm": 3.563382892626799, "learning_rate": 6.366263890588311e-06, "loss": 0.9131, "step": 3909 }, { "epoch": 0.63, "grad_norm": 2.089173282490579, "learning_rate": 6.3614017102656815e-06, "loss": 0.8854, "step": 3910 }, { "epoch": 0.63, "grad_norm": 2.4010588892872744, "learning_rate": 6.356540521197525e-06, "loss": 0.8829, "step": 3911 }, { "epoch": 0.63, "grad_norm": 2.5843063695944157, "learning_rate": 6.351680324708162e-06, "loss": 0.9153, "step": 3912 }, { "epoch": 0.63, "grad_norm": 4.102243712031306, "learning_rate": 6.3468211221216425e-06, "loss": 0.9418, "step": 3913 }, { "epoch": 0.63, "grad_norm": 3.8215527227833315, "learning_rate": 6.341962914761753e-06, "loss": 0.8939, "step": 3914 }, { "epoch": 0.63, "grad_norm": 3.4290156303754835, "learning_rate": 6.337105703952e-06, "loss": 0.9256, "step": 3915 }, { "epoch": 0.63, "grad_norm": 2.827530434846874, "learning_rate": 6.3322494910156195e-06, "loss": 0.9381, "step": 3916 }, { "epoch": 0.63, "grad_norm": 3.8572851538270503, "learning_rate": 6.327394277275582e-06, "loss": 0.8774, "step": 3917 }, { "epoch": 0.63, "grad_norm": 2.228942241839401, "learning_rate": 6.322540064054578e-06, "loss": 0.8993, "step": 3918 }, { "epoch": 0.63, "grad_norm": 2.7510791741823115, "learning_rate": 6.317686852675029e-06, "loss": 0.9677, "step": 3919 }, { "epoch": 0.63, "grad_norm": 2.120227390905973, "learning_rate": 6.312834644459086e-06, "loss": 0.3279, "step": 3920 }, { "epoch": 0.63, "grad_norm": 2.455280790256521, "learning_rate": 6.307983440728626e-06, "loss": 0.8482, "step": 3921 }, { "epoch": 0.63, "grad_norm": 1.206886455653389, "learning_rate": 6.303133242805244e-06, "loss": 0.9263, "step": 3922 }, { "epoch": 0.63, "grad_norm": 1.559485235503308, "learning_rate": 6.2982840520102725e-06, "loss": 0.8383, "step": 3923 }, { "epoch": 0.63, "grad_norm": 3.2555859318756526, "learning_rate": 6.293435869664766e-06, "loss": 0.8198, "step": 3924 }, { "epoch": 0.63, "grad_norm": 2.9912504716004804, "learning_rate": 6.2885886970894986e-06, "loss": 0.9254, "step": 3925 }, { "epoch": 0.63, "grad_norm": 1.481309636842972, "learning_rate": 6.283742535604973e-06, "loss": 0.9446, "step": 3926 }, { "epoch": 0.63, "grad_norm": 2.9146559369249494, "learning_rate": 6.2788973865314265e-06, "loss": 0.8567, "step": 3927 }, { "epoch": 0.63, "grad_norm": 1.9388280439378736, "learning_rate": 6.274053251188798e-06, "loss": 0.3336, "step": 3928 }, { "epoch": 0.63, "grad_norm": 2.7321073884202196, "learning_rate": 6.269210130896773e-06, "loss": 0.857, "step": 3929 }, { "epoch": 0.63, "grad_norm": 2.638798821457083, "learning_rate": 6.26436802697475e-06, "loss": 0.8531, "step": 3930 }, { "epoch": 0.63, "grad_norm": 3.2552554846845503, "learning_rate": 6.259526940741848e-06, "loss": 0.8308, "step": 3931 }, { "epoch": 0.63, "grad_norm": 2.8181090478367348, "learning_rate": 6.2546868735169134e-06, "loss": 0.888, "step": 3932 }, { "epoch": 0.63, "grad_norm": 2.9885108610214, "learning_rate": 6.249847826618518e-06, "loss": 0.928, "step": 3933 }, { "epoch": 0.63, "grad_norm": 2.7278026831742284, "learning_rate": 6.245009801364945e-06, "loss": 0.9295, "step": 3934 }, { "epoch": 0.63, "grad_norm": 3.230430519772653, "learning_rate": 6.240172799074207e-06, "loss": 0.8649, "step": 3935 }, { "epoch": 0.63, "grad_norm": 2.762566809903592, "learning_rate": 6.2353368210640415e-06, "loss": 0.8401, "step": 3936 }, { "epoch": 0.63, "grad_norm": 2.9937625255601423, "learning_rate": 6.230501868651899e-06, "loss": 0.8353, "step": 3937 }, { "epoch": 0.63, "grad_norm": 1.7283285093592706, "learning_rate": 6.225667943154956e-06, "loss": 0.9202, "step": 3938 }, { "epoch": 0.63, "grad_norm": 4.0287905738768774, "learning_rate": 6.2208350458901035e-06, "loss": 0.9108, "step": 3939 }, { "epoch": 0.63, "grad_norm": 2.8430713220989556, "learning_rate": 6.216003178173959e-06, "loss": 0.8535, "step": 3940 }, { "epoch": 0.63, "grad_norm": 3.415027333676599, "learning_rate": 6.211172341322853e-06, "loss": 0.8657, "step": 3941 }, { "epoch": 0.64, "grad_norm": 2.9635886851750297, "learning_rate": 6.206342536652841e-06, "loss": 0.8833, "step": 3942 }, { "epoch": 0.64, "grad_norm": 3.5388846447445266, "learning_rate": 6.201513765479699e-06, "loss": 0.7938, "step": 3943 }, { "epoch": 0.64, "grad_norm": 2.7825283584452034, "learning_rate": 6.196686029118909e-06, "loss": 0.8416, "step": 3944 }, { "epoch": 0.64, "grad_norm": 3.4170251208580114, "learning_rate": 6.191859328885685e-06, "loss": 0.8726, "step": 3945 }, { "epoch": 0.64, "grad_norm": 3.311502270418564, "learning_rate": 6.1870336660949526e-06, "loss": 0.937, "step": 3946 }, { "epoch": 0.64, "grad_norm": 2.3740745548345448, "learning_rate": 6.182209042061353e-06, "loss": 0.7696, "step": 3947 }, { "epoch": 0.64, "grad_norm": 3.0844839028531905, "learning_rate": 6.177385458099248e-06, "loss": 0.8516, "step": 3948 }, { "epoch": 0.64, "grad_norm": 2.157656998386622, "learning_rate": 6.1725629155227195e-06, "loss": 0.337, "step": 3949 }, { "epoch": 0.64, "grad_norm": 1.3403510643002, "learning_rate": 6.167741415645552e-06, "loss": 0.3424, "step": 3950 }, { "epoch": 0.64, "grad_norm": 4.548730845084746, "learning_rate": 6.16292095978126e-06, "loss": 0.8442, "step": 3951 }, { "epoch": 0.64, "grad_norm": 1.4194786922821412, "learning_rate": 6.158101549243072e-06, "loss": 0.9923, "step": 3952 }, { "epoch": 0.64, "grad_norm": 2.1608977054178267, "learning_rate": 6.153283185343921e-06, "loss": 0.8963, "step": 3953 }, { "epoch": 0.64, "grad_norm": 2.4557587887112446, "learning_rate": 6.148465869396468e-06, "loss": 0.8789, "step": 3954 }, { "epoch": 0.64, "grad_norm": 3.0806623351113367, "learning_rate": 6.14364960271308e-06, "loss": 0.9195, "step": 3955 }, { "epoch": 0.64, "grad_norm": 2.288293077745017, "learning_rate": 6.13883438660584e-06, "loss": 0.8695, "step": 3956 }, { "epoch": 0.64, "grad_norm": 1.7118713756577228, "learning_rate": 6.134020222386544e-06, "loss": 0.959, "step": 3957 }, { "epoch": 0.64, "grad_norm": 3.7658227892330816, "learning_rate": 6.1292071113667125e-06, "loss": 0.8724, "step": 3958 }, { "epoch": 0.64, "grad_norm": 3.163184677613291, "learning_rate": 6.124395054857557e-06, "loss": 0.8714, "step": 3959 }, { "epoch": 0.64, "grad_norm": 3.4262724190458296, "learning_rate": 6.119584054170022e-06, "loss": 0.9302, "step": 3960 }, { "epoch": 0.64, "grad_norm": 3.7217023654107675, "learning_rate": 6.114774110614755e-06, "loss": 0.9279, "step": 3961 }, { "epoch": 0.64, "grad_norm": 2.5847607842443714, "learning_rate": 6.109965225502119e-06, "loss": 0.8623, "step": 3962 }, { "epoch": 0.64, "grad_norm": 2.7763873678863398, "learning_rate": 6.105157400142183e-06, "loss": 0.7891, "step": 3963 }, { "epoch": 0.64, "grad_norm": 3.0574107019666967, "learning_rate": 6.100350635844731e-06, "loss": 0.9352, "step": 3964 }, { "epoch": 0.64, "grad_norm": 4.0734202406166045, "learning_rate": 6.095544933919265e-06, "loss": 0.9784, "step": 3965 }, { "epoch": 0.64, "grad_norm": 1.6286503067012097, "learning_rate": 6.090740295674984e-06, "loss": 0.9003, "step": 3966 }, { "epoch": 0.64, "grad_norm": 2.728400892222479, "learning_rate": 6.085936722420806e-06, "loss": 0.8464, "step": 3967 }, { "epoch": 0.64, "grad_norm": 2.9915958945174648, "learning_rate": 6.081134215465358e-06, "loss": 0.9817, "step": 3968 }, { "epoch": 0.64, "grad_norm": 3.520885008182563, "learning_rate": 6.076332776116971e-06, "loss": 0.8287, "step": 3969 }, { "epoch": 0.64, "grad_norm": 2.9133047801116327, "learning_rate": 6.071532405683691e-06, "loss": 0.8946, "step": 3970 }, { "epoch": 0.64, "grad_norm": 3.210203212116943, "learning_rate": 6.066733105473279e-06, "loss": 0.9074, "step": 3971 }, { "epoch": 0.64, "grad_norm": 2.445929988617072, "learning_rate": 6.061934876793183e-06, "loss": 0.898, "step": 3972 }, { "epoch": 0.64, "grad_norm": 4.042177515419395, "learning_rate": 6.057137720950583e-06, "loss": 0.9055, "step": 3973 }, { "epoch": 0.64, "grad_norm": 1.9015249256443314, "learning_rate": 6.052341639252356e-06, "loss": 0.3026, "step": 3974 }, { "epoch": 0.64, "grad_norm": 2.5735066055441354, "learning_rate": 6.0475466330050815e-06, "loss": 0.9013, "step": 3975 }, { "epoch": 0.64, "grad_norm": 2.975459657949747, "learning_rate": 6.042752703515054e-06, "loss": 0.8599, "step": 3976 }, { "epoch": 0.64, "grad_norm": 3.931176451463544, "learning_rate": 6.037959852088275e-06, "loss": 0.8952, "step": 3977 }, { "epoch": 0.64, "grad_norm": 2.9021239165357344, "learning_rate": 6.0331680800304436e-06, "loss": 0.982, "step": 3978 }, { "epoch": 0.64, "grad_norm": 2.9597300782724205, "learning_rate": 6.028377388646972e-06, "loss": 0.8472, "step": 3979 }, { "epoch": 0.64, "grad_norm": 2.8156864341613077, "learning_rate": 6.023587779242981e-06, "loss": 0.8905, "step": 3980 }, { "epoch": 0.64, "grad_norm": 3.448717990352, "learning_rate": 6.0187992531232895e-06, "loss": 0.8732, "step": 3981 }, { "epoch": 0.64, "grad_norm": 3.1317359044770403, "learning_rate": 6.014011811592424e-06, "loss": 0.8853, "step": 3982 }, { "epoch": 0.64, "grad_norm": 3.235051033761297, "learning_rate": 6.0092254559546134e-06, "loss": 0.8769, "step": 3983 }, { "epoch": 0.64, "grad_norm": 2.8963572808006397, "learning_rate": 6.004440187513798e-06, "loss": 0.922, "step": 3984 }, { "epoch": 0.64, "grad_norm": 2.184456042531346, "learning_rate": 5.999656007573612e-06, "loss": 0.8938, "step": 3985 }, { "epoch": 0.64, "grad_norm": 1.880221788634731, "learning_rate": 5.9948729174373975e-06, "loss": 0.8854, "step": 3986 }, { "epoch": 0.64, "grad_norm": 3.7380092924521984, "learning_rate": 5.990090918408208e-06, "loss": 0.8957, "step": 3987 }, { "epoch": 0.64, "grad_norm": 4.083786348917328, "learning_rate": 5.985310011788781e-06, "loss": 0.895, "step": 3988 }, { "epoch": 0.64, "grad_norm": 1.9529127081892024, "learning_rate": 5.9805301988815735e-06, "loss": 0.9041, "step": 3989 }, { "epoch": 0.64, "grad_norm": 3.3599956560106805, "learning_rate": 5.975751480988739e-06, "loss": 0.9104, "step": 3990 }, { "epoch": 0.64, "grad_norm": 2.665684896782271, "learning_rate": 5.970973859412128e-06, "loss": 0.9106, "step": 3991 }, { "epoch": 0.64, "grad_norm": 3.790804307286592, "learning_rate": 5.966197335453299e-06, "loss": 0.7794, "step": 3992 }, { "epoch": 0.64, "grad_norm": 2.147168259671178, "learning_rate": 5.96142191041351e-06, "loss": 0.9372, "step": 3993 }, { "epoch": 0.64, "grad_norm": 2.905173974137749, "learning_rate": 5.956647585593713e-06, "loss": 0.8694, "step": 3994 }, { "epoch": 0.64, "grad_norm": 2.578894575451413, "learning_rate": 5.9518743622945715e-06, "loss": 0.9587, "step": 3995 }, { "epoch": 0.64, "grad_norm": 3.000078518157867, "learning_rate": 5.9471022418164425e-06, "loss": 0.9405, "step": 3996 }, { "epoch": 0.64, "grad_norm": 2.8151156660204446, "learning_rate": 5.942331225459379e-06, "loss": 0.845, "step": 3997 }, { "epoch": 0.64, "grad_norm": 3.2375973484323484, "learning_rate": 5.937561314523142e-06, "loss": 0.886, "step": 3998 }, { "epoch": 0.64, "grad_norm": 1.3721584088154897, "learning_rate": 5.93279251030718e-06, "loss": 0.8841, "step": 3999 }, { "epoch": 0.64, "grad_norm": 2.414152964653061, "learning_rate": 5.928024814110659e-06, "loss": 0.8755, "step": 4000 }, { "epoch": 0.64, "grad_norm": 2.8472581959341103, "learning_rate": 5.923258227232418e-06, "loss": 0.8796, "step": 4001 }, { "epoch": 0.64, "grad_norm": 3.490274404542764, "learning_rate": 5.918492750971012e-06, "loss": 0.8493, "step": 4002 }, { "epoch": 0.64, "grad_norm": 1.9124752392132134, "learning_rate": 5.913728386624691e-06, "loss": 0.92, "step": 4003 }, { "epoch": 0.65, "grad_norm": 2.6947770181107096, "learning_rate": 5.908965135491394e-06, "loss": 0.9424, "step": 4004 }, { "epoch": 0.65, "grad_norm": 2.80600242287279, "learning_rate": 5.904202998868765e-06, "loss": 0.9082, "step": 4005 }, { "epoch": 0.65, "grad_norm": 3.56960471585491, "learning_rate": 5.899441978054141e-06, "loss": 0.9131, "step": 4006 }, { "epoch": 0.65, "grad_norm": 3.0808194368389694, "learning_rate": 5.894682074344554e-06, "loss": 0.8706, "step": 4007 }, { "epoch": 0.65, "grad_norm": 3.0646617811074304, "learning_rate": 5.889923289036732e-06, "loss": 0.9023, "step": 4008 }, { "epoch": 0.65, "grad_norm": 2.6352642653556586, "learning_rate": 5.885165623427106e-06, "loss": 0.9378, "step": 4009 }, { "epoch": 0.65, "grad_norm": 2.7975105969975735, "learning_rate": 5.880409078811784e-06, "loss": 0.8975, "step": 4010 }, { "epoch": 0.65, "grad_norm": 3.6187633929227148, "learning_rate": 5.875653656486588e-06, "loss": 0.9131, "step": 4011 }, { "epoch": 0.65, "grad_norm": 4.2838953018145975, "learning_rate": 5.870899357747029e-06, "loss": 0.9167, "step": 4012 }, { "epoch": 0.65, "grad_norm": 2.749703477998845, "learning_rate": 5.8661461838882995e-06, "loss": 0.8556, "step": 4013 }, { "epoch": 0.65, "grad_norm": 2.1860528791406604, "learning_rate": 5.861394136205301e-06, "loss": 0.9343, "step": 4014 }, { "epoch": 0.65, "grad_norm": 3.290939556567095, "learning_rate": 5.8566432159926215e-06, "loss": 0.868, "step": 4015 }, { "epoch": 0.65, "grad_norm": 3.273406618104627, "learning_rate": 5.851893424544541e-06, "loss": 0.8655, "step": 4016 }, { "epoch": 0.65, "grad_norm": 2.913815567149505, "learning_rate": 5.8471447631550335e-06, "loss": 0.9438, "step": 4017 }, { "epoch": 0.65, "grad_norm": 3.328134026313684, "learning_rate": 5.842397233117771e-06, "loss": 0.9732, "step": 4018 }, { "epoch": 0.65, "grad_norm": 1.7169829561768393, "learning_rate": 5.837650835726105e-06, "loss": 0.8915, "step": 4019 }, { "epoch": 0.65, "grad_norm": 2.659133961172681, "learning_rate": 5.832905572273084e-06, "loss": 0.8736, "step": 4020 }, { "epoch": 0.65, "grad_norm": 3.7161189156056182, "learning_rate": 5.828161444051456e-06, "loss": 0.9515, "step": 4021 }, { "epoch": 0.65, "grad_norm": 1.9378931200457004, "learning_rate": 5.823418452353649e-06, "loss": 0.9072, "step": 4022 }, { "epoch": 0.65, "grad_norm": 2.4661195479015685, "learning_rate": 5.8186765984717795e-06, "loss": 0.8588, "step": 4023 }, { "epoch": 0.65, "grad_norm": 3.2694756834715886, "learning_rate": 5.813935883697668e-06, "loss": 0.8483, "step": 4024 }, { "epoch": 0.65, "grad_norm": 2.636887382483676, "learning_rate": 5.809196309322811e-06, "loss": 0.85, "step": 4025 }, { "epoch": 0.65, "grad_norm": 3.2900719622594434, "learning_rate": 5.8044578766383945e-06, "loss": 0.8631, "step": 4026 }, { "epoch": 0.65, "grad_norm": 3.752440103742861, "learning_rate": 5.799720586935304e-06, "loss": 0.7194, "step": 4027 }, { "epoch": 0.65, "grad_norm": 3.011094084203172, "learning_rate": 5.794984441504114e-06, "loss": 0.9293, "step": 4028 }, { "epoch": 0.65, "grad_norm": 2.2332572008318117, "learning_rate": 5.790249441635067e-06, "loss": 0.3321, "step": 4029 }, { "epoch": 0.65, "grad_norm": 1.58455541362268, "learning_rate": 5.785515588618115e-06, "loss": 0.8801, "step": 4030 }, { "epoch": 0.65, "grad_norm": 3.706757256981177, "learning_rate": 5.780782883742892e-06, "loss": 0.8301, "step": 4031 }, { "epoch": 0.65, "grad_norm": 3.443398980114689, "learning_rate": 5.7760513282987164e-06, "loss": 0.8222, "step": 4032 }, { "epoch": 0.65, "grad_norm": 3.38993835528204, "learning_rate": 5.771320923574588e-06, "loss": 0.8486, "step": 4033 }, { "epoch": 0.65, "grad_norm": 3.666783735544931, "learning_rate": 5.7665916708592095e-06, "loss": 0.8822, "step": 4034 }, { "epoch": 0.65, "grad_norm": 3.1933548953523627, "learning_rate": 5.761863571440948e-06, "loss": 0.8478, "step": 4035 }, { "epoch": 0.65, "grad_norm": 3.5253746645241204, "learning_rate": 5.757136626607881e-06, "loss": 0.853, "step": 4036 }, { "epoch": 0.65, "grad_norm": 3.3281579530902334, "learning_rate": 5.75241083764775e-06, "loss": 0.8609, "step": 4037 }, { "epoch": 0.65, "grad_norm": 2.176018583821432, "learning_rate": 5.747686205847991e-06, "loss": 0.9372, "step": 4038 }, { "epoch": 0.65, "grad_norm": 2.310711658886482, "learning_rate": 5.742962732495727e-06, "loss": 0.8712, "step": 4039 }, { "epoch": 0.65, "grad_norm": 3.4938768230751607, "learning_rate": 5.738240418877758e-06, "loss": 0.9161, "step": 4040 }, { "epoch": 0.65, "grad_norm": 1.6041796630068696, "learning_rate": 5.7335192662805784e-06, "loss": 0.9333, "step": 4041 }, { "epoch": 0.65, "grad_norm": 4.14533883728794, "learning_rate": 5.728799275990352e-06, "loss": 0.7674, "step": 4042 }, { "epoch": 0.65, "grad_norm": 1.5963219478108488, "learning_rate": 5.7240804492929435e-06, "loss": 0.352, "step": 4043 }, { "epoch": 0.65, "grad_norm": 3.860535234887086, "learning_rate": 5.7193627874738874e-06, "loss": 0.9112, "step": 4044 }, { "epoch": 0.65, "grad_norm": 3.004917723533794, "learning_rate": 5.714646291818401e-06, "loss": 0.8506, "step": 4045 }, { "epoch": 0.65, "grad_norm": 2.149900403044633, "learning_rate": 5.709930963611394e-06, "loss": 0.3348, "step": 4046 }, { "epoch": 0.65, "grad_norm": 2.0715864760248186, "learning_rate": 5.70521680413745e-06, "loss": 0.9082, "step": 4047 }, { "epoch": 0.65, "grad_norm": 2.5463658859556477, "learning_rate": 5.700503814680831e-06, "loss": 0.8308, "step": 4048 }, { "epoch": 0.65, "grad_norm": 3.971972381334429, "learning_rate": 5.695791996525488e-06, "loss": 0.9193, "step": 4049 }, { "epoch": 0.65, "grad_norm": 2.875114604490165, "learning_rate": 5.691081350955061e-06, "loss": 0.9352, "step": 4050 }, { "epoch": 0.65, "grad_norm": 2.8681035462418945, "learning_rate": 5.686371879252841e-06, "loss": 0.8565, "step": 4051 }, { "epoch": 0.65, "grad_norm": 3.794207856705757, "learning_rate": 5.681663582701827e-06, "loss": 0.8685, "step": 4052 }, { "epoch": 0.65, "grad_norm": 2.9590599528817485, "learning_rate": 5.676956462584693e-06, "loss": 0.8561, "step": 4053 }, { "epoch": 0.65, "grad_norm": 2.3632105288682306, "learning_rate": 5.672250520183784e-06, "loss": 0.8981, "step": 4054 }, { "epoch": 0.65, "grad_norm": 1.9271435358764752, "learning_rate": 5.6675457567811235e-06, "loss": 0.8596, "step": 4055 }, { "epoch": 0.65, "grad_norm": 2.4861155237008354, "learning_rate": 5.662842173658429e-06, "loss": 0.917, "step": 4056 }, { "epoch": 0.65, "grad_norm": 4.005034616156435, "learning_rate": 5.65813977209708e-06, "loss": 0.8298, "step": 4057 }, { "epoch": 0.65, "grad_norm": 3.1095219102974845, "learning_rate": 5.653438553378137e-06, "loss": 0.877, "step": 4058 }, { "epoch": 0.65, "grad_norm": 3.1681427443356034, "learning_rate": 5.648738518782346e-06, "loss": 0.8861, "step": 4059 }, { "epoch": 0.65, "grad_norm": 2.121438576223792, "learning_rate": 5.64403966959013e-06, "loss": 0.3237, "step": 4060 }, { "epoch": 0.65, "grad_norm": 2.89773658030166, "learning_rate": 5.639342007081581e-06, "loss": 0.8943, "step": 4061 }, { "epoch": 0.65, "grad_norm": 4.234078597501966, "learning_rate": 5.6346455325364664e-06, "loss": 0.9201, "step": 4062 }, { "epoch": 0.65, "grad_norm": 3.0872042989249646, "learning_rate": 5.629950247234246e-06, "loss": 0.8791, "step": 4063 }, { "epoch": 0.65, "grad_norm": 2.558609963867996, "learning_rate": 5.625256152454035e-06, "loss": 0.917, "step": 4064 }, { "epoch": 0.65, "grad_norm": 3.214828175985591, "learning_rate": 5.620563249474642e-06, "loss": 0.8878, "step": 4065 }, { "epoch": 0.66, "grad_norm": 2.424598196503855, "learning_rate": 5.615871539574539e-06, "loss": 0.9103, "step": 4066 }, { "epoch": 0.66, "grad_norm": 2.4053116987796264, "learning_rate": 5.611181024031875e-06, "loss": 0.9264, "step": 4067 }, { "epoch": 0.66, "grad_norm": 4.491385585692918, "learning_rate": 5.606491704124482e-06, "loss": 0.8741, "step": 4068 }, { "epoch": 0.66, "grad_norm": 2.8014490669463945, "learning_rate": 5.6018035811298566e-06, "loss": 0.8205, "step": 4069 }, { "epoch": 0.66, "grad_norm": 2.171471976889694, "learning_rate": 5.597116656325169e-06, "loss": 0.837, "step": 4070 }, { "epoch": 0.66, "grad_norm": 2.5576690167524356, "learning_rate": 5.5924309309872705e-06, "loss": 0.8984, "step": 4071 }, { "epoch": 0.66, "grad_norm": 2.6783709169451853, "learning_rate": 5.587746406392689e-06, "loss": 0.899, "step": 4072 }, { "epoch": 0.66, "grad_norm": 1.8031660660376223, "learning_rate": 5.583063083817604e-06, "loss": 0.922, "step": 4073 }, { "epoch": 0.66, "grad_norm": 2.958299896337821, "learning_rate": 5.578380964537889e-06, "loss": 0.9101, "step": 4074 }, { "epoch": 0.66, "grad_norm": 3.214122964886478, "learning_rate": 5.573700049829083e-06, "loss": 0.8553, "step": 4075 }, { "epoch": 0.66, "grad_norm": 2.923940652581604, "learning_rate": 5.569020340966398e-06, "loss": 0.8712, "step": 4076 }, { "epoch": 0.66, "grad_norm": 3.212382119238756, "learning_rate": 5.564341839224707e-06, "loss": 0.941, "step": 4077 }, { "epoch": 0.66, "grad_norm": 2.012148912974842, "learning_rate": 5.55966454587857e-06, "loss": 0.9272, "step": 4078 }, { "epoch": 0.66, "grad_norm": 3.0951160536100972, "learning_rate": 5.5549884622022165e-06, "loss": 0.8395, "step": 4079 }, { "epoch": 0.66, "grad_norm": 3.682864751109302, "learning_rate": 5.550313589469525e-06, "loss": 0.9285, "step": 4080 }, { "epoch": 0.66, "grad_norm": 4.106463775411824, "learning_rate": 5.54563992895407e-06, "loss": 0.9291, "step": 4081 }, { "epoch": 0.66, "grad_norm": 3.156563469456387, "learning_rate": 5.540967481929085e-06, "loss": 0.8764, "step": 4082 }, { "epoch": 0.66, "grad_norm": 2.598201562534767, "learning_rate": 5.536296249667472e-06, "loss": 0.9253, "step": 4083 }, { "epoch": 0.66, "grad_norm": 3.0567569990825945, "learning_rate": 5.531626233441802e-06, "loss": 0.8897, "step": 4084 }, { "epoch": 0.66, "grad_norm": 2.8137396729464927, "learning_rate": 5.526957434524319e-06, "loss": 0.8287, "step": 4085 }, { "epoch": 0.66, "grad_norm": 2.4738389692455844, "learning_rate": 5.522289854186928e-06, "loss": 0.8807, "step": 4086 }, { "epoch": 0.66, "grad_norm": 3.2550816056061134, "learning_rate": 5.517623493701213e-06, "loss": 0.8912, "step": 4087 }, { "epoch": 0.66, "grad_norm": 4.160026498123286, "learning_rate": 5.512958354338416e-06, "loss": 0.8668, "step": 4088 }, { "epoch": 0.66, "grad_norm": 2.779915135686896, "learning_rate": 5.508294437369445e-06, "loss": 0.9499, "step": 4089 }, { "epoch": 0.66, "grad_norm": 2.160076560676922, "learning_rate": 5.503631744064888e-06, "loss": 0.844, "step": 4090 }, { "epoch": 0.66, "grad_norm": 2.965364151668721, "learning_rate": 5.498970275694987e-06, "loss": 0.9544, "step": 4091 }, { "epoch": 0.66, "grad_norm": 3.396411791876474, "learning_rate": 5.494310033529651e-06, "loss": 0.8879, "step": 4092 }, { "epoch": 0.66, "grad_norm": 3.114054929548939, "learning_rate": 5.489651018838462e-06, "loss": 0.8498, "step": 4093 }, { "epoch": 0.66, "grad_norm": 2.8450388241014446, "learning_rate": 5.48499323289067e-06, "loss": 0.8676, "step": 4094 }, { "epoch": 0.66, "grad_norm": 3.4282075995454324, "learning_rate": 5.480336676955174e-06, "loss": 0.9094, "step": 4095 }, { "epoch": 0.66, "grad_norm": 2.64327986845551, "learning_rate": 5.475681352300549e-06, "loss": 0.8945, "step": 4096 }, { "epoch": 0.66, "grad_norm": 3.610511105566691, "learning_rate": 5.471027260195043e-06, "loss": 0.884, "step": 4097 }, { "epoch": 0.66, "grad_norm": 1.1139889793159052, "learning_rate": 5.466374401906552e-06, "loss": 0.9639, "step": 4098 }, { "epoch": 0.66, "grad_norm": 1.9525731031772775, "learning_rate": 5.461722778702641e-06, "loss": 0.9, "step": 4099 }, { "epoch": 0.66, "grad_norm": 2.2132143623863665, "learning_rate": 5.457072391850543e-06, "loss": 0.3148, "step": 4100 }, { "epoch": 0.66, "grad_norm": 2.479911681571498, "learning_rate": 5.45242324261716e-06, "loss": 0.8956, "step": 4101 }, { "epoch": 0.66, "grad_norm": 2.679177104955325, "learning_rate": 5.447775332269032e-06, "loss": 0.8665, "step": 4102 }, { "epoch": 0.66, "grad_norm": 3.929150038380933, "learning_rate": 5.443128662072388e-06, "loss": 0.8594, "step": 4103 }, { "epoch": 0.66, "grad_norm": 3.681653447217393, "learning_rate": 5.43848323329311e-06, "loss": 0.8604, "step": 4104 }, { "epoch": 0.66, "grad_norm": 4.038081804524789, "learning_rate": 5.433839047196738e-06, "loss": 0.8743, "step": 4105 }, { "epoch": 0.66, "grad_norm": 4.022613974427059, "learning_rate": 5.429196105048473e-06, "loss": 0.8816, "step": 4106 }, { "epoch": 0.66, "grad_norm": 3.352280475390022, "learning_rate": 5.424554408113188e-06, "loss": 0.8625, "step": 4107 }, { "epoch": 0.66, "grad_norm": 3.0359713565746786, "learning_rate": 5.4199139576554046e-06, "loss": 0.8475, "step": 4108 }, { "epoch": 0.66, "grad_norm": 1.1807694179077879, "learning_rate": 5.4152747549393055e-06, "loss": 0.3215, "step": 4109 }, { "epoch": 0.66, "grad_norm": 1.632830679030268, "learning_rate": 5.410636801228748e-06, "loss": 0.9818, "step": 4110 }, { "epoch": 0.66, "grad_norm": 2.721082541798353, "learning_rate": 5.406000097787226e-06, "loss": 0.8899, "step": 4111 }, { "epoch": 0.66, "grad_norm": 3.098316867805822, "learning_rate": 5.4013646458779175e-06, "loss": 0.783, "step": 4112 }, { "epoch": 0.66, "grad_norm": 3.1548836649440903, "learning_rate": 5.396730446763641e-06, "loss": 0.9193, "step": 4113 }, { "epoch": 0.66, "grad_norm": 2.9377649066057643, "learning_rate": 5.392097501706877e-06, "loss": 0.9148, "step": 4114 }, { "epoch": 0.66, "grad_norm": 2.225625145346281, "learning_rate": 5.38746581196977e-06, "loss": 0.906, "step": 4115 }, { "epoch": 0.66, "grad_norm": 2.3173588407090837, "learning_rate": 5.382835378814129e-06, "loss": 0.9157, "step": 4116 }, { "epoch": 0.66, "grad_norm": 2.959214486720753, "learning_rate": 5.378206203501397e-06, "loss": 0.9346, "step": 4117 }, { "epoch": 0.66, "grad_norm": 1.9417780130683122, "learning_rate": 5.373578287292694e-06, "loss": 0.895, "step": 4118 }, { "epoch": 0.66, "grad_norm": 2.7890111239961324, "learning_rate": 5.3689516314488e-06, "loss": 0.7944, "step": 4119 }, { "epoch": 0.66, "grad_norm": 3.22217947467496, "learning_rate": 5.364326237230135e-06, "loss": 0.8699, "step": 4120 }, { "epoch": 0.66, "grad_norm": 2.867954910232458, "learning_rate": 5.359702105896783e-06, "loss": 0.8699, "step": 4121 }, { "epoch": 0.66, "grad_norm": 2.163284106681483, "learning_rate": 5.355079238708487e-06, "loss": 0.9357, "step": 4122 }, { "epoch": 0.66, "grad_norm": 2.626680472026246, "learning_rate": 5.350457636924654e-06, "loss": 0.9012, "step": 4123 }, { "epoch": 0.66, "grad_norm": 3.411143715986009, "learning_rate": 5.345837301804317e-06, "loss": 0.9099, "step": 4124 }, { "epoch": 0.66, "grad_norm": 2.8917379350335515, "learning_rate": 5.341218234606192e-06, "loss": 0.9418, "step": 4125 }, { "epoch": 0.66, "grad_norm": 2.863852408820475, "learning_rate": 5.336600436588644e-06, "loss": 0.8591, "step": 4126 }, { "epoch": 0.66, "grad_norm": 2.0665013195876316, "learning_rate": 5.331983909009685e-06, "loss": 0.8675, "step": 4127 }, { "epoch": 0.67, "grad_norm": 3.4094213718578916, "learning_rate": 5.327368653126978e-06, "loss": 0.9063, "step": 4128 }, { "epoch": 0.67, "grad_norm": 1.1225671423561243, "learning_rate": 5.322754670197859e-06, "loss": 0.9452, "step": 4129 }, { "epoch": 0.67, "grad_norm": 3.244064780306972, "learning_rate": 5.318141961479293e-06, "loss": 0.923, "step": 4130 }, { "epoch": 0.67, "grad_norm": 3.41177591879126, "learning_rate": 5.31353052822791e-06, "loss": 0.8799, "step": 4131 }, { "epoch": 0.67, "grad_norm": 2.912280972476125, "learning_rate": 5.3089203717e-06, "loss": 0.8568, "step": 4132 }, { "epoch": 0.67, "grad_norm": 2.8625757199063337, "learning_rate": 5.304311493151486e-06, "loss": 0.9385, "step": 4133 }, { "epoch": 0.67, "grad_norm": 3.4917637783436097, "learning_rate": 5.299703893837963e-06, "loss": 0.8785, "step": 4134 }, { "epoch": 0.67, "grad_norm": 2.9219047014491397, "learning_rate": 5.2950975750146635e-06, "loss": 0.8694, "step": 4135 }, { "epoch": 0.67, "grad_norm": 3.938499687083427, "learning_rate": 5.290492537936473e-06, "loss": 0.8871, "step": 4136 }, { "epoch": 0.67, "grad_norm": 1.6821488909371631, "learning_rate": 5.285888783857935e-06, "loss": 0.8839, "step": 4137 }, { "epoch": 0.67, "grad_norm": 3.7629590232903034, "learning_rate": 5.281286314033236e-06, "loss": 0.8135, "step": 4138 }, { "epoch": 0.67, "grad_norm": 2.7771456147459985, "learning_rate": 5.27668512971622e-06, "loss": 0.9444, "step": 4139 }, { "epoch": 0.67, "grad_norm": 2.569198506129432, "learning_rate": 5.27208523216037e-06, "loss": 0.9072, "step": 4140 }, { "epoch": 0.67, "grad_norm": 3.214426043641238, "learning_rate": 5.267486622618833e-06, "loss": 0.8653, "step": 4141 }, { "epoch": 0.67, "grad_norm": 2.2739547717814586, "learning_rate": 5.262889302344391e-06, "loss": 0.8538, "step": 4142 }, { "epoch": 0.67, "grad_norm": 2.4349294949141007, "learning_rate": 5.2582932725894785e-06, "loss": 0.8264, "step": 4143 }, { "epoch": 0.67, "grad_norm": 2.758451652486228, "learning_rate": 5.253698534606186e-06, "loss": 0.9081, "step": 4144 }, { "epoch": 0.67, "grad_norm": 2.2413375253602235, "learning_rate": 5.249105089646252e-06, "loss": 0.8664, "step": 4145 }, { "epoch": 0.67, "grad_norm": 4.1990438735388675, "learning_rate": 5.244512938961044e-06, "loss": 0.9078, "step": 4146 }, { "epoch": 0.67, "grad_norm": 3.1686633156501145, "learning_rate": 5.239922083801597e-06, "loss": 0.8929, "step": 4147 }, { "epoch": 0.67, "grad_norm": 1.8337616709082287, "learning_rate": 5.235332525418588e-06, "loss": 0.8821, "step": 4148 }, { "epoch": 0.67, "grad_norm": 2.7640062325347987, "learning_rate": 5.23074426506234e-06, "loss": 0.8999, "step": 4149 }, { "epoch": 0.67, "grad_norm": 3.947880458749625, "learning_rate": 5.226157303982815e-06, "loss": 0.9423, "step": 4150 }, { "epoch": 0.67, "grad_norm": 2.455961106567084, "learning_rate": 5.221571643429637e-06, "loss": 0.8789, "step": 4151 }, { "epoch": 0.67, "grad_norm": 3.893951342231187, "learning_rate": 5.216987284652061e-06, "loss": 0.874, "step": 4152 }, { "epoch": 0.67, "grad_norm": 2.853226852688091, "learning_rate": 5.21240422889899e-06, "loss": 0.9041, "step": 4153 }, { "epoch": 0.67, "grad_norm": 3.1245090861964413, "learning_rate": 5.20782247741898e-06, "loss": 0.9, "step": 4154 }, { "epoch": 0.67, "grad_norm": 2.9671372701160816, "learning_rate": 5.203242031460222e-06, "loss": 0.8498, "step": 4155 }, { "epoch": 0.67, "grad_norm": 3.6845642300092254, "learning_rate": 5.1986628922705605e-06, "loss": 0.8986, "step": 4156 }, { "epoch": 0.67, "grad_norm": 2.601455548811593, "learning_rate": 5.194085061097474e-06, "loss": 0.9126, "step": 4157 }, { "epoch": 0.67, "grad_norm": 2.434002249585807, "learning_rate": 5.189508539188097e-06, "loss": 0.8582, "step": 4158 }, { "epoch": 0.67, "grad_norm": 3.1016344499289934, "learning_rate": 5.1849333277891946e-06, "loss": 0.8918, "step": 4159 }, { "epoch": 0.67, "grad_norm": 2.7441728583238127, "learning_rate": 5.180359428147179e-06, "loss": 0.8609, "step": 4160 }, { "epoch": 0.67, "grad_norm": 3.190046452629504, "learning_rate": 5.175786841508113e-06, "loss": 0.8689, "step": 4161 }, { "epoch": 0.67, "grad_norm": 3.5796028650036833, "learning_rate": 5.1712155691176865e-06, "loss": 0.9213, "step": 4162 }, { "epoch": 0.67, "grad_norm": 2.460346259892476, "learning_rate": 5.166645612221251e-06, "loss": 0.9265, "step": 4163 }, { "epoch": 0.67, "grad_norm": 4.416032253741911, "learning_rate": 5.162076972063781e-06, "loss": 0.8014, "step": 4164 }, { "epoch": 0.67, "grad_norm": 3.7799901203248427, "learning_rate": 5.1575096498899e-06, "loss": 0.8683, "step": 4165 }, { "epoch": 0.67, "grad_norm": 4.431267458258539, "learning_rate": 5.152943646943876e-06, "loss": 0.887, "step": 4166 }, { "epoch": 0.67, "grad_norm": 1.768662666887596, "learning_rate": 5.148378964469615e-06, "loss": 0.8694, "step": 4167 }, { "epoch": 0.67, "grad_norm": 2.0541291729314723, "learning_rate": 5.143815603710654e-06, "loss": 0.8486, "step": 4168 }, { "epoch": 0.67, "grad_norm": 3.9947951547672935, "learning_rate": 5.139253565910185e-06, "loss": 0.7938, "step": 4169 }, { "epoch": 0.67, "grad_norm": 2.7720870907741464, "learning_rate": 5.134692852311035e-06, "loss": 0.837, "step": 4170 }, { "epoch": 0.67, "grad_norm": 2.95235017168609, "learning_rate": 5.1301334641556665e-06, "loss": 0.9009, "step": 4171 }, { "epoch": 0.67, "grad_norm": 3.1598426842462537, "learning_rate": 5.125575402686176e-06, "loss": 0.9538, "step": 4172 }, { "epoch": 0.67, "grad_norm": 2.4692083971366188, "learning_rate": 5.121018669144313e-06, "loss": 0.8985, "step": 4173 }, { "epoch": 0.67, "grad_norm": 3.51833893185641, "learning_rate": 5.116463264771456e-06, "loss": 0.8997, "step": 4174 }, { "epoch": 0.67, "grad_norm": 1.4575694217801332, "learning_rate": 5.111909190808617e-06, "loss": 0.8354, "step": 4175 }, { "epoch": 0.67, "grad_norm": 2.4207494273612005, "learning_rate": 5.107356448496459e-06, "loss": 0.8148, "step": 4176 }, { "epoch": 0.67, "grad_norm": 3.5930841243672926, "learning_rate": 5.102805039075267e-06, "loss": 0.8648, "step": 4177 }, { "epoch": 0.67, "grad_norm": 2.4510155616555727, "learning_rate": 5.098254963784979e-06, "loss": 0.9366, "step": 4178 }, { "epoch": 0.67, "grad_norm": 2.58432343695609, "learning_rate": 5.093706223865151e-06, "loss": 0.8787, "step": 4179 }, { "epoch": 0.67, "grad_norm": 3.3448715468120525, "learning_rate": 5.089158820554996e-06, "loss": 0.874, "step": 4180 }, { "epoch": 0.67, "grad_norm": 2.1300620205226872, "learning_rate": 5.084612755093346e-06, "loss": 0.3739, "step": 4181 }, { "epoch": 0.67, "grad_norm": 4.745245712558897, "learning_rate": 5.08006802871867e-06, "loss": 0.879, "step": 4182 }, { "epoch": 0.67, "grad_norm": 2.310052968858746, "learning_rate": 5.075524642669086e-06, "loss": 0.8831, "step": 4183 }, { "epoch": 0.67, "grad_norm": 2.020233091370706, "learning_rate": 5.07098259818233e-06, "loss": 0.3524, "step": 4184 }, { "epoch": 0.67, "grad_norm": 2.3952449463079386, "learning_rate": 5.066441896495786e-06, "loss": 0.8974, "step": 4185 }, { "epoch": 0.67, "grad_norm": 5.2591806703366055, "learning_rate": 5.061902538846466e-06, "loss": 0.8657, "step": 4186 }, { "epoch": 0.67, "grad_norm": 2.6464321229850922, "learning_rate": 5.057364526471008e-06, "loss": 0.9344, "step": 4187 }, { "epoch": 0.67, "grad_norm": 2.9994236074500567, "learning_rate": 5.052827860605702e-06, "loss": 0.9254, "step": 4188 }, { "epoch": 0.67, "grad_norm": 2.8855197672321364, "learning_rate": 5.048292542486457e-06, "loss": 0.8845, "step": 4189 }, { "epoch": 0.68, "grad_norm": 3.570944832330014, "learning_rate": 5.0437585733488135e-06, "loss": 0.838, "step": 4190 }, { "epoch": 0.68, "grad_norm": 2.219225443014983, "learning_rate": 5.039225954427953e-06, "loss": 0.8788, "step": 4191 }, { "epoch": 0.68, "grad_norm": 2.7064760637900362, "learning_rate": 5.034694686958692e-06, "loss": 0.8958, "step": 4192 }, { "epoch": 0.68, "grad_norm": 2.0136648893928903, "learning_rate": 5.030164772175469e-06, "loss": 0.8501, "step": 4193 }, { "epoch": 0.68, "grad_norm": 1.7977727223131237, "learning_rate": 5.025636211312351e-06, "loss": 0.8823, "step": 4194 }, { "epoch": 0.68, "grad_norm": 2.285336740000859, "learning_rate": 5.021109005603053e-06, "loss": 0.9415, "step": 4195 }, { "epoch": 0.68, "grad_norm": 4.055722966215336, "learning_rate": 5.016583156280906e-06, "loss": 0.8997, "step": 4196 }, { "epoch": 0.68, "grad_norm": 2.853296875973295, "learning_rate": 5.012058664578871e-06, "loss": 0.8945, "step": 4197 }, { "epoch": 0.68, "grad_norm": 3.462724418115451, "learning_rate": 5.007535531729548e-06, "loss": 0.9308, "step": 4198 }, { "epoch": 0.68, "grad_norm": 1.592788106826225, "learning_rate": 5.003013758965171e-06, "loss": 0.9377, "step": 4199 }, { "epoch": 0.68, "grad_norm": 4.044303872619726, "learning_rate": 4.9984933475175865e-06, "loss": 0.7803, "step": 4200 }, { "epoch": 0.68, "grad_norm": 3.115613650011454, "learning_rate": 4.9939742986182795e-06, "loss": 0.8679, "step": 4201 }, { "epoch": 0.68, "grad_norm": 4.365694449329035, "learning_rate": 4.989456613498368e-06, "loss": 0.8577, "step": 4202 }, { "epoch": 0.68, "grad_norm": 2.588068795458663, "learning_rate": 4.9849402933885915e-06, "loss": 0.9019, "step": 4203 }, { "epoch": 0.68, "grad_norm": 3.4626248554069825, "learning_rate": 4.980425339519316e-06, "loss": 0.8673, "step": 4204 }, { "epoch": 0.68, "grad_norm": 3.620987216736216, "learning_rate": 4.975911753120548e-06, "loss": 0.9235, "step": 4205 }, { "epoch": 0.68, "grad_norm": 2.1027118702208063, "learning_rate": 4.971399535421904e-06, "loss": 0.8963, "step": 4206 }, { "epoch": 0.68, "grad_norm": 2.2351198888792463, "learning_rate": 4.966888687652645e-06, "loss": 0.884, "step": 4207 }, { "epoch": 0.68, "grad_norm": 4.10030440154176, "learning_rate": 4.9623792110416454e-06, "loss": 0.8722, "step": 4208 }, { "epoch": 0.68, "grad_norm": 2.5809355241717373, "learning_rate": 4.9578711068174076e-06, "loss": 0.8846, "step": 4209 }, { "epoch": 0.68, "grad_norm": 3.5933133067302534, "learning_rate": 4.953364376208072e-06, "loss": 0.8715, "step": 4210 }, { "epoch": 0.68, "grad_norm": 2.876944009076274, "learning_rate": 4.948859020441391e-06, "loss": 0.8185, "step": 4211 }, { "epoch": 0.68, "grad_norm": 3.3324485240260207, "learning_rate": 4.944355040744745e-06, "loss": 0.8476, "step": 4212 }, { "epoch": 0.68, "grad_norm": 2.8886159152477773, "learning_rate": 4.939852438345145e-06, "loss": 0.8716, "step": 4213 }, { "epoch": 0.68, "grad_norm": 1.5016999149064143, "learning_rate": 4.93535121446923e-06, "loss": 0.3378, "step": 4214 }, { "epoch": 0.68, "grad_norm": 2.3517513769597294, "learning_rate": 4.93085137034325e-06, "loss": 0.8725, "step": 4215 }, { "epoch": 0.68, "grad_norm": 4.538264665722732, "learning_rate": 4.926352907193086e-06, "loss": 0.8951, "step": 4216 }, { "epoch": 0.68, "grad_norm": 2.834567829541822, "learning_rate": 4.921855826244249e-06, "loss": 0.9674, "step": 4217 }, { "epoch": 0.68, "grad_norm": 1.8147953889128616, "learning_rate": 4.917360128721865e-06, "loss": 0.8936, "step": 4218 }, { "epoch": 0.68, "grad_norm": 2.751553443490872, "learning_rate": 4.912865815850682e-06, "loss": 0.9408, "step": 4219 }, { "epoch": 0.68, "grad_norm": 2.3388518968054455, "learning_rate": 4.908372888855078e-06, "loss": 0.8126, "step": 4220 }, { "epoch": 0.68, "grad_norm": 2.6832739849122174, "learning_rate": 4.903881348959055e-06, "loss": 0.8702, "step": 4221 }, { "epoch": 0.68, "grad_norm": 3.1977508866439592, "learning_rate": 4.899391197386229e-06, "loss": 0.9137, "step": 4222 }, { "epoch": 0.68, "grad_norm": 2.119671817425147, "learning_rate": 4.894902435359834e-06, "loss": 0.3188, "step": 4223 }, { "epoch": 0.68, "grad_norm": 2.2512981061015935, "learning_rate": 4.890415064102744e-06, "loss": 0.3142, "step": 4224 }, { "epoch": 0.68, "grad_norm": 3.1390630529316805, "learning_rate": 4.885929084837436e-06, "loss": 0.9259, "step": 4225 }, { "epoch": 0.68, "grad_norm": 2.9157642785074076, "learning_rate": 4.8814444987860125e-06, "loss": 0.892, "step": 4226 }, { "epoch": 0.68, "grad_norm": 2.78726702687336, "learning_rate": 4.876961307170204e-06, "loss": 0.8826, "step": 4227 }, { "epoch": 0.68, "grad_norm": 2.0385060921438325, "learning_rate": 4.87247951121135e-06, "loss": 0.9295, "step": 4228 }, { "epoch": 0.68, "grad_norm": 3.5514824471805904, "learning_rate": 4.867999112130422e-06, "loss": 0.8603, "step": 4229 }, { "epoch": 0.68, "grad_norm": 1.7588724289219853, "learning_rate": 4.863520111147999e-06, "loss": 0.8873, "step": 4230 }, { "epoch": 0.68, "grad_norm": 2.638429168901277, "learning_rate": 4.859042509484283e-06, "loss": 0.8593, "step": 4231 }, { "epoch": 0.68, "grad_norm": 4.3747233712070255, "learning_rate": 4.854566308359102e-06, "loss": 0.9433, "step": 4232 }, { "epoch": 0.68, "grad_norm": 2.6712377133977183, "learning_rate": 4.850091508991893e-06, "loss": 0.862, "step": 4233 }, { "epoch": 0.68, "grad_norm": 3.105289861936877, "learning_rate": 4.845618112601712e-06, "loss": 0.8457, "step": 4234 }, { "epoch": 0.68, "grad_norm": 3.594033934945121, "learning_rate": 4.841146120407239e-06, "loss": 0.8042, "step": 4235 }, { "epoch": 0.68, "grad_norm": 2.780683481189219, "learning_rate": 4.836675533626769e-06, "loss": 0.9267, "step": 4236 }, { "epoch": 0.68, "grad_norm": 3.0419188268642143, "learning_rate": 4.832206353478213e-06, "loss": 0.9174, "step": 4237 }, { "epoch": 0.68, "grad_norm": 4.21196512058231, "learning_rate": 4.8277385811790946e-06, "loss": 0.7914, "step": 4238 }, { "epoch": 0.68, "grad_norm": 2.572761366220324, "learning_rate": 4.823272217946563e-06, "loss": 0.917, "step": 4239 }, { "epoch": 0.68, "grad_norm": 2.450753882282392, "learning_rate": 4.8188072649973775e-06, "loss": 0.8998, "step": 4240 }, { "epoch": 0.68, "grad_norm": 2.9202187479726143, "learning_rate": 4.8143437235479085e-06, "loss": 0.8481, "step": 4241 }, { "epoch": 0.68, "grad_norm": 2.8207043827245846, "learning_rate": 4.809881594814154e-06, "loss": 0.833, "step": 4242 }, { "epoch": 0.68, "grad_norm": 2.15115129562462, "learning_rate": 4.805420880011723e-06, "loss": 0.9212, "step": 4243 }, { "epoch": 0.68, "grad_norm": 3.1097795232896623, "learning_rate": 4.800961580355833e-06, "loss": 0.8993, "step": 4244 }, { "epoch": 0.68, "grad_norm": 3.121385391225999, "learning_rate": 4.7965036970613175e-06, "loss": 0.8353, "step": 4245 }, { "epoch": 0.68, "grad_norm": 2.7469371732200507, "learning_rate": 4.792047231342635e-06, "loss": 0.887, "step": 4246 }, { "epoch": 0.68, "grad_norm": 2.0798511649948113, "learning_rate": 4.787592184413843e-06, "loss": 0.9018, "step": 4247 }, { "epoch": 0.68, "grad_norm": 3.314856500756174, "learning_rate": 4.783138557488618e-06, "loss": 0.9104, "step": 4248 }, { "epoch": 0.68, "grad_norm": 2.6496924743713626, "learning_rate": 4.778686351780257e-06, "loss": 0.867, "step": 4249 }, { "epoch": 0.68, "grad_norm": 3.0046873032147783, "learning_rate": 4.774235568501656e-06, "loss": 0.9113, "step": 4250 }, { "epoch": 0.68, "grad_norm": 3.3373384415174288, "learning_rate": 4.76978620886534e-06, "loss": 0.8783, "step": 4251 }, { "epoch": 0.69, "grad_norm": 2.8399859659089968, "learning_rate": 4.765338274083432e-06, "loss": 0.9098, "step": 4252 }, { "epoch": 0.69, "grad_norm": 3.3822002605700474, "learning_rate": 4.7608917653676675e-06, "loss": 0.8214, "step": 4253 }, { "epoch": 0.69, "grad_norm": 3.2331316488490014, "learning_rate": 4.7564466839294085e-06, "loss": 0.8415, "step": 4254 }, { "epoch": 0.69, "grad_norm": 3.2062849135849607, "learning_rate": 4.752003030979612e-06, "loss": 0.8521, "step": 4255 }, { "epoch": 0.69, "grad_norm": 1.696654531301831, "learning_rate": 4.747560807728847e-06, "loss": 0.8158, "step": 4256 }, { "epoch": 0.69, "grad_norm": 1.5125594450563262, "learning_rate": 4.743120015387302e-06, "loss": 0.3406, "step": 4257 }, { "epoch": 0.69, "grad_norm": 4.409934160774758, "learning_rate": 4.7386806551647766e-06, "loss": 0.9031, "step": 4258 }, { "epoch": 0.69, "grad_norm": 3.9426352302227694, "learning_rate": 4.73424272827067e-06, "loss": 0.9726, "step": 4259 }, { "epoch": 0.69, "grad_norm": 2.943976223103248, "learning_rate": 4.729806235913991e-06, "loss": 0.9067, "step": 4260 }, { "epoch": 0.69, "grad_norm": 3.286367585409932, "learning_rate": 4.725371179303371e-06, "loss": 0.8792, "step": 4261 }, { "epoch": 0.69, "grad_norm": 3.9513258628236105, "learning_rate": 4.720937559647038e-06, "loss": 0.8772, "step": 4262 }, { "epoch": 0.69, "grad_norm": 3.0855568952889105, "learning_rate": 4.716505378152827e-06, "loss": 0.9065, "step": 4263 }, { "epoch": 0.69, "grad_norm": 3.353911317065748, "learning_rate": 4.712074636028192e-06, "loss": 0.8312, "step": 4264 }, { "epoch": 0.69, "grad_norm": 2.7095126494362107, "learning_rate": 4.7076453344801965e-06, "loss": 0.8263, "step": 4265 }, { "epoch": 0.69, "grad_norm": 3.273471877616321, "learning_rate": 4.703217474715489e-06, "loss": 0.8732, "step": 4266 }, { "epoch": 0.69, "grad_norm": 2.5903688016505395, "learning_rate": 4.698791057940349e-06, "loss": 0.8628, "step": 4267 }, { "epoch": 0.69, "grad_norm": 3.5433572810581815, "learning_rate": 4.694366085360656e-06, "loss": 0.8731, "step": 4268 }, { "epoch": 0.69, "grad_norm": 4.0786104461429895, "learning_rate": 4.689942558181893e-06, "loss": 0.8704, "step": 4269 }, { "epoch": 0.69, "grad_norm": 2.6415286351994394, "learning_rate": 4.6855204776091445e-06, "loss": 0.8778, "step": 4270 }, { "epoch": 0.69, "grad_norm": 1.8175157009532736, "learning_rate": 4.681099844847117e-06, "loss": 0.8998, "step": 4271 }, { "epoch": 0.69, "grad_norm": 2.4225948125435544, "learning_rate": 4.6766806611001046e-06, "loss": 0.333, "step": 4272 }, { "epoch": 0.69, "grad_norm": 2.522228884536451, "learning_rate": 4.672262927572021e-06, "loss": 0.8653, "step": 4273 }, { "epoch": 0.69, "grad_norm": 3.0987569962607586, "learning_rate": 4.667846645466377e-06, "loss": 0.9113, "step": 4274 }, { "epoch": 0.69, "grad_norm": 2.120732229204876, "learning_rate": 4.663431815986284e-06, "loss": 0.8355, "step": 4275 }, { "epoch": 0.69, "grad_norm": 3.0820714756959853, "learning_rate": 4.659018440334472e-06, "loss": 0.8708, "step": 4276 }, { "epoch": 0.69, "grad_norm": 3.7826788858595743, "learning_rate": 4.654606519713258e-06, "loss": 0.7531, "step": 4277 }, { "epoch": 0.69, "grad_norm": 3.0633800565569143, "learning_rate": 4.6501960553245785e-06, "loss": 0.8747, "step": 4278 }, { "epoch": 0.69, "grad_norm": 3.9954409844569496, "learning_rate": 4.645787048369958e-06, "loss": 0.9178, "step": 4279 }, { "epoch": 0.69, "grad_norm": 2.880011792688598, "learning_rate": 4.641379500050538e-06, "loss": 0.8315, "step": 4280 }, { "epoch": 0.69, "grad_norm": 3.230916703704516, "learning_rate": 4.636973411567055e-06, "loss": 0.9005, "step": 4281 }, { "epoch": 0.69, "grad_norm": 4.638764801700777, "learning_rate": 4.632568784119842e-06, "loss": 0.8557, "step": 4282 }, { "epoch": 0.69, "grad_norm": 2.3343021447840275, "learning_rate": 4.628165618908851e-06, "loss": 0.9128, "step": 4283 }, { "epoch": 0.69, "grad_norm": 4.084682994107807, "learning_rate": 4.623763917133621e-06, "loss": 0.864, "step": 4284 }, { "epoch": 0.69, "grad_norm": 3.2670959372257666, "learning_rate": 4.619363679993293e-06, "loss": 0.9281, "step": 4285 }, { "epoch": 0.69, "grad_norm": 2.775148621436919, "learning_rate": 4.614964908686617e-06, "loss": 0.8978, "step": 4286 }, { "epoch": 0.69, "grad_norm": 3.1689680336723973, "learning_rate": 4.610567604411946e-06, "loss": 0.9299, "step": 4287 }, { "epoch": 0.69, "grad_norm": 2.861020307941489, "learning_rate": 4.606171768367213e-06, "loss": 0.886, "step": 4288 }, { "epoch": 0.69, "grad_norm": 1.9059164662223922, "learning_rate": 4.601777401749972e-06, "loss": 0.8423, "step": 4289 }, { "epoch": 0.69, "grad_norm": 2.6406923545057306, "learning_rate": 4.597384505757373e-06, "loss": 0.8553, "step": 4290 }, { "epoch": 0.69, "grad_norm": 3.18251021030703, "learning_rate": 4.592993081586159e-06, "loss": 0.9509, "step": 4291 }, { "epoch": 0.69, "grad_norm": 2.4357556679561916, "learning_rate": 4.588603130432671e-06, "loss": 0.8883, "step": 4292 }, { "epoch": 0.69, "grad_norm": 2.7889345283961795, "learning_rate": 4.58421465349286e-06, "loss": 0.9307, "step": 4293 }, { "epoch": 0.69, "grad_norm": 2.1959718413223928, "learning_rate": 4.579827651962264e-06, "loss": 0.8838, "step": 4294 }, { "epoch": 0.69, "grad_norm": 2.0121965452021424, "learning_rate": 4.5754421270360195e-06, "loss": 0.9049, "step": 4295 }, { "epoch": 0.69, "grad_norm": 3.2019526127126894, "learning_rate": 4.571058079908869e-06, "loss": 0.8978, "step": 4296 }, { "epoch": 0.69, "grad_norm": 3.039517797574553, "learning_rate": 4.566675511775151e-06, "loss": 0.9415, "step": 4297 }, { "epoch": 0.69, "grad_norm": 3.635683561800704, "learning_rate": 4.562294423828794e-06, "loss": 0.8797, "step": 4298 }, { "epoch": 0.69, "grad_norm": 1.6823967663636379, "learning_rate": 4.557914817263324e-06, "loss": 0.8766, "step": 4299 }, { "epoch": 0.69, "grad_norm": 3.564023411597905, "learning_rate": 4.553536693271872e-06, "loss": 0.8804, "step": 4300 }, { "epoch": 0.69, "grad_norm": 3.0724939260275317, "learning_rate": 4.5491600530471546e-06, "loss": 0.891, "step": 4301 }, { "epoch": 0.69, "grad_norm": 2.78128257475218, "learning_rate": 4.544784897781495e-06, "loss": 0.8093, "step": 4302 }, { "epoch": 0.69, "grad_norm": 4.017149402106879, "learning_rate": 4.540411228666805e-06, "loss": 0.931, "step": 4303 }, { "epoch": 0.69, "grad_norm": 2.82418135200105, "learning_rate": 4.536039046894584e-06, "loss": 0.954, "step": 4304 }, { "epoch": 0.69, "grad_norm": 3.461877837213503, "learning_rate": 4.531668353655948e-06, "loss": 0.9165, "step": 4305 }, { "epoch": 0.69, "grad_norm": 2.101707028095448, "learning_rate": 4.527299150141588e-06, "loss": 0.8133, "step": 4306 }, { "epoch": 0.69, "grad_norm": 2.166678489750613, "learning_rate": 4.52293143754179e-06, "loss": 0.9142, "step": 4307 }, { "epoch": 0.69, "grad_norm": 3.716185382649495, "learning_rate": 4.518565217046446e-06, "loss": 0.8542, "step": 4308 }, { "epoch": 0.69, "grad_norm": 3.258937723703093, "learning_rate": 4.51420048984504e-06, "loss": 0.8895, "step": 4309 }, { "epoch": 0.69, "grad_norm": 4.21696897988583, "learning_rate": 4.50983725712663e-06, "loss": 0.8886, "step": 4310 }, { "epoch": 0.69, "grad_norm": 1.61420035793663, "learning_rate": 4.505475520079889e-06, "loss": 0.3355, "step": 4311 }, { "epoch": 0.69, "grad_norm": 2.5132845778083053, "learning_rate": 4.501115279893077e-06, "loss": 0.9055, "step": 4312 }, { "epoch": 0.69, "grad_norm": 2.5047227120497415, "learning_rate": 4.4967565377540415e-06, "loss": 0.8968, "step": 4313 }, { "epoch": 0.7, "grad_norm": 3.8779096446771777, "learning_rate": 4.492399294850218e-06, "loss": 0.8751, "step": 4314 }, { "epoch": 0.7, "grad_norm": 2.075441621411973, "learning_rate": 4.488043552368649e-06, "loss": 0.8297, "step": 4315 }, { "epoch": 0.7, "grad_norm": 2.1430074979621563, "learning_rate": 4.483689311495954e-06, "loss": 0.8452, "step": 4316 }, { "epoch": 0.7, "grad_norm": 3.484902517304429, "learning_rate": 4.4793365734183445e-06, "loss": 0.9462, "step": 4317 }, { "epoch": 0.7, "grad_norm": 3.2818509959278552, "learning_rate": 4.47498533932163e-06, "loss": 0.8236, "step": 4318 }, { "epoch": 0.7, "grad_norm": 2.568943667942386, "learning_rate": 4.470635610391212e-06, "loss": 0.809, "step": 4319 }, { "epoch": 0.7, "grad_norm": 3.4003322887730056, "learning_rate": 4.466287387812071e-06, "loss": 0.84, "step": 4320 }, { "epoch": 0.7, "grad_norm": 3.488503785419857, "learning_rate": 4.461940672768779e-06, "loss": 0.8909, "step": 4321 }, { "epoch": 0.7, "grad_norm": 2.7345816397924456, "learning_rate": 4.457595466445509e-06, "loss": 0.9022, "step": 4322 }, { "epoch": 0.7, "grad_norm": 1.3431506927432781, "learning_rate": 4.453251770026011e-06, "loss": 0.8971, "step": 4323 }, { "epoch": 0.7, "grad_norm": 3.5717659627403266, "learning_rate": 4.448909584693626e-06, "loss": 0.8965, "step": 4324 }, { "epoch": 0.7, "grad_norm": 2.432742243875063, "learning_rate": 4.444568911631289e-06, "loss": 0.8883, "step": 4325 }, { "epoch": 0.7, "grad_norm": 2.2152991524691243, "learning_rate": 4.4402297520215136e-06, "loss": 0.9004, "step": 4326 }, { "epoch": 0.7, "grad_norm": 3.579226528343621, "learning_rate": 4.435892107046414e-06, "loss": 0.8236, "step": 4327 }, { "epoch": 0.7, "grad_norm": 3.0398166377834723, "learning_rate": 4.431555977887679e-06, "loss": 0.961, "step": 4328 }, { "epoch": 0.7, "grad_norm": 2.6837031135318994, "learning_rate": 4.427221365726586e-06, "loss": 0.8771, "step": 4329 }, { "epoch": 0.7, "grad_norm": 3.232690492284823, "learning_rate": 4.422888271744009e-06, "loss": 0.8584, "step": 4330 }, { "epoch": 0.7, "grad_norm": 1.9206150546004173, "learning_rate": 4.418556697120408e-06, "loss": 0.3265, "step": 4331 }, { "epoch": 0.7, "grad_norm": 2.8220642229175943, "learning_rate": 4.4142266430358085e-06, "loss": 0.9564, "step": 4332 }, { "epoch": 0.7, "grad_norm": 2.6290638665255015, "learning_rate": 4.409898110669844e-06, "loss": 0.8996, "step": 4333 }, { "epoch": 0.7, "grad_norm": 3.542135963014542, "learning_rate": 4.40557110120173e-06, "loss": 0.8801, "step": 4334 }, { "epoch": 0.7, "grad_norm": 2.731493843419418, "learning_rate": 4.40124561581026e-06, "loss": 0.9294, "step": 4335 }, { "epoch": 0.7, "grad_norm": 1.936849177170856, "learning_rate": 4.39692165567381e-06, "loss": 0.876, "step": 4336 }, { "epoch": 0.7, "grad_norm": 2.5335361847056075, "learning_rate": 4.392599221970351e-06, "loss": 0.9069, "step": 4337 }, { "epoch": 0.7, "grad_norm": 2.326860225117243, "learning_rate": 4.388278315877441e-06, "loss": 0.8551, "step": 4338 }, { "epoch": 0.7, "grad_norm": 3.653383092843028, "learning_rate": 4.3839589385721985e-06, "loss": 0.9291, "step": 4339 }, { "epoch": 0.7, "grad_norm": 4.513352188416562, "learning_rate": 4.379641091231348e-06, "loss": 0.9028, "step": 4340 }, { "epoch": 0.7, "grad_norm": 3.1290030208486503, "learning_rate": 4.375324775031194e-06, "loss": 0.8408, "step": 4341 }, { "epoch": 0.7, "grad_norm": 3.4587438102311094, "learning_rate": 4.3710099911476155e-06, "loss": 0.8318, "step": 4342 }, { "epoch": 0.7, "grad_norm": 3.047313169615315, "learning_rate": 4.3666967407560765e-06, "loss": 0.8502, "step": 4343 }, { "epoch": 0.7, "grad_norm": 3.3375227509483953, "learning_rate": 4.362385025031631e-06, "loss": 0.8331, "step": 4344 }, { "epoch": 0.7, "grad_norm": 2.911086129751915, "learning_rate": 4.3580748451489075e-06, "loss": 0.8692, "step": 4345 }, { "epoch": 0.7, "grad_norm": 2.7891302020406914, "learning_rate": 4.353766202282113e-06, "loss": 0.8266, "step": 4346 }, { "epoch": 0.7, "grad_norm": 3.0513913842512586, "learning_rate": 4.349459097605048e-06, "loss": 0.8654, "step": 4347 }, { "epoch": 0.7, "grad_norm": 3.332044272715979, "learning_rate": 4.3451535322910786e-06, "loss": 0.9009, "step": 4348 }, { "epoch": 0.7, "grad_norm": 1.7833384935654941, "learning_rate": 4.340849507513168e-06, "loss": 0.3162, "step": 4349 }, { "epoch": 0.7, "grad_norm": 2.932646129768938, "learning_rate": 4.336547024443847e-06, "loss": 0.8597, "step": 4350 }, { "epoch": 0.7, "grad_norm": 3.478984135086364, "learning_rate": 4.332246084255227e-06, "loss": 0.8297, "step": 4351 }, { "epoch": 0.7, "grad_norm": 3.379304472108123, "learning_rate": 4.327946688119006e-06, "loss": 0.8482, "step": 4352 }, { "epoch": 0.7, "grad_norm": 3.9017717249589277, "learning_rate": 4.3236488372064656e-06, "loss": 0.9122, "step": 4353 }, { "epoch": 0.7, "grad_norm": 1.990485268184494, "learning_rate": 4.319352532688444e-06, "loss": 0.9038, "step": 4354 }, { "epoch": 0.7, "grad_norm": 2.0004985903101167, "learning_rate": 4.31505777573538e-06, "loss": 0.9011, "step": 4355 }, { "epoch": 0.7, "grad_norm": 2.3618578446818317, "learning_rate": 4.310764567517288e-06, "loss": 0.8351, "step": 4356 }, { "epoch": 0.7, "grad_norm": 1.9764130426525819, "learning_rate": 4.306472909203754e-06, "loss": 0.3084, "step": 4357 }, { "epoch": 0.7, "grad_norm": 2.4008611802941577, "learning_rate": 4.302182801963937e-06, "loss": 0.8867, "step": 4358 }, { "epoch": 0.7, "grad_norm": 3.0074862216497493, "learning_rate": 4.297894246966586e-06, "loss": 0.8558, "step": 4359 }, { "epoch": 0.7, "grad_norm": 2.682686864880514, "learning_rate": 4.29360724538003e-06, "loss": 0.862, "step": 4360 }, { "epoch": 0.7, "grad_norm": 3.254913137656888, "learning_rate": 4.28932179837215e-06, "loss": 0.8633, "step": 4361 }, { "epoch": 0.7, "grad_norm": 2.832366965554336, "learning_rate": 4.2850379071104286e-06, "loss": 0.8491, "step": 4362 }, { "epoch": 0.7, "grad_norm": 2.2973428885419303, "learning_rate": 4.280755572761919e-06, "loss": 0.8848, "step": 4363 }, { "epoch": 0.7, "grad_norm": 3.4276424210867584, "learning_rate": 4.276474796493243e-06, "loss": 0.855, "step": 4364 }, { "epoch": 0.7, "grad_norm": 3.6454571920457504, "learning_rate": 4.2721955794705985e-06, "loss": 0.9515, "step": 4365 }, { "epoch": 0.7, "grad_norm": 2.3991528168155187, "learning_rate": 4.267917922859769e-06, "loss": 0.8466, "step": 4366 }, { "epoch": 0.7, "grad_norm": 2.2474704934836387, "learning_rate": 4.263641827826104e-06, "loss": 0.9476, "step": 4367 }, { "epoch": 0.7, "grad_norm": 3.3800253012799297, "learning_rate": 4.259367295534524e-06, "loss": 0.8837, "step": 4368 }, { "epoch": 0.7, "grad_norm": 2.965094313255318, "learning_rate": 4.255094327149539e-06, "loss": 0.8633, "step": 4369 }, { "epoch": 0.7, "grad_norm": 3.742185206402611, "learning_rate": 4.250822923835214e-06, "loss": 0.8364, "step": 4370 }, { "epoch": 0.7, "grad_norm": 2.6987395240682845, "learning_rate": 4.2465530867552065e-06, "loss": 0.8932, "step": 4371 }, { "epoch": 0.7, "grad_norm": 3.1566379422012485, "learning_rate": 4.242284817072732e-06, "loss": 0.8792, "step": 4372 }, { "epoch": 0.7, "grad_norm": 2.958737484543265, "learning_rate": 4.2380181159505815e-06, "loss": 0.9195, "step": 4373 }, { "epoch": 0.7, "grad_norm": 3.341919727842994, "learning_rate": 4.23375298455113e-06, "loss": 0.8535, "step": 4374 }, { "epoch": 0.7, "grad_norm": 3.038006047296187, "learning_rate": 4.22948942403631e-06, "loss": 0.9284, "step": 4375 }, { "epoch": 0.71, "grad_norm": 3.3784944316416725, "learning_rate": 4.2252274355676395e-06, "loss": 0.91, "step": 4376 }, { "epoch": 0.71, "grad_norm": 3.5627804026760996, "learning_rate": 4.220967020306194e-06, "loss": 0.9031, "step": 4377 }, { "epoch": 0.71, "grad_norm": 3.1775196380368547, "learning_rate": 4.216708179412636e-06, "loss": 0.9231, "step": 4378 }, { "epoch": 0.71, "grad_norm": 2.381841743215044, "learning_rate": 4.212450914047187e-06, "loss": 0.8949, "step": 4379 }, { "epoch": 0.71, "grad_norm": 1.6358543425869316, "learning_rate": 4.2081952253696415e-06, "loss": 0.8442, "step": 4380 }, { "epoch": 0.71, "grad_norm": 3.4367633377030606, "learning_rate": 4.203941114539367e-06, "loss": 0.8871, "step": 4381 }, { "epoch": 0.71, "grad_norm": 2.873586058186055, "learning_rate": 4.19968858271531e-06, "loss": 0.8827, "step": 4382 }, { "epoch": 0.71, "grad_norm": 2.5234346655484274, "learning_rate": 4.195437631055963e-06, "loss": 0.8563, "step": 4383 }, { "epoch": 0.71, "grad_norm": 3.7326817204977556, "learning_rate": 4.191188260719408e-06, "loss": 0.8584, "step": 4384 }, { "epoch": 0.71, "grad_norm": 2.7466996936602515, "learning_rate": 4.186940472863296e-06, "loss": 0.881, "step": 4385 }, { "epoch": 0.71, "grad_norm": 2.5576520511775205, "learning_rate": 4.182694268644837e-06, "loss": 0.8838, "step": 4386 }, { "epoch": 0.71, "grad_norm": 3.2907607526028966, "learning_rate": 4.178449649220809e-06, "loss": 0.8479, "step": 4387 }, { "epoch": 0.71, "grad_norm": 2.2334501046166246, "learning_rate": 4.174206615747575e-06, "loss": 0.8364, "step": 4388 }, { "epoch": 0.71, "grad_norm": 2.8646511740309593, "learning_rate": 4.169965169381045e-06, "loss": 0.9054, "step": 4389 }, { "epoch": 0.71, "grad_norm": 2.7637652162351256, "learning_rate": 4.165725311276707e-06, "loss": 0.8989, "step": 4390 }, { "epoch": 0.71, "grad_norm": 3.536371483764001, "learning_rate": 4.161487042589619e-06, "loss": 0.8883, "step": 4391 }, { "epoch": 0.71, "grad_norm": 3.433701792331015, "learning_rate": 4.157250364474398e-06, "loss": 0.8477, "step": 4392 }, { "epoch": 0.71, "grad_norm": 1.93975938297819, "learning_rate": 4.153015278085237e-06, "loss": 0.8288, "step": 4393 }, { "epoch": 0.71, "grad_norm": 3.688362537951597, "learning_rate": 4.148781784575888e-06, "loss": 0.8704, "step": 4394 }, { "epoch": 0.71, "grad_norm": 2.047111089937397, "learning_rate": 4.1445498850996664e-06, "loss": 0.8664, "step": 4395 }, { "epoch": 0.71, "grad_norm": 3.8128613160066678, "learning_rate": 4.1403195808094665e-06, "loss": 0.9142, "step": 4396 }, { "epoch": 0.71, "grad_norm": 2.4439269214012227, "learning_rate": 4.136090872857732e-06, "loss": 0.9462, "step": 4397 }, { "epoch": 0.71, "grad_norm": 2.1267353153011017, "learning_rate": 4.131863762396487e-06, "loss": 0.8445, "step": 4398 }, { "epoch": 0.71, "grad_norm": 1.694528706163086, "learning_rate": 4.127638250577305e-06, "loss": 0.903, "step": 4399 }, { "epoch": 0.71, "grad_norm": 3.023526606860773, "learning_rate": 4.12341433855134e-06, "loss": 0.8524, "step": 4400 }, { "epoch": 0.71, "grad_norm": 3.724314781504893, "learning_rate": 4.119192027469299e-06, "loss": 0.8047, "step": 4401 }, { "epoch": 0.71, "grad_norm": 1.7697340152264855, "learning_rate": 4.114971318481451e-06, "loss": 0.8532, "step": 4402 }, { "epoch": 0.71, "grad_norm": 2.0606579502052593, "learning_rate": 4.110752212737641e-06, "loss": 0.8544, "step": 4403 }, { "epoch": 0.71, "grad_norm": 2.496702976531498, "learning_rate": 4.106534711387267e-06, "loss": 0.9448, "step": 4404 }, { "epoch": 0.71, "grad_norm": 2.8976878716005117, "learning_rate": 4.102318815579288e-06, "loss": 0.864, "step": 4405 }, { "epoch": 0.71, "grad_norm": 2.3372233752412126, "learning_rate": 4.098104526462235e-06, "loss": 0.825, "step": 4406 }, { "epoch": 0.71, "grad_norm": 3.727650097819003, "learning_rate": 4.093891845184197e-06, "loss": 0.8382, "step": 4407 }, { "epoch": 0.71, "grad_norm": 3.178902793989274, "learning_rate": 4.0896807728928245e-06, "loss": 0.8718, "step": 4408 }, { "epoch": 0.71, "grad_norm": 3.668694859988768, "learning_rate": 4.085471310735325e-06, "loss": 0.8605, "step": 4409 }, { "epoch": 0.71, "grad_norm": 2.9819250785989646, "learning_rate": 4.081263459858479e-06, "loss": 0.8331, "step": 4410 }, { "epoch": 0.71, "grad_norm": 3.264852916214475, "learning_rate": 4.077057221408617e-06, "loss": 0.8631, "step": 4411 }, { "epoch": 0.71, "grad_norm": 2.777284688630105, "learning_rate": 4.072852596531631e-06, "loss": 0.7944, "step": 4412 }, { "epoch": 0.71, "grad_norm": 2.6073522894214207, "learning_rate": 4.0686495863729854e-06, "loss": 0.9353, "step": 4413 }, { "epoch": 0.71, "grad_norm": 3.0834889415618756, "learning_rate": 4.064448192077687e-06, "loss": 0.9432, "step": 4414 }, { "epoch": 0.71, "grad_norm": 3.7316399298648992, "learning_rate": 4.060248414790318e-06, "loss": 0.9169, "step": 4415 }, { "epoch": 0.71, "grad_norm": 3.315763485359703, "learning_rate": 4.0560502556550085e-06, "loss": 0.9756, "step": 4416 }, { "epoch": 0.71, "grad_norm": 4.043726576393833, "learning_rate": 4.051853715815459e-06, "loss": 0.8478, "step": 4417 }, { "epoch": 0.71, "grad_norm": 4.073700942346625, "learning_rate": 4.0476587964149185e-06, "loss": 0.8205, "step": 4418 }, { "epoch": 0.71, "grad_norm": 2.6489331167093133, "learning_rate": 4.0434654985961955e-06, "loss": 0.8959, "step": 4419 }, { "epoch": 0.71, "grad_norm": 2.4558775214120514, "learning_rate": 4.039273823501667e-06, "loss": 0.9277, "step": 4420 }, { "epoch": 0.71, "grad_norm": 3.0528124740089826, "learning_rate": 4.035083772273254e-06, "loss": 0.8969, "step": 4421 }, { "epoch": 0.71, "grad_norm": 2.7317224334026906, "learning_rate": 4.030895346052449e-06, "loss": 0.8711, "step": 4422 }, { "epoch": 0.71, "grad_norm": 1.9195259438838446, "learning_rate": 4.02670854598029e-06, "loss": 0.9224, "step": 4423 }, { "epoch": 0.71, "grad_norm": 3.9988925115930414, "learning_rate": 4.022523373197376e-06, "loss": 0.8365, "step": 4424 }, { "epoch": 0.71, "grad_norm": 3.318035323209639, "learning_rate": 4.018339828843868e-06, "loss": 0.9373, "step": 4425 }, { "epoch": 0.71, "grad_norm": 1.6729295916155011, "learning_rate": 4.014157914059475e-06, "loss": 0.9294, "step": 4426 }, { "epoch": 0.71, "grad_norm": 2.7908535717419913, "learning_rate": 4.009977629983464e-06, "loss": 0.9573, "step": 4427 }, { "epoch": 0.71, "grad_norm": 1.901122843084818, "learning_rate": 4.005798977754664e-06, "loss": 0.8809, "step": 4428 }, { "epoch": 0.71, "grad_norm": 2.795015963950656, "learning_rate": 4.001621958511456e-06, "loss": 0.8438, "step": 4429 }, { "epoch": 0.71, "grad_norm": 2.6978990610694615, "learning_rate": 3.9974465733917725e-06, "loss": 0.9051, "step": 4430 }, { "epoch": 0.71, "grad_norm": 2.331450883734547, "learning_rate": 3.993272823533101e-06, "loss": 0.8795, "step": 4431 }, { "epoch": 0.71, "grad_norm": 3.3694259390061188, "learning_rate": 3.989100710072491e-06, "loss": 0.927, "step": 4432 }, { "epoch": 0.71, "grad_norm": 2.3737061640431727, "learning_rate": 3.98493023414654e-06, "loss": 0.9338, "step": 4433 }, { "epoch": 0.71, "grad_norm": 3.084758755542323, "learning_rate": 3.980761396891396e-06, "loss": 0.9018, "step": 4434 }, { "epoch": 0.71, "grad_norm": 3.3421622811712903, "learning_rate": 3.976594199442768e-06, "loss": 0.8655, "step": 4435 }, { "epoch": 0.71, "grad_norm": 2.587668585698644, "learning_rate": 3.972428642935921e-06, "loss": 0.8813, "step": 4436 }, { "epoch": 0.71, "grad_norm": 3.558482263621362, "learning_rate": 3.968264728505662e-06, "loss": 0.8134, "step": 4437 }, { "epoch": 0.72, "grad_norm": 1.3600728227949264, "learning_rate": 3.964102457286353e-06, "loss": 0.3397, "step": 4438 }, { "epoch": 0.72, "grad_norm": 2.8703969512128, "learning_rate": 3.959941830411918e-06, "loss": 0.8023, "step": 4439 }, { "epoch": 0.72, "grad_norm": 4.004100366864548, "learning_rate": 3.955782849015825e-06, "loss": 0.9447, "step": 4440 }, { "epoch": 0.72, "grad_norm": 1.969346152977788, "learning_rate": 3.95162551423109e-06, "loss": 0.9764, "step": 4441 }, { "epoch": 0.72, "grad_norm": 2.657980770141985, "learning_rate": 3.9474698271902925e-06, "loss": 0.8991, "step": 4442 }, { "epoch": 0.72, "grad_norm": 2.4378452545574865, "learning_rate": 3.94331578902555e-06, "loss": 0.8737, "step": 4443 }, { "epoch": 0.72, "grad_norm": 2.8190721653378383, "learning_rate": 3.939163400868543e-06, "loss": 0.855, "step": 4444 }, { "epoch": 0.72, "grad_norm": 2.0743952961589947, "learning_rate": 3.935012663850493e-06, "loss": 0.8783, "step": 4445 }, { "epoch": 0.72, "grad_norm": 2.4200407896466265, "learning_rate": 3.930863579102173e-06, "loss": 0.8124, "step": 4446 }, { "epoch": 0.72, "grad_norm": 2.5782336587838364, "learning_rate": 3.9267161477539155e-06, "loss": 0.8611, "step": 4447 }, { "epoch": 0.72, "grad_norm": 3.9427641541709084, "learning_rate": 3.922570370935588e-06, "loss": 0.9129, "step": 4448 }, { "epoch": 0.72, "grad_norm": 2.7026142674231193, "learning_rate": 3.918426249776614e-06, "loss": 0.8055, "step": 4449 }, { "epoch": 0.72, "grad_norm": 2.8198389728374837, "learning_rate": 3.91428378540597e-06, "loss": 0.8596, "step": 4450 }, { "epoch": 0.72, "grad_norm": 2.8121371564991082, "learning_rate": 3.910142978952183e-06, "loss": 0.9784, "step": 4451 }, { "epoch": 0.72, "grad_norm": 1.903215776355003, "learning_rate": 3.906003831543309e-06, "loss": 0.8934, "step": 4452 }, { "epoch": 0.72, "grad_norm": 2.135772997891618, "learning_rate": 3.901866344306975e-06, "loss": 0.2911, "step": 4453 }, { "epoch": 0.72, "grad_norm": 2.9710072820029163, "learning_rate": 3.8977305183703464e-06, "loss": 0.902, "step": 4454 }, { "epoch": 0.72, "grad_norm": 2.948447423452266, "learning_rate": 3.893596354860135e-06, "loss": 0.7562, "step": 4455 }, { "epoch": 0.72, "grad_norm": 2.125885834902319, "learning_rate": 3.889463854902598e-06, "loss": 0.8948, "step": 4456 }, { "epoch": 0.72, "grad_norm": 3.3936574006488214, "learning_rate": 3.885333019623544e-06, "loss": 0.9172, "step": 4457 }, { "epoch": 0.72, "grad_norm": 3.107321209478826, "learning_rate": 3.88120385014833e-06, "loss": 0.892, "step": 4458 }, { "epoch": 0.72, "grad_norm": 3.0026143127075886, "learning_rate": 3.8770763476018546e-06, "loss": 0.9818, "step": 4459 }, { "epoch": 0.72, "grad_norm": 2.3271836803615464, "learning_rate": 3.872950513108558e-06, "loss": 0.8475, "step": 4460 }, { "epoch": 0.72, "grad_norm": 2.8331256304137873, "learning_rate": 3.868826347792437e-06, "loss": 0.836, "step": 4461 }, { "epoch": 0.72, "grad_norm": 2.7631316071409535, "learning_rate": 3.864703852777026e-06, "loss": 0.878, "step": 4462 }, { "epoch": 0.72, "grad_norm": 2.8498965060533066, "learning_rate": 3.860583029185403e-06, "loss": 0.8497, "step": 4463 }, { "epoch": 0.72, "grad_norm": 2.6347158553143126, "learning_rate": 3.8564638781402e-06, "loss": 0.9096, "step": 4464 }, { "epoch": 0.72, "grad_norm": 2.771055160094592, "learning_rate": 3.85234640076358e-06, "loss": 0.8758, "step": 4465 }, { "epoch": 0.72, "grad_norm": 2.727963762445368, "learning_rate": 3.848230598177266e-06, "loss": 0.9198, "step": 4466 }, { "epoch": 0.72, "grad_norm": 2.5409207630556283, "learning_rate": 3.844116471502511e-06, "loss": 0.9244, "step": 4467 }, { "epoch": 0.72, "grad_norm": 1.1931688621775394, "learning_rate": 3.840004021860113e-06, "loss": 0.9332, "step": 4468 }, { "epoch": 0.72, "grad_norm": 3.0325368135864132, "learning_rate": 3.835893250370426e-06, "loss": 0.944, "step": 4469 }, { "epoch": 0.72, "grad_norm": 4.0846608138538745, "learning_rate": 3.831784158153331e-06, "loss": 0.8895, "step": 4470 }, { "epoch": 0.72, "grad_norm": 4.514294491012623, "learning_rate": 3.827676746328256e-06, "loss": 0.8898, "step": 4471 }, { "epoch": 0.72, "grad_norm": 3.35670230027055, "learning_rate": 3.823571016014176e-06, "loss": 0.8431, "step": 4472 }, { "epoch": 0.72, "grad_norm": 2.486855665938506, "learning_rate": 3.819466968329613e-06, "loss": 0.8546, "step": 4473 }, { "epoch": 0.72, "grad_norm": 2.526752385050115, "learning_rate": 3.815364604392607e-06, "loss": 0.8857, "step": 4474 }, { "epoch": 0.72, "grad_norm": 3.6283610317263486, "learning_rate": 3.811263925320765e-06, "loss": 0.8158, "step": 4475 }, { "epoch": 0.72, "grad_norm": 2.3153076654569857, "learning_rate": 3.8071649322312256e-06, "loss": 0.8877, "step": 4476 }, { "epoch": 0.72, "grad_norm": 3.046211366028864, "learning_rate": 3.803067626240665e-06, "loss": 0.8338, "step": 4477 }, { "epoch": 0.72, "grad_norm": 3.3334820396313707, "learning_rate": 3.7989720084653003e-06, "loss": 0.8785, "step": 4478 }, { "epoch": 0.72, "grad_norm": 1.9296677314270811, "learning_rate": 3.7948780800208916e-06, "loss": 0.8546, "step": 4479 }, { "epoch": 0.72, "grad_norm": 2.905992229884472, "learning_rate": 3.790785842022746e-06, "loss": 0.8099, "step": 4480 }, { "epoch": 0.72, "grad_norm": 2.8257510511890285, "learning_rate": 3.7866952955856895e-06, "loss": 0.8936, "step": 4481 }, { "epoch": 0.72, "grad_norm": 2.009633941340073, "learning_rate": 3.7826064418241037e-06, "loss": 0.9562, "step": 4482 }, { "epoch": 0.72, "grad_norm": 2.2767185511152737, "learning_rate": 3.7785192818519113e-06, "loss": 0.9009, "step": 4483 }, { "epoch": 0.72, "grad_norm": 3.790421138102573, "learning_rate": 3.774433816782561e-06, "loss": 0.9208, "step": 4484 }, { "epoch": 0.72, "grad_norm": 4.595801070726349, "learning_rate": 3.7703500477290456e-06, "loss": 0.8498, "step": 4485 }, { "epoch": 0.72, "grad_norm": 2.6930143869825267, "learning_rate": 3.7662679758039023e-06, "loss": 1.0068, "step": 4486 }, { "epoch": 0.72, "grad_norm": 3.6820392585082913, "learning_rate": 3.762187602119192e-06, "loss": 0.9579, "step": 4487 }, { "epoch": 0.72, "grad_norm": 2.5931685381930665, "learning_rate": 3.758108927786528e-06, "loss": 0.951, "step": 4488 }, { "epoch": 0.72, "grad_norm": 3.562091335481449, "learning_rate": 3.7540319539170522e-06, "loss": 0.8782, "step": 4489 }, { "epoch": 0.72, "grad_norm": 2.753529364586367, "learning_rate": 3.7499566816214384e-06, "loss": 0.8723, "step": 4490 }, { "epoch": 0.72, "grad_norm": 2.188185666116889, "learning_rate": 3.7458831120099126e-06, "loss": 0.9512, "step": 4491 }, { "epoch": 0.72, "grad_norm": 1.6510811529241256, "learning_rate": 3.741811246192223e-06, "loss": 0.9188, "step": 4492 }, { "epoch": 0.72, "grad_norm": 3.129595619867922, "learning_rate": 3.737741085277653e-06, "loss": 0.8892, "step": 4493 }, { "epoch": 0.72, "grad_norm": 2.9983395113694935, "learning_rate": 3.7336726303750327e-06, "loss": 0.9283, "step": 4494 }, { "epoch": 0.72, "grad_norm": 1.0666665678222929, "learning_rate": 3.729605882592724e-06, "loss": 0.887, "step": 4495 }, { "epoch": 0.72, "grad_norm": 2.799684871242723, "learning_rate": 3.7255408430386164e-06, "loss": 0.9717, "step": 4496 }, { "epoch": 0.72, "grad_norm": 2.6367017505239043, "learning_rate": 3.7214775128201363e-06, "loss": 0.8763, "step": 4497 }, { "epoch": 0.72, "grad_norm": 2.24297582079797, "learning_rate": 3.717415893044254e-06, "loss": 0.306, "step": 4498 }, { "epoch": 0.72, "grad_norm": 3.1849925332453473, "learning_rate": 3.713355984817463e-06, "loss": 0.8196, "step": 4499 }, { "epoch": 0.73, "grad_norm": 2.8914402817231064, "learning_rate": 3.7092977892457905e-06, "loss": 0.8047, "step": 4500 }, { "epoch": 0.73, "grad_norm": 3.368608710275555, "learning_rate": 3.7052413074348038e-06, "loss": 0.8923, "step": 4501 }, { "epoch": 0.73, "grad_norm": 1.8601075340277973, "learning_rate": 3.7011865404896085e-06, "loss": 0.9203, "step": 4502 }, { "epoch": 0.73, "grad_norm": 2.0267425998257615, "learning_rate": 3.6971334895148202e-06, "loss": 0.3112, "step": 4503 }, { "epoch": 0.73, "grad_norm": 2.3367170481163875, "learning_rate": 3.6930821556146092e-06, "loss": 0.8766, "step": 4504 }, { "epoch": 0.73, "grad_norm": 3.3539770005585727, "learning_rate": 3.689032539892673e-06, "loss": 0.8623, "step": 4505 }, { "epoch": 0.73, "grad_norm": 1.9099843078487944, "learning_rate": 3.684984643452236e-06, "loss": 0.8849, "step": 4506 }, { "epoch": 0.73, "grad_norm": 1.6312468839301386, "learning_rate": 3.6809384673960545e-06, "loss": 0.3423, "step": 4507 }, { "epoch": 0.73, "grad_norm": 2.449525074762132, "learning_rate": 3.676894012826422e-06, "loss": 0.9027, "step": 4508 }, { "epoch": 0.73, "grad_norm": 2.452059180163167, "learning_rate": 3.6728512808451554e-06, "loss": 0.9313, "step": 4509 }, { "epoch": 0.73, "grad_norm": 3.7040722114411864, "learning_rate": 3.668810272553612e-06, "loss": 0.9047, "step": 4510 }, { "epoch": 0.73, "grad_norm": 1.8785867082263483, "learning_rate": 3.6647709890526708e-06, "loss": 0.852, "step": 4511 }, { "epoch": 0.73, "grad_norm": 3.0672553375095717, "learning_rate": 3.66073343144274e-06, "loss": 0.9039, "step": 4512 }, { "epoch": 0.73, "grad_norm": 3.02817954062523, "learning_rate": 3.65669760082377e-06, "loss": 0.8991, "step": 4513 }, { "epoch": 0.73, "grad_norm": 3.252380379684282, "learning_rate": 3.6526634982952225e-06, "loss": 0.8166, "step": 4514 }, { "epoch": 0.73, "grad_norm": 3.2713891952105802, "learning_rate": 3.648631124956108e-06, "loss": 0.8614, "step": 4515 }, { "epoch": 0.73, "grad_norm": 2.689662063671596, "learning_rate": 3.6446004819049473e-06, "loss": 0.9956, "step": 4516 }, { "epoch": 0.73, "grad_norm": 2.6835427536207157, "learning_rate": 3.640571570239807e-06, "loss": 0.8341, "step": 4517 }, { "epoch": 0.73, "grad_norm": 3.144390524801418, "learning_rate": 3.63654439105827e-06, "loss": 0.8457, "step": 4518 }, { "epoch": 0.73, "grad_norm": 2.197374129265393, "learning_rate": 3.632518945457446e-06, "loss": 0.9073, "step": 4519 }, { "epoch": 0.73, "grad_norm": 2.8930411473712745, "learning_rate": 3.6284952345339864e-06, "loss": 0.8622, "step": 4520 }, { "epoch": 0.73, "grad_norm": 2.3865610123625083, "learning_rate": 3.624473259384056e-06, "loss": 0.8167, "step": 4521 }, { "epoch": 0.73, "grad_norm": 3.174301797586462, "learning_rate": 3.6204530211033482e-06, "loss": 0.8781, "step": 4522 }, { "epoch": 0.73, "grad_norm": 2.120977184261067, "learning_rate": 3.616434520787091e-06, "loss": 0.788, "step": 4523 }, { "epoch": 0.73, "grad_norm": 2.838850558678078, "learning_rate": 3.6124177595300415e-06, "loss": 0.919, "step": 4524 }, { "epoch": 0.73, "grad_norm": 3.273926473072727, "learning_rate": 3.608402738426462e-06, "loss": 0.788, "step": 4525 }, { "epoch": 0.73, "grad_norm": 3.1733297385133956, "learning_rate": 3.6043894585701623e-06, "loss": 0.8758, "step": 4526 }, { "epoch": 0.73, "grad_norm": 4.240495488347363, "learning_rate": 3.6003779210544733e-06, "loss": 0.8677, "step": 4527 }, { "epoch": 0.73, "grad_norm": 1.838426047806099, "learning_rate": 3.596368126972245e-06, "loss": 0.3213, "step": 4528 }, { "epoch": 0.73, "grad_norm": 3.7179592798556897, "learning_rate": 3.592360077415853e-06, "loss": 0.8443, "step": 4529 }, { "epoch": 0.73, "grad_norm": 3.522944041543753, "learning_rate": 3.588353773477208e-06, "loss": 0.8835, "step": 4530 }, { "epoch": 0.73, "grad_norm": 2.7018064248680913, "learning_rate": 3.5843492162477312e-06, "loss": 0.8818, "step": 4531 }, { "epoch": 0.73, "grad_norm": 2.1583795397708476, "learning_rate": 3.5803464068183734e-06, "loss": 0.9344, "step": 4532 }, { "epoch": 0.73, "grad_norm": 2.529584360170122, "learning_rate": 3.576345346279614e-06, "loss": 0.8983, "step": 4533 }, { "epoch": 0.73, "grad_norm": 1.6930456519869492, "learning_rate": 3.5723460357214547e-06, "loss": 0.8585, "step": 4534 }, { "epoch": 0.73, "grad_norm": 4.404651757806052, "learning_rate": 3.568348476233414e-06, "loss": 0.9255, "step": 4535 }, { "epoch": 0.73, "grad_norm": 2.257928865067407, "learning_rate": 3.564352668904535e-06, "loss": 0.3238, "step": 4536 }, { "epoch": 0.73, "grad_norm": 3.099008118472647, "learning_rate": 3.5603586148233917e-06, "loss": 0.8834, "step": 4537 }, { "epoch": 0.73, "grad_norm": 3.8443572022223442, "learning_rate": 3.556366315078068e-06, "loss": 0.8695, "step": 4538 }, { "epoch": 0.73, "grad_norm": 2.6041077772475623, "learning_rate": 3.5523757707561836e-06, "loss": 0.8805, "step": 4539 }, { "epoch": 0.73, "grad_norm": 3.529162574834752, "learning_rate": 3.548386982944868e-06, "loss": 0.8285, "step": 4540 }, { "epoch": 0.73, "grad_norm": 2.6133522286348323, "learning_rate": 3.544399952730776e-06, "loss": 0.9346, "step": 4541 }, { "epoch": 0.73, "grad_norm": 1.8429739013703375, "learning_rate": 3.5404146812000893e-06, "loss": 0.9159, "step": 4542 }, { "epoch": 0.73, "grad_norm": 2.541733402147352, "learning_rate": 3.536431169438502e-06, "loss": 0.8157, "step": 4543 }, { "epoch": 0.73, "grad_norm": 4.0612812268109515, "learning_rate": 3.5324494185312317e-06, "loss": 0.8933, "step": 4544 }, { "epoch": 0.73, "grad_norm": 2.992413943016551, "learning_rate": 3.5284694295630183e-06, "loss": 0.8671, "step": 4545 }, { "epoch": 0.73, "grad_norm": 3.3864606607163754, "learning_rate": 3.5244912036181276e-06, "loss": 0.8314, "step": 4546 }, { "epoch": 0.73, "grad_norm": 4.5171514656796266, "learning_rate": 3.5205147417803253e-06, "loss": 0.8182, "step": 4547 }, { "epoch": 0.73, "grad_norm": 3.567737009346245, "learning_rate": 3.5165400451329147e-06, "loss": 0.7718, "step": 4548 }, { "epoch": 0.73, "grad_norm": 1.6159262783920174, "learning_rate": 3.512567114758717e-06, "loss": 0.8772, "step": 4549 }, { "epoch": 0.73, "grad_norm": 3.2652443111782814, "learning_rate": 3.5085959517400645e-06, "loss": 0.8952, "step": 4550 }, { "epoch": 0.73, "grad_norm": 3.1289182893709544, "learning_rate": 3.504626557158808e-06, "loss": 0.9528, "step": 4551 }, { "epoch": 0.73, "grad_norm": 3.664358944481152, "learning_rate": 3.500658932096327e-06, "loss": 0.9257, "step": 4552 }, { "epoch": 0.73, "grad_norm": 2.9089467652329053, "learning_rate": 3.4966930776335083e-06, "loss": 0.8122, "step": 4553 }, { "epoch": 0.73, "grad_norm": 2.938360859148212, "learning_rate": 3.492728994850756e-06, "loss": 0.9074, "step": 4554 }, { "epoch": 0.73, "grad_norm": 3.1786680344646934, "learning_rate": 3.488766684828e-06, "loss": 0.7571, "step": 4555 }, { "epoch": 0.73, "grad_norm": 2.382706836796585, "learning_rate": 3.4848061486446848e-06, "loss": 0.9374, "step": 4556 }, { "epoch": 0.73, "grad_norm": 2.653640183033167, "learning_rate": 3.4808473873797675e-06, "loss": 0.8695, "step": 4557 }, { "epoch": 0.73, "grad_norm": 2.344696565854822, "learning_rate": 3.476890402111721e-06, "loss": 0.8711, "step": 4558 }, { "epoch": 0.73, "grad_norm": 3.8302404669827337, "learning_rate": 3.472935193918542e-06, "loss": 0.858, "step": 4559 }, { "epoch": 0.73, "grad_norm": 3.858175361535726, "learning_rate": 3.4689817638777355e-06, "loss": 0.8653, "step": 4560 }, { "epoch": 0.73, "grad_norm": 3.083396154914988, "learning_rate": 3.4650301130663224e-06, "loss": 0.864, "step": 4561 }, { "epoch": 0.74, "grad_norm": 3.129840916012489, "learning_rate": 3.461080242560847e-06, "loss": 0.8328, "step": 4562 }, { "epoch": 0.74, "grad_norm": 4.197917903524489, "learning_rate": 3.4571321534373557e-06, "loss": 0.8634, "step": 4563 }, { "epoch": 0.74, "grad_norm": 2.5274230850243713, "learning_rate": 3.453185846771425e-06, "loss": 0.8501, "step": 4564 }, { "epoch": 0.74, "grad_norm": 2.8298730664042955, "learning_rate": 3.449241323638135e-06, "loss": 0.9086, "step": 4565 }, { "epoch": 0.74, "grad_norm": 2.092386057678572, "learning_rate": 3.445298585112077e-06, "loss": 0.3022, "step": 4566 }, { "epoch": 0.74, "grad_norm": 3.31301616749732, "learning_rate": 3.441357632267365e-06, "loss": 0.8564, "step": 4567 }, { "epoch": 0.74, "grad_norm": 2.6653022454442783, "learning_rate": 3.437418466177631e-06, "loss": 0.9096, "step": 4568 }, { "epoch": 0.74, "grad_norm": 1.8405279681239655, "learning_rate": 3.4334810879159987e-06, "loss": 0.297, "step": 4569 }, { "epoch": 0.74, "grad_norm": 3.403161845082194, "learning_rate": 3.429545498555126e-06, "loss": 0.8992, "step": 4570 }, { "epoch": 0.74, "grad_norm": 2.0405818496511556, "learning_rate": 3.4256116991671773e-06, "loss": 0.324, "step": 4571 }, { "epoch": 0.74, "grad_norm": 4.35928596214236, "learning_rate": 3.421679690823827e-06, "loss": 0.9667, "step": 4572 }, { "epoch": 0.74, "grad_norm": 3.09902812120518, "learning_rate": 3.417749474596257e-06, "loss": 0.8784, "step": 4573 }, { "epoch": 0.74, "grad_norm": 3.793299871698252, "learning_rate": 3.4138210515551717e-06, "loss": 0.9014, "step": 4574 }, { "epoch": 0.74, "grad_norm": 2.3107083571331226, "learning_rate": 3.4098944227707873e-06, "loss": 0.8912, "step": 4575 }, { "epoch": 0.74, "grad_norm": 2.5045679322067858, "learning_rate": 3.4059695893128133e-06, "loss": 0.8306, "step": 4576 }, { "epoch": 0.74, "grad_norm": 3.8112420680237853, "learning_rate": 3.4020465522504876e-06, "loss": 0.9248, "step": 4577 }, { "epoch": 0.74, "grad_norm": 3.303131386453884, "learning_rate": 3.3981253126525593e-06, "loss": 0.8861, "step": 4578 }, { "epoch": 0.74, "grad_norm": 2.8380999766853057, "learning_rate": 3.394205871587277e-06, "loss": 0.9528, "step": 4579 }, { "epoch": 0.74, "grad_norm": 3.4777665357099945, "learning_rate": 3.3902882301224016e-06, "loss": 0.8216, "step": 4580 }, { "epoch": 0.74, "grad_norm": 2.457030764823829, "learning_rate": 3.386372389325213e-06, "loss": 0.8579, "step": 4581 }, { "epoch": 0.74, "grad_norm": 3.004371478064996, "learning_rate": 3.382458350262493e-06, "loss": 0.8642, "step": 4582 }, { "epoch": 0.74, "grad_norm": 3.027398247997369, "learning_rate": 3.378546114000527e-06, "loss": 0.7847, "step": 4583 }, { "epoch": 0.74, "grad_norm": 3.132389030469876, "learning_rate": 3.374635681605125e-06, "loss": 0.824, "step": 4584 }, { "epoch": 0.74, "grad_norm": 3.2506521010923985, "learning_rate": 3.3707270541415895e-06, "loss": 0.8815, "step": 4585 }, { "epoch": 0.74, "grad_norm": 1.5599615457881546, "learning_rate": 3.3668202326747433e-06, "loss": 0.7835, "step": 4586 }, { "epoch": 0.74, "grad_norm": 3.592012864588956, "learning_rate": 3.3629152182689117e-06, "loss": 0.8697, "step": 4587 }, { "epoch": 0.74, "grad_norm": 3.5614977313474547, "learning_rate": 3.3590120119879233e-06, "loss": 0.8761, "step": 4588 }, { "epoch": 0.74, "grad_norm": 2.3664951633941533, "learning_rate": 3.3551106148951262e-06, "loss": 0.8961, "step": 4589 }, { "epoch": 0.74, "grad_norm": 1.7368919735765824, "learning_rate": 3.351211028053365e-06, "loss": 0.9521, "step": 4590 }, { "epoch": 0.74, "grad_norm": 1.5532466787035357, "learning_rate": 3.3473132525249918e-06, "loss": 0.9593, "step": 4591 }, { "epoch": 0.74, "grad_norm": 2.309028184423833, "learning_rate": 3.3434172893718707e-06, "loss": 0.8414, "step": 4592 }, { "epoch": 0.74, "grad_norm": 2.5697100550744785, "learning_rate": 3.339523139655373e-06, "loss": 0.9933, "step": 4593 }, { "epoch": 0.74, "grad_norm": 2.9211925994887955, "learning_rate": 3.3356308044363683e-06, "loss": 0.7936, "step": 4594 }, { "epoch": 0.74, "grad_norm": 3.5265527051553858, "learning_rate": 3.3317402847752344e-06, "loss": 0.816, "step": 4595 }, { "epoch": 0.74, "grad_norm": 3.099051201120751, "learning_rate": 3.327851581731859e-06, "loss": 0.9132, "step": 4596 }, { "epoch": 0.74, "grad_norm": 2.7072977482569747, "learning_rate": 3.323964696365638e-06, "loss": 0.8773, "step": 4597 }, { "epoch": 0.74, "grad_norm": 3.9479400043586197, "learning_rate": 3.320079629735452e-06, "loss": 0.9584, "step": 4598 }, { "epoch": 0.74, "grad_norm": 2.5422675459533206, "learning_rate": 3.316196382899709e-06, "loss": 0.8949, "step": 4599 }, { "epoch": 0.74, "grad_norm": 3.75039518181423, "learning_rate": 3.312314956916315e-06, "loss": 0.8528, "step": 4600 }, { "epoch": 0.74, "grad_norm": 1.7710368170419595, "learning_rate": 3.3084353528426727e-06, "loss": 0.9127, "step": 4601 }, { "epoch": 0.74, "grad_norm": 1.442224699224635, "learning_rate": 3.3045575717356926e-06, "loss": 0.923, "step": 4602 }, { "epoch": 0.74, "grad_norm": 1.7872553864746201, "learning_rate": 3.3006816146517927e-06, "loss": 0.8511, "step": 4603 }, { "epoch": 0.74, "grad_norm": 3.409762190415987, "learning_rate": 3.29680748264689e-06, "loss": 0.9113, "step": 4604 }, { "epoch": 0.74, "grad_norm": 3.068824776478453, "learning_rate": 3.2929351767763997e-06, "loss": 0.8366, "step": 4605 }, { "epoch": 0.74, "grad_norm": 2.6172805997317012, "learning_rate": 3.289064698095251e-06, "loss": 0.937, "step": 4606 }, { "epoch": 0.74, "grad_norm": 1.9898067235778039, "learning_rate": 3.2851960476578647e-06, "loss": 0.8851, "step": 4607 }, { "epoch": 0.74, "grad_norm": 1.4798015610943533, "learning_rate": 3.281329226518173e-06, "loss": 0.9016, "step": 4608 }, { "epoch": 0.74, "grad_norm": 3.213043784343348, "learning_rate": 3.2774642357296006e-06, "loss": 0.9689, "step": 4609 }, { "epoch": 0.74, "grad_norm": 3.2876008108949106, "learning_rate": 3.2736010763450744e-06, "loss": 0.9032, "step": 4610 }, { "epoch": 0.74, "grad_norm": 2.720433820881811, "learning_rate": 3.2697397494170336e-06, "loss": 0.9308, "step": 4611 }, { "epoch": 0.74, "grad_norm": 3.009597684518024, "learning_rate": 3.2658802559974046e-06, "loss": 0.8501, "step": 4612 }, { "epoch": 0.74, "grad_norm": 3.6516351512232057, "learning_rate": 3.2620225971376187e-06, "loss": 0.8674, "step": 4613 }, { "epoch": 0.74, "grad_norm": 2.4989592293126446, "learning_rate": 3.25816677388861e-06, "loss": 0.9039, "step": 4614 }, { "epoch": 0.74, "grad_norm": 3.1044540954921636, "learning_rate": 3.2543127873008164e-06, "loss": 0.9352, "step": 4615 }, { "epoch": 0.74, "grad_norm": 4.377087558246203, "learning_rate": 3.2504606384241642e-06, "loss": 0.8986, "step": 4616 }, { "epoch": 0.74, "grad_norm": 2.385331520215634, "learning_rate": 3.246610328308083e-06, "loss": 0.9587, "step": 4617 }, { "epoch": 0.74, "grad_norm": 2.576281634140694, "learning_rate": 3.2427618580015107e-06, "loss": 0.8491, "step": 4618 }, { "epoch": 0.74, "grad_norm": 3.104910554442177, "learning_rate": 3.2389152285528726e-06, "loss": 0.86, "step": 4619 }, { "epoch": 0.74, "grad_norm": 2.501501490785992, "learning_rate": 3.235070441010092e-06, "loss": 0.7952, "step": 4620 }, { "epoch": 0.74, "grad_norm": 2.5103990757327206, "learning_rate": 3.2312274964206013e-06, "loss": 0.9052, "step": 4621 }, { "epoch": 0.74, "grad_norm": 3.313879625453687, "learning_rate": 3.2273863958313257e-06, "loss": 0.8851, "step": 4622 }, { "epoch": 0.74, "grad_norm": 3.2854890598085364, "learning_rate": 3.2235471402886833e-06, "loss": 0.8409, "step": 4623 }, { "epoch": 0.75, "grad_norm": 4.4138127349137415, "learning_rate": 3.2197097308385916e-06, "loss": 0.8146, "step": 4624 }, { "epoch": 0.75, "grad_norm": 2.2917337812363936, "learning_rate": 3.2158741685264715e-06, "loss": 0.9128, "step": 4625 }, { "epoch": 0.75, "grad_norm": 3.4535961261260093, "learning_rate": 3.212040454397234e-06, "loss": 0.8457, "step": 4626 }, { "epoch": 0.75, "grad_norm": 3.0566481130099974, "learning_rate": 3.208208589495284e-06, "loss": 0.8852, "step": 4627 }, { "epoch": 0.75, "grad_norm": 2.4771225353205026, "learning_rate": 3.204378574864535e-06, "loss": 0.3263, "step": 4628 }, { "epoch": 0.75, "grad_norm": 2.231414540953039, "learning_rate": 3.200550411548381e-06, "loss": 0.9126, "step": 4629 }, { "epoch": 0.75, "grad_norm": 4.051350479080488, "learning_rate": 3.1967241005897264e-06, "loss": 0.8363, "step": 4630 }, { "epoch": 0.75, "grad_norm": 2.8018508209607984, "learning_rate": 3.19289964303096e-06, "loss": 0.9265, "step": 4631 }, { "epoch": 0.75, "grad_norm": 3.8753264505021283, "learning_rate": 3.189077039913967e-06, "loss": 0.8709, "step": 4632 }, { "epoch": 0.75, "grad_norm": 1.5023572202049853, "learning_rate": 3.1852562922801346e-06, "loss": 0.8687, "step": 4633 }, { "epoch": 0.75, "grad_norm": 3.095046571248685, "learning_rate": 3.181437401170335e-06, "loss": 0.8968, "step": 4634 }, { "epoch": 0.75, "grad_norm": 1.7167157011837446, "learning_rate": 3.177620367624946e-06, "loss": 0.3278, "step": 4635 }, { "epoch": 0.75, "grad_norm": 3.4511266941811267, "learning_rate": 3.1738051926838243e-06, "loss": 0.8661, "step": 4636 }, { "epoch": 0.75, "grad_norm": 2.2328399507024415, "learning_rate": 3.169991877386338e-06, "loss": 0.8816, "step": 4637 }, { "epoch": 0.75, "grad_norm": 3.518023270521898, "learning_rate": 3.1661804227713334e-06, "loss": 0.8679, "step": 4638 }, { "epoch": 0.75, "grad_norm": 2.779970967943925, "learning_rate": 3.162370829877154e-06, "loss": 0.9413, "step": 4639 }, { "epoch": 0.75, "grad_norm": 2.758134428691502, "learning_rate": 3.158563099741644e-06, "loss": 0.9376, "step": 4640 }, { "epoch": 0.75, "grad_norm": 2.1566172853660315, "learning_rate": 3.1547572334021315e-06, "loss": 0.9182, "step": 4641 }, { "epoch": 0.75, "grad_norm": 2.1403879744458347, "learning_rate": 3.150953231895435e-06, "loss": 0.8367, "step": 4642 }, { "epoch": 0.75, "grad_norm": 2.9188358596491395, "learning_rate": 3.1471510962578743e-06, "loss": 0.8464, "step": 4643 }, { "epoch": 0.75, "grad_norm": 1.6411697210011837, "learning_rate": 3.143350827525257e-06, "loss": 0.3371, "step": 4644 }, { "epoch": 0.75, "grad_norm": 3.9524676715028173, "learning_rate": 3.139552426732879e-06, "loss": 0.8853, "step": 4645 }, { "epoch": 0.75, "grad_norm": 3.3983561188172633, "learning_rate": 3.1357558949155266e-06, "loss": 0.8961, "step": 4646 }, { "epoch": 0.75, "grad_norm": 2.708185387507497, "learning_rate": 3.1319612331074856e-06, "loss": 0.8934, "step": 4647 }, { "epoch": 0.75, "grad_norm": 1.7409606171721501, "learning_rate": 3.128168442342523e-06, "loss": 0.3171, "step": 4648 }, { "epoch": 0.75, "grad_norm": 3.069144844711605, "learning_rate": 3.1243775236538963e-06, "loss": 0.8968, "step": 4649 }, { "epoch": 0.75, "grad_norm": 2.940850315522384, "learning_rate": 3.1205884780743633e-06, "loss": 0.8335, "step": 4650 }, { "epoch": 0.75, "grad_norm": 3.573798216380753, "learning_rate": 3.116801306636158e-06, "loss": 0.8642, "step": 4651 }, { "epoch": 0.75, "grad_norm": 2.624805352396625, "learning_rate": 3.113016010371016e-06, "loss": 0.9178, "step": 4652 }, { "epoch": 0.75, "grad_norm": 3.852448955299218, "learning_rate": 3.1092325903101518e-06, "loss": 0.9114, "step": 4653 }, { "epoch": 0.75, "grad_norm": 3.9014122875600337, "learning_rate": 3.105451047484277e-06, "loss": 0.8538, "step": 4654 }, { "epoch": 0.75, "grad_norm": 2.841476202107122, "learning_rate": 3.1016713829235866e-06, "loss": 0.8555, "step": 4655 }, { "epoch": 0.75, "grad_norm": 1.4746854446697524, "learning_rate": 3.0978935976577617e-06, "loss": 0.317, "step": 4656 }, { "epoch": 0.75, "grad_norm": 2.6634702499615837, "learning_rate": 3.0941176927159812e-06, "loss": 0.959, "step": 4657 }, { "epoch": 0.75, "grad_norm": 2.005195069372392, "learning_rate": 3.0903436691269006e-06, "loss": 0.8307, "step": 4658 }, { "epoch": 0.75, "grad_norm": 3.693315349378996, "learning_rate": 3.0865715279186724e-06, "loss": 0.8376, "step": 4659 }, { "epoch": 0.75, "grad_norm": 2.3437564086826446, "learning_rate": 3.0828012701189316e-06, "loss": 0.8862, "step": 4660 }, { "epoch": 0.75, "grad_norm": 2.955438908288412, "learning_rate": 3.079032896754793e-06, "loss": 0.9164, "step": 4661 }, { "epoch": 0.75, "grad_norm": 3.231865392997832, "learning_rate": 3.075266408852876e-06, "loss": 0.8459, "step": 4662 }, { "epoch": 0.75, "grad_norm": 2.230191990238503, "learning_rate": 3.07150180743927e-06, "loss": 0.826, "step": 4663 }, { "epoch": 0.75, "grad_norm": 3.086678794963345, "learning_rate": 3.0677390935395533e-06, "loss": 0.8908, "step": 4664 }, { "epoch": 0.75, "grad_norm": 2.732992291302492, "learning_rate": 3.063978268178798e-06, "loss": 0.9252, "step": 4665 }, { "epoch": 0.75, "grad_norm": 2.1334540551519585, "learning_rate": 3.0602193323815563e-06, "loss": 0.829, "step": 4666 }, { "epoch": 0.75, "grad_norm": 3.1311860464421595, "learning_rate": 3.056462287171865e-06, "loss": 0.8534, "step": 4667 }, { "epoch": 0.75, "grad_norm": 2.9483160999657723, "learning_rate": 3.052707133573244e-06, "loss": 0.9134, "step": 4668 }, { "epoch": 0.75, "grad_norm": 3.1844162329043555, "learning_rate": 3.0489538726087053e-06, "loss": 0.8814, "step": 4669 }, { "epoch": 0.75, "grad_norm": 2.962040758551824, "learning_rate": 3.0452025053007396e-06, "loss": 0.8875, "step": 4670 }, { "epoch": 0.75, "grad_norm": 2.5513646158268064, "learning_rate": 3.0414530326713176e-06, "loss": 0.8778, "step": 4671 }, { "epoch": 0.75, "grad_norm": 3.3030369742365626, "learning_rate": 3.0377054557419028e-06, "loss": 0.9073, "step": 4672 }, { "epoch": 0.75, "grad_norm": 2.087962449982135, "learning_rate": 3.0339597755334414e-06, "loss": 0.3249, "step": 4673 }, { "epoch": 0.75, "grad_norm": 4.177456554504248, "learning_rate": 3.0302159930663575e-06, "loss": 0.9173, "step": 4674 }, { "epoch": 0.75, "grad_norm": 2.318243060773495, "learning_rate": 3.026474109360559e-06, "loss": 0.8777, "step": 4675 }, { "epoch": 0.75, "grad_norm": 2.0817534241669358, "learning_rate": 3.0227341254354405e-06, "loss": 0.9195, "step": 4676 }, { "epoch": 0.75, "grad_norm": 2.8390685732770757, "learning_rate": 3.0189960423098765e-06, "loss": 0.8922, "step": 4677 }, { "epoch": 0.75, "grad_norm": 4.143521199303456, "learning_rate": 3.015259861002219e-06, "loss": 0.8811, "step": 4678 }, { "epoch": 0.75, "grad_norm": 3.3564953194012648, "learning_rate": 3.0115255825303148e-06, "loss": 0.8845, "step": 4679 }, { "epoch": 0.75, "grad_norm": 2.5944596204602, "learning_rate": 3.0077932079114768e-06, "loss": 0.8963, "step": 4680 }, { "epoch": 0.75, "grad_norm": 4.992158844914826, "learning_rate": 3.004062738162514e-06, "loss": 0.8231, "step": 4681 }, { "epoch": 0.75, "grad_norm": 2.8847074405946724, "learning_rate": 3.000334174299705e-06, "loss": 0.8943, "step": 4682 }, { "epoch": 0.75, "grad_norm": 2.7904091370277686, "learning_rate": 2.996607517338811e-06, "loss": 0.806, "step": 4683 }, { "epoch": 0.75, "grad_norm": 1.4011824535930861, "learning_rate": 2.9928827682950825e-06, "loss": 0.8714, "step": 4684 }, { "epoch": 0.75, "grad_norm": 4.745183008039327, "learning_rate": 2.9891599281832395e-06, "loss": 0.9376, "step": 4685 }, { "epoch": 0.76, "grad_norm": 2.9543675629207646, "learning_rate": 2.9854389980174837e-06, "loss": 0.8656, "step": 4686 }, { "epoch": 0.76, "grad_norm": 3.6313883297124634, "learning_rate": 2.9817199788115025e-06, "loss": 0.8813, "step": 4687 }, { "epoch": 0.76, "grad_norm": 3.7286004146248923, "learning_rate": 2.978002871578466e-06, "loss": 0.8799, "step": 4688 }, { "epoch": 0.76, "grad_norm": 4.040190961498118, "learning_rate": 2.9742876773310037e-06, "loss": 0.9047, "step": 4689 }, { "epoch": 0.76, "grad_norm": 3.5852966400809136, "learning_rate": 2.9705743970812416e-06, "loss": 0.8633, "step": 4690 }, { "epoch": 0.76, "grad_norm": 3.1034051528580093, "learning_rate": 2.9668630318407854e-06, "loss": 0.9019, "step": 4691 }, { "epoch": 0.76, "grad_norm": 2.1820169668962803, "learning_rate": 2.963153582620708e-06, "loss": 0.8719, "step": 4692 }, { "epoch": 0.76, "grad_norm": 2.3634082602427666, "learning_rate": 2.9594460504315637e-06, "loss": 0.8751, "step": 4693 }, { "epoch": 0.76, "grad_norm": 4.902990339580482, "learning_rate": 2.9557404362833898e-06, "loss": 0.8984, "step": 4694 }, { "epoch": 0.76, "grad_norm": 2.9152623383398573, "learning_rate": 2.9520367411856997e-06, "loss": 0.8104, "step": 4695 }, { "epoch": 0.76, "grad_norm": 4.250440686883519, "learning_rate": 2.9483349661474792e-06, "loss": 0.8524, "step": 4696 }, { "epoch": 0.76, "grad_norm": 2.7068978148762626, "learning_rate": 2.944635112177192e-06, "loss": 0.8958, "step": 4697 }, { "epoch": 0.76, "grad_norm": 2.477553112334522, "learning_rate": 2.9409371802827856e-06, "loss": 0.8809, "step": 4698 }, { "epoch": 0.76, "grad_norm": 3.5688724743993574, "learning_rate": 2.937241171471674e-06, "loss": 0.8701, "step": 4699 }, { "epoch": 0.76, "grad_norm": 3.0739877849130166, "learning_rate": 2.933547086750752e-06, "loss": 0.7975, "step": 4700 }, { "epoch": 0.76, "grad_norm": 1.5216399714345166, "learning_rate": 2.9298549271263944e-06, "loss": 0.9354, "step": 4701 }, { "epoch": 0.76, "grad_norm": 3.324910218597031, "learning_rate": 2.9261646936044408e-06, "loss": 0.9006, "step": 4702 }, { "epoch": 0.76, "grad_norm": 3.4242330987322003, "learning_rate": 2.9224763871902186e-06, "loss": 0.8178, "step": 4703 }, { "epoch": 0.76, "grad_norm": 3.5035624766037525, "learning_rate": 2.9187900088885224e-06, "loss": 0.85, "step": 4704 }, { "epoch": 0.76, "grad_norm": 2.991432513014364, "learning_rate": 2.9151055597036195e-06, "loss": 0.8597, "step": 4705 }, { "epoch": 0.76, "grad_norm": 3.193879867272309, "learning_rate": 2.9114230406392608e-06, "loss": 0.9153, "step": 4706 }, { "epoch": 0.76, "grad_norm": 3.943076771193144, "learning_rate": 2.907742452698664e-06, "loss": 0.9223, "step": 4707 }, { "epoch": 0.76, "grad_norm": 2.1385635139500163, "learning_rate": 2.90406379688452e-06, "loss": 0.3322, "step": 4708 }, { "epoch": 0.76, "grad_norm": 3.4397528462004847, "learning_rate": 2.900387074198997e-06, "loss": 0.8591, "step": 4709 }, { "epoch": 0.76, "grad_norm": 3.3296478082948524, "learning_rate": 2.8967122856437435e-06, "loss": 0.8612, "step": 4710 }, { "epoch": 0.76, "grad_norm": 2.734393920832975, "learning_rate": 2.893039432219861e-06, "loss": 0.879, "step": 4711 }, { "epoch": 0.76, "grad_norm": 3.8958572686151984, "learning_rate": 2.8893685149279417e-06, "loss": 0.9237, "step": 4712 }, { "epoch": 0.76, "grad_norm": 3.053221992506025, "learning_rate": 2.885699534768047e-06, "loss": 0.887, "step": 4713 }, { "epoch": 0.76, "grad_norm": 1.9517589827524198, "learning_rate": 2.882032492739706e-06, "loss": 0.8566, "step": 4714 }, { "epoch": 0.76, "grad_norm": 2.2797501741700597, "learning_rate": 2.8783673898419194e-06, "loss": 0.854, "step": 4715 }, { "epoch": 0.76, "grad_norm": 3.2969328527210804, "learning_rate": 2.874704227073164e-06, "loss": 0.8899, "step": 4716 }, { "epoch": 0.76, "grad_norm": 2.820124231537492, "learning_rate": 2.871043005431394e-06, "loss": 0.866, "step": 4717 }, { "epoch": 0.76, "grad_norm": 5.2968665378919315, "learning_rate": 2.8673837259140138e-06, "loss": 0.8781, "step": 4718 }, { "epoch": 0.76, "grad_norm": 3.467117887866557, "learning_rate": 2.863726389517918e-06, "loss": 0.9203, "step": 4719 }, { "epoch": 0.76, "grad_norm": 3.9662226041033417, "learning_rate": 2.860070997239469e-06, "loss": 0.8306, "step": 4720 }, { "epoch": 0.76, "grad_norm": 2.097228846704436, "learning_rate": 2.856417550074495e-06, "loss": 0.2972, "step": 4721 }, { "epoch": 0.76, "grad_norm": 1.766251781918976, "learning_rate": 2.852766049018291e-06, "loss": 0.8754, "step": 4722 }, { "epoch": 0.76, "grad_norm": 2.7776835001735223, "learning_rate": 2.8491164950656313e-06, "loss": 0.9588, "step": 4723 }, { "epoch": 0.76, "grad_norm": 2.683296109369796, "learning_rate": 2.8454688892107518e-06, "loss": 0.8965, "step": 4724 }, { "epoch": 0.76, "grad_norm": 3.018663999860729, "learning_rate": 2.841823232447366e-06, "loss": 0.8782, "step": 4725 }, { "epoch": 0.76, "grad_norm": 2.3419781917726143, "learning_rate": 2.8381795257686485e-06, "loss": 0.9052, "step": 4726 }, { "epoch": 0.76, "grad_norm": 1.8417766161357645, "learning_rate": 2.8345377701672404e-06, "loss": 0.8794, "step": 4727 }, { "epoch": 0.76, "grad_norm": 3.848384389383065, "learning_rate": 2.8308979666352644e-06, "loss": 0.8381, "step": 4728 }, { "epoch": 0.76, "grad_norm": 2.752613386405198, "learning_rate": 2.8272601161643e-06, "loss": 0.9196, "step": 4729 }, { "epoch": 0.76, "grad_norm": 1.752062671855553, "learning_rate": 2.8236242197453943e-06, "loss": 0.3198, "step": 4730 }, { "epoch": 0.76, "grad_norm": 2.321215829428261, "learning_rate": 2.819990278369069e-06, "loss": 0.8514, "step": 4731 }, { "epoch": 0.76, "grad_norm": 2.8385747411825, "learning_rate": 2.816358293025314e-06, "loss": 0.8334, "step": 4732 }, { "epoch": 0.76, "grad_norm": 2.741817525483433, "learning_rate": 2.812728264703577e-06, "loss": 0.9037, "step": 4733 }, { "epoch": 0.76, "grad_norm": 2.015523035590174, "learning_rate": 2.8091001943927764e-06, "loss": 0.936, "step": 4734 }, { "epoch": 0.76, "grad_norm": 3.286476260144913, "learning_rate": 2.8054740830813056e-06, "loss": 0.8402, "step": 4735 }, { "epoch": 0.76, "grad_norm": 2.853289522773503, "learning_rate": 2.8018499317570115e-06, "loss": 0.8431, "step": 4736 }, { "epoch": 0.76, "grad_norm": 2.168486552218976, "learning_rate": 2.79822774140721e-06, "loss": 0.8718, "step": 4737 }, { "epoch": 0.76, "grad_norm": 2.959669500921936, "learning_rate": 2.794607513018691e-06, "loss": 0.9333, "step": 4738 }, { "epoch": 0.76, "grad_norm": 2.460620678080822, "learning_rate": 2.79098924757771e-06, "loss": 0.8653, "step": 4739 }, { "epoch": 0.76, "grad_norm": 2.716568170021781, "learning_rate": 2.7873729460699684e-06, "loss": 0.842, "step": 4740 }, { "epoch": 0.76, "grad_norm": 3.1545162160096547, "learning_rate": 2.7837586094806535e-06, "loss": 0.9332, "step": 4741 }, { "epoch": 0.76, "grad_norm": 3.374839778911763, "learning_rate": 2.7801462387944145e-06, "loss": 0.9234, "step": 4742 }, { "epoch": 0.76, "grad_norm": 3.029107191786632, "learning_rate": 2.7765358349953554e-06, "loss": 0.8586, "step": 4743 }, { "epoch": 0.76, "grad_norm": 2.9280315958309058, "learning_rate": 2.772927399067048e-06, "loss": 0.8665, "step": 4744 }, { "epoch": 0.76, "grad_norm": 3.105935038941653, "learning_rate": 2.769320931992535e-06, "loss": 0.8189, "step": 4745 }, { "epoch": 0.76, "grad_norm": 2.6953617699584167, "learning_rate": 2.765716434754315e-06, "loss": 0.8771, "step": 4746 }, { "epoch": 0.76, "grad_norm": 3.41214067851983, "learning_rate": 2.762113908334351e-06, "loss": 0.8598, "step": 4747 }, { "epoch": 0.77, "grad_norm": 2.1084746523250097, "learning_rate": 2.758513353714073e-06, "loss": 0.852, "step": 4748 }, { "epoch": 0.77, "grad_norm": 2.9343467182913088, "learning_rate": 2.7549147718743684e-06, "loss": 0.8526, "step": 4749 }, { "epoch": 0.77, "grad_norm": 3.881587182477779, "learning_rate": 2.7513181637955945e-06, "loss": 0.8553, "step": 4750 }, { "epoch": 0.77, "grad_norm": 2.090696699113266, "learning_rate": 2.74772353045756e-06, "loss": 0.3107, "step": 4751 }, { "epoch": 0.77, "grad_norm": 3.8875861498740925, "learning_rate": 2.74413087283955e-06, "loss": 0.8766, "step": 4752 }, { "epoch": 0.77, "grad_norm": 2.8547502649649377, "learning_rate": 2.7405401919202967e-06, "loss": 0.8293, "step": 4753 }, { "epoch": 0.77, "grad_norm": 3.4067628806823804, "learning_rate": 2.736951488678006e-06, "loss": 0.897, "step": 4754 }, { "epoch": 0.77, "grad_norm": 2.6518630658235636, "learning_rate": 2.733364764090336e-06, "loss": 0.9203, "step": 4755 }, { "epoch": 0.77, "grad_norm": 2.137135913023803, "learning_rate": 2.729780019134408e-06, "loss": 0.8802, "step": 4756 }, { "epoch": 0.77, "grad_norm": 2.7730780570059332, "learning_rate": 2.726197254786811e-06, "loss": 0.8754, "step": 4757 }, { "epoch": 0.77, "grad_norm": 3.293559355167777, "learning_rate": 2.722616472023585e-06, "loss": 0.914, "step": 4758 }, { "epoch": 0.77, "grad_norm": 1.9882337643212162, "learning_rate": 2.7190376718202304e-06, "loss": 0.9046, "step": 4759 }, { "epoch": 0.77, "grad_norm": 2.320054884458692, "learning_rate": 2.715460855151716e-06, "loss": 0.8384, "step": 4760 }, { "epoch": 0.77, "grad_norm": 3.3901251499176976, "learning_rate": 2.711886022992469e-06, "loss": 0.8755, "step": 4761 }, { "epoch": 0.77, "grad_norm": 2.723709843949219, "learning_rate": 2.708313176316363e-06, "loss": 0.8915, "step": 4762 }, { "epoch": 0.77, "grad_norm": 1.7010738375211267, "learning_rate": 2.7047423160967433e-06, "loss": 0.2934, "step": 4763 }, { "epoch": 0.77, "grad_norm": 2.140513730463777, "learning_rate": 2.701173443306414e-06, "loss": 0.822, "step": 4764 }, { "epoch": 0.77, "grad_norm": 1.754357090971739, "learning_rate": 2.6976065589176337e-06, "loss": 0.91, "step": 4765 }, { "epoch": 0.77, "grad_norm": 3.1822824597680763, "learning_rate": 2.694041663902114e-06, "loss": 0.9238, "step": 4766 }, { "epoch": 0.77, "grad_norm": 4.118633096103884, "learning_rate": 2.690478759231039e-06, "loss": 0.8592, "step": 4767 }, { "epoch": 0.77, "grad_norm": 2.245202990829947, "learning_rate": 2.686917845875038e-06, "loss": 0.8873, "step": 4768 }, { "epoch": 0.77, "grad_norm": 2.33917175730917, "learning_rate": 2.683358924804198e-06, "loss": 0.9223, "step": 4769 }, { "epoch": 0.77, "grad_norm": 2.7081183030278755, "learning_rate": 2.679801996988075e-06, "loss": 0.9617, "step": 4770 }, { "epoch": 0.77, "grad_norm": 1.1784710201411404, "learning_rate": 2.676247063395668e-06, "loss": 0.8448, "step": 4771 }, { "epoch": 0.77, "grad_norm": 2.4394773630201, "learning_rate": 2.6726941249954443e-06, "loss": 0.8609, "step": 4772 }, { "epoch": 0.77, "grad_norm": 3.5388537886426077, "learning_rate": 2.669143182755315e-06, "loss": 0.8625, "step": 4773 }, { "epoch": 0.77, "grad_norm": 2.904125790921783, "learning_rate": 2.6655942376426635e-06, "loss": 0.799, "step": 4774 }, { "epoch": 0.77, "grad_norm": 2.1037464918355537, "learning_rate": 2.6620472906243123e-06, "loss": 0.8829, "step": 4775 }, { "epoch": 0.77, "grad_norm": 2.997360021585499, "learning_rate": 2.6585023426665534e-06, "loss": 0.909, "step": 4776 }, { "epoch": 0.77, "grad_norm": 2.382000093641047, "learning_rate": 2.6549593947351258e-06, "loss": 0.8552, "step": 4777 }, { "epoch": 0.77, "grad_norm": 3.0961382346527446, "learning_rate": 2.6514184477952244e-06, "loss": 0.8515, "step": 4778 }, { "epoch": 0.77, "grad_norm": 1.1480120954408022, "learning_rate": 2.6478795028115046e-06, "loss": 0.922, "step": 4779 }, { "epoch": 0.77, "grad_norm": 3.2170373008019313, "learning_rate": 2.644342560748071e-06, "loss": 0.8557, "step": 4780 }, { "epoch": 0.77, "grad_norm": 3.0688942310134215, "learning_rate": 2.6408076225684808e-06, "loss": 0.8687, "step": 4781 }, { "epoch": 0.77, "grad_norm": 2.9592907032546885, "learning_rate": 2.6372746892357514e-06, "loss": 0.8284, "step": 4782 }, { "epoch": 0.77, "grad_norm": 2.992000564034557, "learning_rate": 2.6337437617123586e-06, "loss": 0.8726, "step": 4783 }, { "epoch": 0.77, "grad_norm": 3.693049247869893, "learning_rate": 2.630214840960209e-06, "loss": 0.8437, "step": 4784 }, { "epoch": 0.77, "grad_norm": 3.0691105088927473, "learning_rate": 2.626687927940688e-06, "loss": 0.8466, "step": 4785 }, { "epoch": 0.77, "grad_norm": 3.2455554795895494, "learning_rate": 2.623163023614623e-06, "loss": 0.8767, "step": 4786 }, { "epoch": 0.77, "grad_norm": 2.3958137400834922, "learning_rate": 2.6196401289422955e-06, "loss": 0.9027, "step": 4787 }, { "epoch": 0.77, "grad_norm": 3.262643721765492, "learning_rate": 2.6161192448834348e-06, "loss": 0.8664, "step": 4788 }, { "epoch": 0.77, "grad_norm": 2.63772703883988, "learning_rate": 2.6126003723972325e-06, "loss": 0.9049, "step": 4789 }, { "epoch": 0.77, "grad_norm": 3.1660742121834766, "learning_rate": 2.609083512442323e-06, "loss": 0.9168, "step": 4790 }, { "epoch": 0.77, "grad_norm": 2.1676009682410484, "learning_rate": 2.6055686659767944e-06, "loss": 0.8595, "step": 4791 }, { "epoch": 0.77, "grad_norm": 4.316399842684724, "learning_rate": 2.6020558339581893e-06, "loss": 0.8693, "step": 4792 }, { "epoch": 0.77, "grad_norm": 2.377411371097374, "learning_rate": 2.5985450173435035e-06, "loss": 0.947, "step": 4793 }, { "epoch": 0.77, "grad_norm": 2.962833652941873, "learning_rate": 2.5950362170891774e-06, "loss": 0.8766, "step": 4794 }, { "epoch": 0.77, "grad_norm": 3.085349806825955, "learning_rate": 2.591529434151101e-06, "loss": 0.9455, "step": 4795 }, { "epoch": 0.77, "grad_norm": 2.566836804369794, "learning_rate": 2.5880246694846266e-06, "loss": 0.8301, "step": 4796 }, { "epoch": 0.77, "grad_norm": 2.9440363135275103, "learning_rate": 2.584521924044544e-06, "loss": 0.9058, "step": 4797 }, { "epoch": 0.77, "grad_norm": 3.111164440727313, "learning_rate": 2.5810211987850953e-06, "loss": 0.8363, "step": 4798 }, { "epoch": 0.77, "grad_norm": 3.4172574594711946, "learning_rate": 2.577522494659981e-06, "loss": 0.944, "step": 4799 }, { "epoch": 0.77, "grad_norm": 2.45099532870809, "learning_rate": 2.574025812622337e-06, "loss": 0.8637, "step": 4800 }, { "epoch": 0.77, "grad_norm": 2.803991204752712, "learning_rate": 2.5705311536247622e-06, "loss": 0.8468, "step": 4801 }, { "epoch": 0.77, "grad_norm": 2.8795635658291827, "learning_rate": 2.567038518619297e-06, "loss": 0.8976, "step": 4802 }, { "epoch": 0.77, "grad_norm": 3.2469770604560684, "learning_rate": 2.5635479085574267e-06, "loss": 0.9048, "step": 4803 }, { "epoch": 0.77, "grad_norm": 2.0885464358255503, "learning_rate": 2.5600593243900927e-06, "loss": 0.9071, "step": 4804 }, { "epoch": 0.77, "grad_norm": 4.0948568696845955, "learning_rate": 2.5565727670676887e-06, "loss": 0.8845, "step": 4805 }, { "epoch": 0.77, "grad_norm": 1.6300626533262212, "learning_rate": 2.5530882375400358e-06, "loss": 0.8575, "step": 4806 }, { "epoch": 0.77, "grad_norm": 2.995152053157436, "learning_rate": 2.549605736756423e-06, "loss": 0.8287, "step": 4807 }, { "epoch": 0.77, "grad_norm": 2.397409622668756, "learning_rate": 2.5461252656655813e-06, "loss": 0.88, "step": 4808 }, { "epoch": 0.77, "grad_norm": 2.9528118956556457, "learning_rate": 2.5426468252156846e-06, "loss": 0.8956, "step": 4809 }, { "epoch": 0.77, "grad_norm": 4.163914776269813, "learning_rate": 2.5391704163543516e-06, "loss": 0.8642, "step": 4810 }, { "epoch": 0.78, "grad_norm": 2.7507008613122714, "learning_rate": 2.535696040028658e-06, "loss": 0.7897, "step": 4811 }, { "epoch": 0.78, "grad_norm": 3.4735121471433694, "learning_rate": 2.532223697185122e-06, "loss": 0.9129, "step": 4812 }, { "epoch": 0.78, "grad_norm": 3.856192570139682, "learning_rate": 2.528753388769697e-06, "loss": 0.8913, "step": 4813 }, { "epoch": 0.78, "grad_norm": 3.005468470764544, "learning_rate": 2.5252851157277945e-06, "loss": 0.9367, "step": 4814 }, { "epoch": 0.78, "grad_norm": 3.314110130676401, "learning_rate": 2.521818879004271e-06, "loss": 0.8452, "step": 4815 }, { "epoch": 0.78, "grad_norm": 2.628677154126661, "learning_rate": 2.518354679543422e-06, "loss": 0.8561, "step": 4816 }, { "epoch": 0.78, "grad_norm": 1.7507253233638427, "learning_rate": 2.514892518288988e-06, "loss": 0.3106, "step": 4817 }, { "epoch": 0.78, "grad_norm": 2.8690083306378757, "learning_rate": 2.511432396184165e-06, "loss": 0.9163, "step": 4818 }, { "epoch": 0.78, "grad_norm": 3.244175019101995, "learning_rate": 2.5079743141715786e-06, "loss": 0.9181, "step": 4819 }, { "epoch": 0.78, "grad_norm": 3.5025796920129264, "learning_rate": 2.504518273193306e-06, "loss": 0.8679, "step": 4820 }, { "epoch": 0.78, "grad_norm": 3.1390636605487994, "learning_rate": 2.501064274190872e-06, "loss": 0.8012, "step": 4821 }, { "epoch": 0.78, "grad_norm": 4.325527913062701, "learning_rate": 2.497612318105237e-06, "loss": 0.8866, "step": 4822 }, { "epoch": 0.78, "grad_norm": 2.666117571883223, "learning_rate": 2.4941624058768143e-06, "loss": 0.8993, "step": 4823 }, { "epoch": 0.78, "grad_norm": 2.124065081308718, "learning_rate": 2.4907145384454514e-06, "loss": 0.8988, "step": 4824 }, { "epoch": 0.78, "grad_norm": 3.420431590370475, "learning_rate": 2.4872687167504393e-06, "loss": 0.8448, "step": 4825 }, { "epoch": 0.78, "grad_norm": 3.723493868088594, "learning_rate": 2.4838249417305214e-06, "loss": 0.8676, "step": 4826 }, { "epoch": 0.78, "grad_norm": 2.9478560981773727, "learning_rate": 2.480383214323875e-06, "loss": 0.9237, "step": 4827 }, { "epoch": 0.78, "grad_norm": 2.018439523542624, "learning_rate": 2.476943535468117e-06, "loss": 0.3317, "step": 4828 }, { "epoch": 0.78, "grad_norm": 3.700316833127583, "learning_rate": 2.4735059061003143e-06, "loss": 0.8332, "step": 4829 }, { "epoch": 0.78, "grad_norm": 2.599580301742714, "learning_rate": 2.470070327156975e-06, "loss": 0.9069, "step": 4830 }, { "epoch": 0.78, "grad_norm": 1.6949007439598374, "learning_rate": 2.4666367995740414e-06, "loss": 0.8963, "step": 4831 }, { "epoch": 0.78, "grad_norm": 3.404960878346306, "learning_rate": 2.4632053242869005e-06, "loss": 0.8358, "step": 4832 }, { "epoch": 0.78, "grad_norm": 4.1893110771373125, "learning_rate": 2.4597759022303813e-06, "loss": 0.8759, "step": 4833 }, { "epoch": 0.78, "grad_norm": 3.9538668814649305, "learning_rate": 2.4563485343387596e-06, "loss": 0.9005, "step": 4834 }, { "epoch": 0.78, "grad_norm": 3.760708713902574, "learning_rate": 2.4529232215457333e-06, "loss": 0.8532, "step": 4835 }, { "epoch": 0.78, "grad_norm": 2.8801432505055398, "learning_rate": 2.4494999647844574e-06, "loss": 0.8979, "step": 4836 }, { "epoch": 0.78, "grad_norm": 3.8558320992190733, "learning_rate": 2.4460787649875263e-06, "loss": 0.8567, "step": 4837 }, { "epoch": 0.78, "grad_norm": 2.379903750196418, "learning_rate": 2.4426596230869624e-06, "loss": 0.8736, "step": 4838 }, { "epoch": 0.78, "grad_norm": 4.133788255042149, "learning_rate": 2.439242540014236e-06, "loss": 0.9072, "step": 4839 }, { "epoch": 0.78, "grad_norm": 3.1694853100680724, "learning_rate": 2.4358275167002564e-06, "loss": 0.875, "step": 4840 }, { "epoch": 0.78, "grad_norm": 3.179714943207355, "learning_rate": 2.4324145540753697e-06, "loss": 0.7854, "step": 4841 }, { "epoch": 0.78, "grad_norm": 2.2569544656645175, "learning_rate": 2.429003653069357e-06, "loss": 0.9288, "step": 4842 }, { "epoch": 0.78, "grad_norm": 2.922234497801989, "learning_rate": 2.4255948146114483e-06, "loss": 0.8777, "step": 4843 }, { "epoch": 0.78, "grad_norm": 3.6954895364238767, "learning_rate": 2.4221880396302977e-06, "loss": 0.8528, "step": 4844 }, { "epoch": 0.78, "grad_norm": 3.482402842111828, "learning_rate": 2.418783329054013e-06, "loss": 0.8199, "step": 4845 }, { "epoch": 0.78, "grad_norm": 1.9877087081633216, "learning_rate": 2.4153806838101257e-06, "loss": 0.9789, "step": 4846 }, { "epoch": 0.78, "grad_norm": 3.1906271162035402, "learning_rate": 2.4119801048256096e-06, "loss": 0.861, "step": 4847 }, { "epoch": 0.78, "grad_norm": 2.657205207960408, "learning_rate": 2.4085815930268807e-06, "loss": 0.9033, "step": 4848 }, { "epoch": 0.78, "grad_norm": 3.7791801651472094, "learning_rate": 2.4051851493397835e-06, "loss": 0.8969, "step": 4849 }, { "epoch": 0.78, "grad_norm": 3.614010167186223, "learning_rate": 2.401790774689602e-06, "loss": 0.7626, "step": 4850 }, { "epoch": 0.78, "grad_norm": 2.432259035946693, "learning_rate": 2.3983984700010587e-06, "loss": 0.9362, "step": 4851 }, { "epoch": 0.78, "grad_norm": 3.3093371856517435, "learning_rate": 2.395008236198315e-06, "loss": 0.8713, "step": 4852 }, { "epoch": 0.78, "grad_norm": 2.0945615192119975, "learning_rate": 2.39162007420496e-06, "loss": 0.8643, "step": 4853 }, { "epoch": 0.78, "grad_norm": 3.8254691740027282, "learning_rate": 2.3882339849440206e-06, "loss": 0.8374, "step": 4854 }, { "epoch": 0.78, "grad_norm": 3.5941443185504336, "learning_rate": 2.384849969337967e-06, "loss": 0.8834, "step": 4855 }, { "epoch": 0.78, "grad_norm": 2.6122961083387164, "learning_rate": 2.381468028308693e-06, "loss": 0.8561, "step": 4856 }, { "epoch": 0.78, "grad_norm": 3.6436216930484457, "learning_rate": 2.378088162777532e-06, "loss": 0.9141, "step": 4857 }, { "epoch": 0.78, "grad_norm": 0.7273961278820211, "learning_rate": 2.3747103736652543e-06, "loss": 0.3428, "step": 4858 }, { "epoch": 0.78, "grad_norm": 1.7374553647242796, "learning_rate": 2.371334661892066e-06, "loss": 0.9244, "step": 4859 }, { "epoch": 0.78, "grad_norm": 2.494453476273749, "learning_rate": 2.3679610283776023e-06, "loss": 0.8415, "step": 4860 }, { "epoch": 0.78, "grad_norm": 3.490594897571564, "learning_rate": 2.3645894740409294e-06, "loss": 0.7466, "step": 4861 }, { "epoch": 0.78, "grad_norm": 4.023273472081204, "learning_rate": 2.361219999800558e-06, "loss": 0.8177, "step": 4862 }, { "epoch": 0.78, "grad_norm": 4.41952333495126, "learning_rate": 2.3578526065744223e-06, "loss": 0.9007, "step": 4863 }, { "epoch": 0.78, "grad_norm": 3.5162243141513594, "learning_rate": 2.3544872952798913e-06, "loss": 0.8566, "step": 4864 }, { "epoch": 0.78, "grad_norm": 2.1916845352353396, "learning_rate": 2.351124066833773e-06, "loss": 0.9024, "step": 4865 }, { "epoch": 0.78, "grad_norm": 3.048865191608839, "learning_rate": 2.3477629221522992e-06, "loss": 0.8644, "step": 4866 }, { "epoch": 0.78, "grad_norm": 2.4659477457789145, "learning_rate": 2.3444038621511435e-06, "loss": 0.925, "step": 4867 }, { "epoch": 0.78, "grad_norm": 3.8010383040724385, "learning_rate": 2.341046887745403e-06, "loss": 0.8737, "step": 4868 }, { "epoch": 0.78, "grad_norm": 3.5929539337814895, "learning_rate": 2.3376919998496083e-06, "loss": 0.9446, "step": 4869 }, { "epoch": 0.78, "grad_norm": 3.3444532563976006, "learning_rate": 2.3343391993777274e-06, "loss": 0.825, "step": 4870 }, { "epoch": 0.78, "grad_norm": 3.3825971081109825, "learning_rate": 2.330988487243152e-06, "loss": 0.8228, "step": 4871 }, { "epoch": 0.78, "grad_norm": 2.212376749517455, "learning_rate": 2.3276398643587127e-06, "loss": 0.8611, "step": 4872 }, { "epoch": 0.79, "grad_norm": 1.9773624897057167, "learning_rate": 2.3242933316366613e-06, "loss": 0.3056, "step": 4873 }, { "epoch": 0.79, "grad_norm": 2.776799273337333, "learning_rate": 2.320948889988691e-06, "loss": 0.8553, "step": 4874 }, { "epoch": 0.79, "grad_norm": 3.7107678826367723, "learning_rate": 2.3176065403259184e-06, "loss": 0.7685, "step": 4875 }, { "epoch": 0.79, "grad_norm": 2.6335127717058433, "learning_rate": 2.314266283558887e-06, "loss": 0.9166, "step": 4876 }, { "epoch": 0.79, "grad_norm": 3.0095376355995835, "learning_rate": 2.310928120597581e-06, "loss": 0.8635, "step": 4877 }, { "epoch": 0.79, "grad_norm": 1.9908852303810274, "learning_rate": 2.307592052351406e-06, "loss": 0.9165, "step": 4878 }, { "epoch": 0.79, "grad_norm": 2.400958994161351, "learning_rate": 2.3042580797291956e-06, "loss": 0.8431, "step": 4879 }, { "epoch": 0.79, "grad_norm": 2.6921645205021254, "learning_rate": 2.300926203639218e-06, "loss": 0.8534, "step": 4880 }, { "epoch": 0.79, "grad_norm": 2.4521130460906786, "learning_rate": 2.2975964249891723e-06, "loss": 0.7787, "step": 4881 }, { "epoch": 0.79, "grad_norm": 2.7762382871886553, "learning_rate": 2.294268744686178e-06, "loss": 0.9054, "step": 4882 }, { "epoch": 0.79, "grad_norm": 2.7248379300419168, "learning_rate": 2.2909431636367853e-06, "loss": 0.9132, "step": 4883 }, { "epoch": 0.79, "grad_norm": 2.1561487146444898, "learning_rate": 2.2876196827469776e-06, "loss": 0.7631, "step": 4884 }, { "epoch": 0.79, "grad_norm": 4.376493362594307, "learning_rate": 2.284298302922162e-06, "loss": 0.9097, "step": 4885 }, { "epoch": 0.79, "grad_norm": 3.453653838799835, "learning_rate": 2.2809790250671717e-06, "loss": 0.8525, "step": 4886 }, { "epoch": 0.79, "grad_norm": 1.8097689211325678, "learning_rate": 2.2776618500862725e-06, "loss": 0.9094, "step": 4887 }, { "epoch": 0.79, "grad_norm": 3.439641112303043, "learning_rate": 2.2743467788831496e-06, "loss": 0.8245, "step": 4888 }, { "epoch": 0.79, "grad_norm": 2.9384220380659247, "learning_rate": 2.2710338123609264e-06, "loss": 0.9212, "step": 4889 }, { "epoch": 0.79, "grad_norm": 3.5281139390500056, "learning_rate": 2.267722951422141e-06, "loss": 0.8769, "step": 4890 }, { "epoch": 0.79, "grad_norm": 3.888173261525971, "learning_rate": 2.2644141969687674e-06, "loss": 0.8198, "step": 4891 }, { "epoch": 0.79, "grad_norm": 2.921260014240564, "learning_rate": 2.2611075499021985e-06, "loss": 0.9263, "step": 4892 }, { "epoch": 0.79, "grad_norm": 2.23000789230387, "learning_rate": 2.257803011123254e-06, "loss": 0.8184, "step": 4893 }, { "epoch": 0.79, "grad_norm": 3.078574675424575, "learning_rate": 2.2545005815321875e-06, "loss": 0.8852, "step": 4894 }, { "epoch": 0.79, "grad_norm": 1.5391924653148423, "learning_rate": 2.2512002620286653e-06, "loss": 0.8672, "step": 4895 }, { "epoch": 0.79, "grad_norm": 2.4146247875570768, "learning_rate": 2.2479020535117924e-06, "loss": 0.3097, "step": 4896 }, { "epoch": 0.79, "grad_norm": 2.3507307742560966, "learning_rate": 2.2446059568800872e-06, "loss": 0.8819, "step": 4897 }, { "epoch": 0.79, "grad_norm": 3.9478669310215753, "learning_rate": 2.241311973031496e-06, "loss": 0.8246, "step": 4898 }, { "epoch": 0.79, "grad_norm": 2.5706705232462044, "learning_rate": 2.2380201028633954e-06, "loss": 0.8874, "step": 4899 }, { "epoch": 0.79, "grad_norm": 2.577680699760347, "learning_rate": 2.2347303472725778e-06, "loss": 0.8074, "step": 4900 }, { "epoch": 0.79, "grad_norm": 2.0505465127933946, "learning_rate": 2.2314427071552624e-06, "loss": 0.8616, "step": 4901 }, { "epoch": 0.79, "grad_norm": 2.542489142830772, "learning_rate": 2.228157183407096e-06, "loss": 0.8476, "step": 4902 }, { "epoch": 0.79, "grad_norm": 2.6173830784841017, "learning_rate": 2.2248737769231497e-06, "loss": 0.8724, "step": 4903 }, { "epoch": 0.79, "grad_norm": 3.5981753758622474, "learning_rate": 2.2215924885979035e-06, "loss": 0.8571, "step": 4904 }, { "epoch": 0.79, "grad_norm": 2.9932903916550453, "learning_rate": 2.218313319325277e-06, "loss": 0.8334, "step": 4905 }, { "epoch": 0.79, "grad_norm": 2.1650465507876815, "learning_rate": 2.2150362699986084e-06, "loss": 0.8959, "step": 4906 }, { "epoch": 0.79, "grad_norm": 1.900148852691292, "learning_rate": 2.2117613415106533e-06, "loss": 0.8144, "step": 4907 }, { "epoch": 0.79, "grad_norm": 2.713748145469619, "learning_rate": 2.20848853475359e-06, "loss": 0.8627, "step": 4908 }, { "epoch": 0.79, "grad_norm": 4.009073932669168, "learning_rate": 2.2052178506190267e-06, "loss": 0.8846, "step": 4909 }, { "epoch": 0.79, "grad_norm": 2.043199223488501, "learning_rate": 2.201949289997983e-06, "loss": 0.9078, "step": 4910 }, { "epoch": 0.79, "grad_norm": 2.74882274050915, "learning_rate": 2.1986828537809103e-06, "loss": 0.8732, "step": 4911 }, { "epoch": 0.79, "grad_norm": 1.9260828737589473, "learning_rate": 2.195418542857669e-06, "loss": 0.8754, "step": 4912 }, { "epoch": 0.79, "grad_norm": 2.303240719583477, "learning_rate": 2.1921563581175553e-06, "loss": 0.879, "step": 4913 }, { "epoch": 0.79, "grad_norm": 4.577689609069451, "learning_rate": 2.1888963004492735e-06, "loss": 0.8293, "step": 4914 }, { "epoch": 0.79, "grad_norm": 1.6480661127523264, "learning_rate": 2.1856383707409513e-06, "loss": 0.8685, "step": 4915 }, { "epoch": 0.79, "grad_norm": 3.206408199698039, "learning_rate": 2.1823825698801424e-06, "loss": 0.9025, "step": 4916 }, { "epoch": 0.79, "grad_norm": 1.9933809183919982, "learning_rate": 2.179128898753814e-06, "loss": 0.8544, "step": 4917 }, { "epoch": 0.79, "grad_norm": 2.763604670809444, "learning_rate": 2.1758773582483594e-06, "loss": 0.8636, "step": 4918 }, { "epoch": 0.79, "grad_norm": 2.6311726429638362, "learning_rate": 2.172627949249586e-06, "loss": 0.8495, "step": 4919 }, { "epoch": 0.79, "grad_norm": 2.7253218399515933, "learning_rate": 2.169380672642719e-06, "loss": 0.891, "step": 4920 }, { "epoch": 0.79, "grad_norm": 3.141528099370321, "learning_rate": 2.166135529312412e-06, "loss": 0.8506, "step": 4921 }, { "epoch": 0.79, "grad_norm": 2.633856863380416, "learning_rate": 2.1628925201427285e-06, "loss": 0.3062, "step": 4922 }, { "epoch": 0.79, "grad_norm": 1.9109728107324722, "learning_rate": 2.159651646017151e-06, "loss": 0.8973, "step": 4923 }, { "epoch": 0.79, "grad_norm": 2.24915986800559, "learning_rate": 2.156412907818586e-06, "loss": 0.8704, "step": 4924 }, { "epoch": 0.79, "grad_norm": 4.059588753679247, "learning_rate": 2.1531763064293587e-06, "loss": 0.8829, "step": 4925 }, { "epoch": 0.79, "grad_norm": 3.5388679366703357, "learning_rate": 2.1499418427312002e-06, "loss": 0.8692, "step": 4926 }, { "epoch": 0.79, "grad_norm": 3.9827708886039175, "learning_rate": 2.146709517605271e-06, "loss": 0.8953, "step": 4927 }, { "epoch": 0.79, "grad_norm": 2.7540568858402934, "learning_rate": 2.14347933193215e-06, "loss": 0.9501, "step": 4928 }, { "epoch": 0.79, "grad_norm": 4.036537665131032, "learning_rate": 2.1402512865918246e-06, "loss": 0.7908, "step": 4929 }, { "epoch": 0.79, "grad_norm": 2.8405743987861087, "learning_rate": 2.1370253824637023e-06, "loss": 0.8999, "step": 4930 }, { "epoch": 0.79, "grad_norm": 3.0173798036063384, "learning_rate": 2.1338016204266087e-06, "loss": 0.9159, "step": 4931 }, { "epoch": 0.79, "grad_norm": 3.5862975760462317, "learning_rate": 2.1305800013587906e-06, "loss": 0.856, "step": 4932 }, { "epoch": 0.79, "grad_norm": 2.426485460069202, "learning_rate": 2.1273605261379015e-06, "loss": 0.9309, "step": 4933 }, { "epoch": 0.79, "grad_norm": 2.1547775769786544, "learning_rate": 2.124143195641013e-06, "loss": 0.8399, "step": 4934 }, { "epoch": 0.8, "grad_norm": 1.218445617617432, "learning_rate": 2.1209280107446195e-06, "loss": 0.91, "step": 4935 }, { "epoch": 0.8, "grad_norm": 3.3353899968780882, "learning_rate": 2.117714972324624e-06, "loss": 0.7575, "step": 4936 }, { "epoch": 0.8, "grad_norm": 2.8401708191554995, "learning_rate": 2.1145040812563432e-06, "loss": 0.8648, "step": 4937 }, { "epoch": 0.8, "grad_norm": 2.5479125735316823, "learning_rate": 2.1112953384145195e-06, "loss": 0.7878, "step": 4938 }, { "epoch": 0.8, "grad_norm": 3.9200381654710967, "learning_rate": 2.1080887446732955e-06, "loss": 0.9316, "step": 4939 }, { "epoch": 0.8, "grad_norm": 3.5938392959779017, "learning_rate": 2.1048843009062424e-06, "loss": 0.8473, "step": 4940 }, { "epoch": 0.8, "grad_norm": 3.0621976119247334, "learning_rate": 2.1016820079863366e-06, "loss": 0.9113, "step": 4941 }, { "epoch": 0.8, "grad_norm": 2.2351589294865537, "learning_rate": 2.0984818667859665e-06, "loss": 0.8679, "step": 4942 }, { "epoch": 0.8, "grad_norm": 2.9419228554782753, "learning_rate": 2.0952838781769446e-06, "loss": 0.862, "step": 4943 }, { "epoch": 0.8, "grad_norm": 1.9934856299309842, "learning_rate": 2.09208804303049e-06, "loss": 0.8323, "step": 4944 }, { "epoch": 0.8, "grad_norm": 3.56348740128353, "learning_rate": 2.088894362217233e-06, "loss": 0.8121, "step": 4945 }, { "epoch": 0.8, "grad_norm": 1.7526142484725424, "learning_rate": 2.0857028366072217e-06, "loss": 0.8836, "step": 4946 }, { "epoch": 0.8, "grad_norm": 1.9038776581831884, "learning_rate": 2.0825134670699232e-06, "loss": 0.3102, "step": 4947 }, { "epoch": 0.8, "grad_norm": 3.7295810462132923, "learning_rate": 2.0793262544741965e-06, "loss": 0.8229, "step": 4948 }, { "epoch": 0.8, "grad_norm": 4.669009347271872, "learning_rate": 2.076141199688333e-06, "loss": 0.7655, "step": 4949 }, { "epoch": 0.8, "grad_norm": 2.3981313106164523, "learning_rate": 2.072958303580033e-06, "loss": 0.8769, "step": 4950 }, { "epoch": 0.8, "grad_norm": 2.888286407523881, "learning_rate": 2.0697775670164e-06, "loss": 0.8425, "step": 4951 }, { "epoch": 0.8, "grad_norm": 3.0308944650952334, "learning_rate": 2.0665989908639526e-06, "loss": 0.9839, "step": 4952 }, { "epoch": 0.8, "grad_norm": 3.3808129724483864, "learning_rate": 2.063422575988626e-06, "loss": 0.8891, "step": 4953 }, { "epoch": 0.8, "grad_norm": 3.440968099147475, "learning_rate": 2.060248323255769e-06, "loss": 0.8133, "step": 4954 }, { "epoch": 0.8, "grad_norm": 4.1880060146401545, "learning_rate": 2.057076233530123e-06, "loss": 0.9003, "step": 4955 }, { "epoch": 0.8, "grad_norm": 1.9101270427694181, "learning_rate": 2.053906307675859e-06, "loss": 0.8828, "step": 4956 }, { "epoch": 0.8, "grad_norm": 2.905138356891228, "learning_rate": 2.0507385465565557e-06, "loss": 0.8976, "step": 4957 }, { "epoch": 0.8, "grad_norm": 3.3307710972528453, "learning_rate": 2.0475729510351937e-06, "loss": 0.9179, "step": 4958 }, { "epoch": 0.8, "grad_norm": 1.883446424458995, "learning_rate": 2.044409521974168e-06, "loss": 0.8344, "step": 4959 }, { "epoch": 0.8, "grad_norm": 2.065355924547209, "learning_rate": 2.0412482602352877e-06, "loss": 0.3271, "step": 4960 }, { "epoch": 0.8, "grad_norm": 2.5185976649711335, "learning_rate": 2.038089166679763e-06, "loss": 0.9101, "step": 4961 }, { "epoch": 0.8, "grad_norm": 2.29527964299045, "learning_rate": 2.0349322421682238e-06, "loss": 0.9274, "step": 4962 }, { "epoch": 0.8, "grad_norm": 1.0514595650205332, "learning_rate": 2.0317774875606997e-06, "loss": 0.9279, "step": 4963 }, { "epoch": 0.8, "grad_norm": 2.5414099537599335, "learning_rate": 2.0286249037166316e-06, "loss": 0.9186, "step": 4964 }, { "epoch": 0.8, "grad_norm": 2.6096131907040236, "learning_rate": 2.025474491494874e-06, "loss": 0.8889, "step": 4965 }, { "epoch": 0.8, "grad_norm": 2.868164561255064, "learning_rate": 2.0223262517536833e-06, "loss": 0.9298, "step": 4966 }, { "epoch": 0.8, "grad_norm": 1.928663468561423, "learning_rate": 2.0191801853507264e-06, "loss": 0.874, "step": 4967 }, { "epoch": 0.8, "grad_norm": 4.084536368721928, "learning_rate": 2.0160362931430788e-06, "loss": 0.9051, "step": 4968 }, { "epoch": 0.8, "grad_norm": 2.789540311345331, "learning_rate": 2.0128945759872264e-06, "loss": 0.8262, "step": 4969 }, { "epoch": 0.8, "grad_norm": 2.8767277045937028, "learning_rate": 2.009755034739057e-06, "loss": 0.8836, "step": 4970 }, { "epoch": 0.8, "grad_norm": 2.9021646642048378, "learning_rate": 2.0066176702538675e-06, "loss": 0.9079, "step": 4971 }, { "epoch": 0.8, "grad_norm": 2.438264580168205, "learning_rate": 2.0034824833863652e-06, "loss": 0.804, "step": 4972 }, { "epoch": 0.8, "grad_norm": 1.4448485583504531, "learning_rate": 2.0003494749906595e-06, "loss": 0.8392, "step": 4973 }, { "epoch": 0.8, "grad_norm": 2.5793037263096665, "learning_rate": 1.9972186459202656e-06, "loss": 0.7917, "step": 4974 }, { "epoch": 0.8, "grad_norm": 2.605427754550045, "learning_rate": 1.9940899970281115e-06, "loss": 0.8847, "step": 4975 }, { "epoch": 0.8, "grad_norm": 1.025496820672741, "learning_rate": 1.9909635291665307e-06, "loss": 0.8831, "step": 4976 }, { "epoch": 0.8, "grad_norm": 2.895380510744996, "learning_rate": 1.9878392431872506e-06, "loss": 0.9389, "step": 4977 }, { "epoch": 0.8, "grad_norm": 3.0820815320379498, "learning_rate": 1.984717139941417e-06, "loss": 0.8155, "step": 4978 }, { "epoch": 0.8, "grad_norm": 2.629914723743452, "learning_rate": 1.9815972202795796e-06, "loss": 0.9624, "step": 4979 }, { "epoch": 0.8, "grad_norm": 2.878527260501996, "learning_rate": 1.9784794850516874e-06, "loss": 0.9713, "step": 4980 }, { "epoch": 0.8, "grad_norm": 2.88035085554267, "learning_rate": 1.9753639351070954e-06, "loss": 0.9333, "step": 4981 }, { "epoch": 0.8, "grad_norm": 2.2948731502473767, "learning_rate": 1.9722505712945715e-06, "loss": 0.8406, "step": 4982 }, { "epoch": 0.8, "grad_norm": 1.5758230208542452, "learning_rate": 1.969139394462277e-06, "loss": 0.3097, "step": 4983 }, { "epoch": 0.8, "grad_norm": 2.83961471200698, "learning_rate": 1.9660304054577815e-06, "loss": 0.8271, "step": 4984 }, { "epoch": 0.8, "grad_norm": 2.521310577194401, "learning_rate": 1.962923605128064e-06, "loss": 0.7978, "step": 4985 }, { "epoch": 0.8, "grad_norm": 2.334221965875165, "learning_rate": 1.9598189943194965e-06, "loss": 0.9111, "step": 4986 }, { "epoch": 0.8, "grad_norm": 3.515344227069383, "learning_rate": 1.956716573877867e-06, "loss": 0.8184, "step": 4987 }, { "epoch": 0.8, "grad_norm": 2.294211991270352, "learning_rate": 1.953616344648357e-06, "loss": 0.8702, "step": 4988 }, { "epoch": 0.8, "grad_norm": 3.762041975258702, "learning_rate": 1.9505183074755516e-06, "loss": 0.9283, "step": 4989 }, { "epoch": 0.8, "grad_norm": 2.906825183022419, "learning_rate": 1.9474224632034442e-06, "loss": 0.8605, "step": 4990 }, { "epoch": 0.8, "grad_norm": 2.465036907577449, "learning_rate": 1.944328812675432e-06, "loss": 0.8334, "step": 4991 }, { "epoch": 0.8, "grad_norm": 2.114762046540643, "learning_rate": 1.941237356734307e-06, "loss": 0.9068, "step": 4992 }, { "epoch": 0.8, "grad_norm": 2.325119741494269, "learning_rate": 1.938148096222264e-06, "loss": 0.8476, "step": 4993 }, { "epoch": 0.8, "grad_norm": 2.7275621390672558, "learning_rate": 1.935061031980909e-06, "loss": 0.865, "step": 4994 }, { "epoch": 0.8, "grad_norm": 3.6356474940663377, "learning_rate": 1.9319761648512404e-06, "loss": 0.8654, "step": 4995 }, { "epoch": 0.8, "grad_norm": 3.8595983772653164, "learning_rate": 1.9288934956736572e-06, "loss": 0.8874, "step": 4996 }, { "epoch": 0.81, "grad_norm": 2.3502041261365525, "learning_rate": 1.9258130252879683e-06, "loss": 0.9038, "step": 4997 }, { "epoch": 0.81, "grad_norm": 2.7689959139147993, "learning_rate": 1.922734754533382e-06, "loss": 0.9246, "step": 4998 }, { "epoch": 0.81, "grad_norm": 2.6432440596037234, "learning_rate": 1.919658684248494e-06, "loss": 0.8796, "step": 4999 }, { "epoch": 0.81, "grad_norm": 4.017560321803651, "learning_rate": 1.9165848152713174e-06, "loss": 0.906, "step": 5000 }, { "epoch": 0.81, "grad_norm": 4.048057825150441, "learning_rate": 1.91351314843926e-06, "loss": 0.9233, "step": 5001 }, { "epoch": 0.81, "grad_norm": 3.5772608084059825, "learning_rate": 1.910443684589127e-06, "loss": 0.8821, "step": 5002 }, { "epoch": 0.81, "grad_norm": 2.9701265055112103, "learning_rate": 1.9073764245571204e-06, "loss": 0.8617, "step": 5003 }, { "epoch": 0.81, "grad_norm": 3.0590059238961342, "learning_rate": 1.904311369178854e-06, "loss": 0.8254, "step": 5004 }, { "epoch": 0.81, "grad_norm": 2.418446039228377, "learning_rate": 1.9012485192893283e-06, "loss": 0.8868, "step": 5005 }, { "epoch": 0.81, "grad_norm": 2.011241552549727, "learning_rate": 1.8981878757229478e-06, "loss": 0.9168, "step": 5006 }, { "epoch": 0.81, "grad_norm": 2.253339302781153, "learning_rate": 1.89512943931352e-06, "loss": 0.9322, "step": 5007 }, { "epoch": 0.81, "grad_norm": 1.121002406210448, "learning_rate": 1.892073210894242e-06, "loss": 0.3091, "step": 5008 }, { "epoch": 0.81, "grad_norm": 2.312798145766622, "learning_rate": 1.8890191912977207e-06, "loss": 0.8681, "step": 5009 }, { "epoch": 0.81, "grad_norm": 2.0383159103751023, "learning_rate": 1.88596738135595e-06, "loss": 0.8786, "step": 5010 }, { "epoch": 0.81, "grad_norm": 3.5394273447769877, "learning_rate": 1.8829177819003308e-06, "loss": 0.8424, "step": 5011 }, { "epoch": 0.81, "grad_norm": 3.012023831165563, "learning_rate": 1.879870393761657e-06, "loss": 0.874, "step": 5012 }, { "epoch": 0.81, "grad_norm": 2.075709724845072, "learning_rate": 1.8768252177701184e-06, "loss": 0.9783, "step": 5013 }, { "epoch": 0.81, "grad_norm": 2.9670577195136083, "learning_rate": 1.8737822547553086e-06, "loss": 0.8012, "step": 5014 }, { "epoch": 0.81, "grad_norm": 2.7990322211653926, "learning_rate": 1.870741505546212e-06, "loss": 0.8772, "step": 5015 }, { "epoch": 0.81, "grad_norm": 2.7130397593604045, "learning_rate": 1.8677029709712147e-06, "loss": 0.8972, "step": 5016 }, { "epoch": 0.81, "grad_norm": 2.3892962184873046, "learning_rate": 1.8646666518580968e-06, "loss": 0.8416, "step": 5017 }, { "epoch": 0.81, "grad_norm": 2.0577351410039295, "learning_rate": 1.8616325490340326e-06, "loss": 0.8267, "step": 5018 }, { "epoch": 0.81, "grad_norm": 3.2087938713043735, "learning_rate": 1.858600663325597e-06, "loss": 0.8225, "step": 5019 }, { "epoch": 0.81, "grad_norm": 2.926924315417321, "learning_rate": 1.8555709955587654e-06, "loss": 0.9302, "step": 5020 }, { "epoch": 0.81, "grad_norm": 2.8890308830218827, "learning_rate": 1.8525435465588914e-06, "loss": 0.8624, "step": 5021 }, { "epoch": 0.81, "grad_norm": 2.9358957658189957, "learning_rate": 1.8495183171507415e-06, "loss": 0.9572, "step": 5022 }, { "epoch": 0.81, "grad_norm": 2.7877389882366, "learning_rate": 1.8464953081584735e-06, "loss": 0.9132, "step": 5023 }, { "epoch": 0.81, "grad_norm": 2.2411041300802585, "learning_rate": 1.843474520405637e-06, "loss": 0.8466, "step": 5024 }, { "epoch": 0.81, "grad_norm": 4.0035148913155485, "learning_rate": 1.8404559547151746e-06, "loss": 0.8654, "step": 5025 }, { "epoch": 0.81, "grad_norm": 2.776266197490546, "learning_rate": 1.8374396119094307e-06, "loss": 0.8417, "step": 5026 }, { "epoch": 0.81, "grad_norm": 3.0249401370343847, "learning_rate": 1.8344254928101401e-06, "loss": 0.9095, "step": 5027 }, { "epoch": 0.81, "grad_norm": 3.706203935153563, "learning_rate": 1.8314135982384274e-06, "loss": 0.9166, "step": 5028 }, { "epoch": 0.81, "grad_norm": 3.772924036825906, "learning_rate": 1.828403929014818e-06, "loss": 0.8646, "step": 5029 }, { "epoch": 0.81, "grad_norm": 3.1853402992876014, "learning_rate": 1.8253964859592343e-06, "loss": 0.9167, "step": 5030 }, { "epoch": 0.81, "grad_norm": 3.1501859821940847, "learning_rate": 1.8223912698909807e-06, "loss": 0.8212, "step": 5031 }, { "epoch": 0.81, "grad_norm": 2.279404193930748, "learning_rate": 1.81938828162876e-06, "loss": 0.8591, "step": 5032 }, { "epoch": 0.81, "grad_norm": 2.8574896261405702, "learning_rate": 1.8163875219906735e-06, "loss": 0.8695, "step": 5033 }, { "epoch": 0.81, "grad_norm": 4.255834053317855, "learning_rate": 1.8133889917942093e-06, "loss": 0.8925, "step": 5034 }, { "epoch": 0.81, "grad_norm": 1.4439121369975725, "learning_rate": 1.8103926918562465e-06, "loss": 0.823, "step": 5035 }, { "epoch": 0.81, "grad_norm": 2.9027565910219186, "learning_rate": 1.807398622993064e-06, "loss": 0.9196, "step": 5036 }, { "epoch": 0.81, "grad_norm": 1.9541387139810762, "learning_rate": 1.804406786020325e-06, "loss": 0.8901, "step": 5037 }, { "epoch": 0.81, "grad_norm": 2.87734425379272, "learning_rate": 1.8014171817530934e-06, "loss": 0.855, "step": 5038 }, { "epoch": 0.81, "grad_norm": 2.9007608402190685, "learning_rate": 1.7984298110058162e-06, "loss": 0.8674, "step": 5039 }, { "epoch": 0.81, "grad_norm": 2.6908409288979844, "learning_rate": 1.7954446745923325e-06, "loss": 0.7662, "step": 5040 }, { "epoch": 0.81, "grad_norm": 3.759454887727268, "learning_rate": 1.7924617733258831e-06, "loss": 0.9158, "step": 5041 }, { "epoch": 0.81, "grad_norm": 4.507450294157095, "learning_rate": 1.7894811080190888e-06, "loss": 0.899, "step": 5042 }, { "epoch": 0.81, "grad_norm": 4.403567749028972, "learning_rate": 1.7865026794839625e-06, "loss": 0.827, "step": 5043 }, { "epoch": 0.81, "grad_norm": 3.2262277498774927, "learning_rate": 1.7835264885319127e-06, "loss": 0.7622, "step": 5044 }, { "epoch": 0.81, "grad_norm": 2.0680213774529843, "learning_rate": 1.7805525359737375e-06, "loss": 0.8312, "step": 5045 }, { "epoch": 0.81, "grad_norm": 2.1486257019840758, "learning_rate": 1.7775808226196222e-06, "loss": 0.3268, "step": 5046 }, { "epoch": 0.81, "grad_norm": 3.780839503681702, "learning_rate": 1.7746113492791407e-06, "loss": 0.9165, "step": 5047 }, { "epoch": 0.81, "grad_norm": 3.3236430618855306, "learning_rate": 1.7716441167612618e-06, "loss": 1.0419, "step": 5048 }, { "epoch": 0.81, "grad_norm": 2.4658472886828853, "learning_rate": 1.7686791258743475e-06, "loss": 0.9013, "step": 5049 }, { "epoch": 0.81, "grad_norm": 3.7184693046227215, "learning_rate": 1.765716377426131e-06, "loss": 0.8431, "step": 5050 }, { "epoch": 0.81, "grad_norm": 3.5248304772703114, "learning_rate": 1.7627558722237525e-06, "loss": 0.8572, "step": 5051 }, { "epoch": 0.81, "grad_norm": 3.1321040467851153, "learning_rate": 1.759797611073738e-06, "loss": 0.8719, "step": 5052 }, { "epoch": 0.81, "grad_norm": 2.472034924683258, "learning_rate": 1.7568415947819973e-06, "loss": 0.8824, "step": 5053 }, { "epoch": 0.81, "grad_norm": 2.8940696193754554, "learning_rate": 1.753887824153827e-06, "loss": 0.8529, "step": 5054 }, { "epoch": 0.81, "grad_norm": 3.0921130232590786, "learning_rate": 1.7509362999939217e-06, "loss": 0.8663, "step": 5055 }, { "epoch": 0.81, "grad_norm": 5.009391928374684, "learning_rate": 1.7479870231063544e-06, "loss": 0.9088, "step": 5056 }, { "epoch": 0.81, "grad_norm": 2.796919326857188, "learning_rate": 1.7450399942945884e-06, "loss": 0.7725, "step": 5057 }, { "epoch": 0.81, "grad_norm": 2.6980803058343086, "learning_rate": 1.74209521436148e-06, "loss": 0.7858, "step": 5058 }, { "epoch": 0.82, "grad_norm": 4.01499750006611, "learning_rate": 1.739152684109262e-06, "loss": 0.8396, "step": 5059 }, { "epoch": 0.82, "grad_norm": 2.009893504116122, "learning_rate": 1.7362124043395678e-06, "loss": 0.9012, "step": 5060 }, { "epoch": 0.82, "grad_norm": 3.6738944486221565, "learning_rate": 1.7332743758534076e-06, "loss": 0.8867, "step": 5061 }, { "epoch": 0.82, "grad_norm": 2.7401263976353922, "learning_rate": 1.730338599451178e-06, "loss": 0.914, "step": 5062 }, { "epoch": 0.82, "grad_norm": 2.6773825650146788, "learning_rate": 1.727405075932671e-06, "loss": 0.9244, "step": 5063 }, { "epoch": 0.82, "grad_norm": 2.9701855852604013, "learning_rate": 1.7244738060970567e-06, "loss": 0.8277, "step": 5064 }, { "epoch": 0.82, "grad_norm": 3.041635556693341, "learning_rate": 1.7215447907428907e-06, "loss": 0.885, "step": 5065 }, { "epoch": 0.82, "grad_norm": 4.190989620267794, "learning_rate": 1.7186180306681221e-06, "loss": 0.9009, "step": 5066 }, { "epoch": 0.82, "grad_norm": 2.332751496750835, "learning_rate": 1.7156935266700814e-06, "loss": 0.8211, "step": 5067 }, { "epoch": 0.82, "grad_norm": 4.101339512167626, "learning_rate": 1.712771279545482e-06, "loss": 0.7999, "step": 5068 }, { "epoch": 0.82, "grad_norm": 2.229525231255619, "learning_rate": 1.7098512900904219e-06, "loss": 0.9047, "step": 5069 }, { "epoch": 0.82, "grad_norm": 3.471646448504733, "learning_rate": 1.706933559100391e-06, "loss": 0.9426, "step": 5070 }, { "epoch": 0.82, "grad_norm": 3.350615254170782, "learning_rate": 1.7040180873702629e-06, "loss": 0.8899, "step": 5071 }, { "epoch": 0.82, "grad_norm": 1.0952013468155197, "learning_rate": 1.7011048756942817e-06, "loss": 0.9257, "step": 5072 }, { "epoch": 0.82, "grad_norm": 2.152097379842329, "learning_rate": 1.6981939248660938e-06, "loss": 0.8638, "step": 5073 }, { "epoch": 0.82, "grad_norm": 4.000232928169841, "learning_rate": 1.6952852356787241e-06, "loss": 0.7859, "step": 5074 }, { "epoch": 0.82, "grad_norm": 3.788978182454243, "learning_rate": 1.6923788089245764e-06, "loss": 0.8858, "step": 5075 }, { "epoch": 0.82, "grad_norm": 2.743792637382932, "learning_rate": 1.6894746453954402e-06, "loss": 0.3079, "step": 5076 }, { "epoch": 0.82, "grad_norm": 2.799284680138657, "learning_rate": 1.6865727458824931e-06, "loss": 0.8738, "step": 5077 }, { "epoch": 0.82, "grad_norm": 3.4562885413340467, "learning_rate": 1.6836731111762915e-06, "loss": 0.8862, "step": 5078 }, { "epoch": 0.82, "grad_norm": 2.2976207949624428, "learning_rate": 1.6807757420667737e-06, "loss": 0.8805, "step": 5079 }, { "epoch": 0.82, "grad_norm": 1.3495905608481937, "learning_rate": 1.6778806393432656e-06, "loss": 0.8637, "step": 5080 }, { "epoch": 0.82, "grad_norm": 2.5140825364456583, "learning_rate": 1.67498780379447e-06, "loss": 0.8777, "step": 5081 }, { "epoch": 0.82, "grad_norm": 2.9816047633315987, "learning_rate": 1.6720972362084798e-06, "loss": 0.8257, "step": 5082 }, { "epoch": 0.82, "grad_norm": 2.9595657432176994, "learning_rate": 1.6692089373727616e-06, "loss": 0.9159, "step": 5083 }, { "epoch": 0.82, "grad_norm": 2.7566224034763813, "learning_rate": 1.6663229080741672e-06, "loss": 0.3138, "step": 5084 }, { "epoch": 0.82, "grad_norm": 1.1225197259745712, "learning_rate": 1.6634391490989333e-06, "loss": 0.903, "step": 5085 }, { "epoch": 0.82, "grad_norm": 3.0239933892737896, "learning_rate": 1.6605576612326745e-06, "loss": 0.9096, "step": 5086 }, { "epoch": 0.82, "grad_norm": 2.93414342126917, "learning_rate": 1.6576784452603833e-06, "loss": 0.9021, "step": 5087 }, { "epoch": 0.82, "grad_norm": 3.5217665539675584, "learning_rate": 1.654801501966442e-06, "loss": 0.8527, "step": 5088 }, { "epoch": 0.82, "grad_norm": 2.3615305571637673, "learning_rate": 1.6519268321346104e-06, "loss": 0.8852, "step": 5089 }, { "epoch": 0.82, "grad_norm": 2.146327088470703, "learning_rate": 1.6490544365480266e-06, "loss": 0.9054, "step": 5090 }, { "epoch": 0.82, "grad_norm": 2.2837595265758237, "learning_rate": 1.6461843159892055e-06, "loss": 0.884, "step": 5091 }, { "epoch": 0.82, "grad_norm": 4.890416150005975, "learning_rate": 1.643316471240054e-06, "loss": 0.84, "step": 5092 }, { "epoch": 0.82, "grad_norm": 3.7803003048564094, "learning_rate": 1.6404509030818493e-06, "loss": 0.8781, "step": 5093 }, { "epoch": 0.82, "grad_norm": 1.9213635764370278, "learning_rate": 1.6375876122952483e-06, "loss": 0.8205, "step": 5094 }, { "epoch": 0.82, "grad_norm": 2.6319442811651075, "learning_rate": 1.6347265996602935e-06, "loss": 0.8659, "step": 5095 }, { "epoch": 0.82, "grad_norm": 2.725152293158043, "learning_rate": 1.6318678659564046e-06, "loss": 0.8617, "step": 5096 }, { "epoch": 0.82, "grad_norm": 3.6786819023442696, "learning_rate": 1.629011411962379e-06, "loss": 0.8741, "step": 5097 }, { "epoch": 0.82, "grad_norm": 2.645210212762333, "learning_rate": 1.6261572384563895e-06, "loss": 0.8907, "step": 5098 }, { "epoch": 0.82, "grad_norm": 2.4814845130463277, "learning_rate": 1.6233053462159965e-06, "loss": 0.9184, "step": 5099 }, { "epoch": 0.82, "grad_norm": 3.9374396909908893, "learning_rate": 1.6204557360181328e-06, "loss": 0.8391, "step": 5100 }, { "epoch": 0.82, "grad_norm": 1.9506313426786424, "learning_rate": 1.6176084086391075e-06, "loss": 0.3312, "step": 5101 }, { "epoch": 0.82, "grad_norm": 2.290381666636267, "learning_rate": 1.6147633648546157e-06, "loss": 0.8588, "step": 5102 }, { "epoch": 0.82, "grad_norm": 3.611156733950123, "learning_rate": 1.6119206054397218e-06, "loss": 0.807, "step": 5103 }, { "epoch": 0.82, "grad_norm": 3.3387032329407367, "learning_rate": 1.6090801311688764e-06, "loss": 0.8026, "step": 5104 }, { "epoch": 0.82, "grad_norm": 4.825112399832827, "learning_rate": 1.6062419428158993e-06, "loss": 0.8352, "step": 5105 }, { "epoch": 0.82, "grad_norm": 1.59993322352299, "learning_rate": 1.603406041153991e-06, "loss": 0.897, "step": 5106 }, { "epoch": 0.82, "grad_norm": 1.990138118014683, "learning_rate": 1.6005724269557322e-06, "loss": 0.8837, "step": 5107 }, { "epoch": 0.82, "grad_norm": 2.289845928189801, "learning_rate": 1.5977411009930743e-06, "loss": 0.9281, "step": 5108 }, { "epoch": 0.82, "grad_norm": 4.384320820632032, "learning_rate": 1.5949120640373517e-06, "loss": 0.897, "step": 5109 }, { "epoch": 0.82, "grad_norm": 1.9935609757795223, "learning_rate": 1.5920853168592676e-06, "loss": 0.84, "step": 5110 }, { "epoch": 0.82, "grad_norm": 3.065319398352481, "learning_rate": 1.5892608602289129e-06, "loss": 0.8453, "step": 5111 }, { "epoch": 0.82, "grad_norm": 1.9169683080848416, "learning_rate": 1.5864386949157419e-06, "loss": 0.8892, "step": 5112 }, { "epoch": 0.82, "grad_norm": 1.72383371538526, "learning_rate": 1.5836188216885895e-06, "loss": 0.8725, "step": 5113 }, { "epoch": 0.82, "grad_norm": 2.964561961658369, "learning_rate": 1.5808012413156715e-06, "loss": 0.8207, "step": 5114 }, { "epoch": 0.82, "grad_norm": 1.3547570726597558, "learning_rate": 1.5779859545645714e-06, "loss": 0.8143, "step": 5115 }, { "epoch": 0.82, "grad_norm": 3.389894046309211, "learning_rate": 1.5751729622022494e-06, "loss": 0.9036, "step": 5116 }, { "epoch": 0.82, "grad_norm": 2.2047943932826004, "learning_rate": 1.5723622649950442e-06, "loss": 0.9565, "step": 5117 }, { "epoch": 0.82, "grad_norm": 2.2390559322157033, "learning_rate": 1.5695538637086693e-06, "loss": 0.8711, "step": 5118 }, { "epoch": 0.82, "grad_norm": 4.292275592356831, "learning_rate": 1.5667477591082092e-06, "loss": 0.8208, "step": 5119 }, { "epoch": 0.82, "grad_norm": 2.5603269341442685, "learning_rate": 1.5639439519581212e-06, "loss": 0.8803, "step": 5120 }, { "epoch": 0.83, "grad_norm": 2.8663719742878646, "learning_rate": 1.5611424430222432e-06, "loss": 0.9061, "step": 5121 }, { "epoch": 0.83, "grad_norm": 3.036505793118239, "learning_rate": 1.5583432330637826e-06, "loss": 0.8171, "step": 5122 }, { "epoch": 0.83, "grad_norm": 1.5666896344929446, "learning_rate": 1.5555463228453193e-06, "loss": 0.8801, "step": 5123 }, { "epoch": 0.83, "grad_norm": 2.5035488688927816, "learning_rate": 1.5527517131288128e-06, "loss": 0.861, "step": 5124 }, { "epoch": 0.83, "grad_norm": 2.5185605567133047, "learning_rate": 1.5499594046755862e-06, "loss": 0.8422, "step": 5125 }, { "epoch": 0.83, "grad_norm": 3.71469241053617, "learning_rate": 1.5471693982463476e-06, "loss": 0.9137, "step": 5126 }, { "epoch": 0.83, "grad_norm": 2.4476369711527304, "learning_rate": 1.5443816946011658e-06, "loss": 0.8455, "step": 5127 }, { "epoch": 0.83, "grad_norm": 2.973311924650613, "learning_rate": 1.5415962944994933e-06, "loss": 0.8689, "step": 5128 }, { "epoch": 0.83, "grad_norm": 1.931153331423578, "learning_rate": 1.5388131987001464e-06, "loss": 0.9082, "step": 5129 }, { "epoch": 0.83, "grad_norm": 3.5020416299642885, "learning_rate": 1.5360324079613175e-06, "loss": 0.8369, "step": 5130 }, { "epoch": 0.83, "grad_norm": 1.3768405300125153, "learning_rate": 1.533253923040573e-06, "loss": 0.8872, "step": 5131 }, { "epoch": 0.83, "grad_norm": 3.1061946380409795, "learning_rate": 1.5304777446948448e-06, "loss": 0.95, "step": 5132 }, { "epoch": 0.83, "grad_norm": 2.345046943406158, "learning_rate": 1.527703873680445e-06, "loss": 0.7629, "step": 5133 }, { "epoch": 0.83, "grad_norm": 3.287200037974666, "learning_rate": 1.52493231075305e-06, "loss": 0.8817, "step": 5134 }, { "epoch": 0.83, "grad_norm": 1.6749641898583203, "learning_rate": 1.522163056667708e-06, "loss": 0.9304, "step": 5135 }, { "epoch": 0.83, "grad_norm": 2.2265296264363252, "learning_rate": 1.5193961121788448e-06, "loss": 0.8556, "step": 5136 }, { "epoch": 0.83, "grad_norm": 3.3741137435906783, "learning_rate": 1.5166314780402492e-06, "loss": 0.8013, "step": 5137 }, { "epoch": 0.83, "grad_norm": 2.27449383397167, "learning_rate": 1.513869155005082e-06, "loss": 0.766, "step": 5138 }, { "epoch": 0.83, "grad_norm": 2.612217525561068, "learning_rate": 1.5111091438258796e-06, "loss": 0.8142, "step": 5139 }, { "epoch": 0.83, "grad_norm": 3.6000087473021307, "learning_rate": 1.5083514452545488e-06, "loss": 0.7941, "step": 5140 }, { "epoch": 0.83, "grad_norm": 2.5591831605810973, "learning_rate": 1.5055960600423524e-06, "loss": 0.8163, "step": 5141 }, { "epoch": 0.83, "grad_norm": 3.587628665969383, "learning_rate": 1.5028429889399388e-06, "loss": 0.8288, "step": 5142 }, { "epoch": 0.83, "grad_norm": 2.8791308373643094, "learning_rate": 1.5000922326973233e-06, "loss": 0.8384, "step": 5143 }, { "epoch": 0.83, "grad_norm": 3.0170047746164728, "learning_rate": 1.4973437920638856e-06, "loss": 0.8715, "step": 5144 }, { "epoch": 0.83, "grad_norm": 2.731685863781967, "learning_rate": 1.4945976677883723e-06, "loss": 0.8991, "step": 5145 }, { "epoch": 0.83, "grad_norm": 2.3073863633630984, "learning_rate": 1.4918538606189104e-06, "loss": 0.292, "step": 5146 }, { "epoch": 0.83, "grad_norm": 3.2650569439956447, "learning_rate": 1.4891123713029832e-06, "loss": 0.8932, "step": 5147 }, { "epoch": 0.83, "grad_norm": 3.566616290321982, "learning_rate": 1.4863732005874509e-06, "loss": 0.8553, "step": 5148 }, { "epoch": 0.83, "grad_norm": 2.740323468348516, "learning_rate": 1.483636349218538e-06, "loss": 0.9098, "step": 5149 }, { "epoch": 0.83, "grad_norm": 1.9744222157995173, "learning_rate": 1.4809018179418388e-06, "loss": 0.9029, "step": 5150 }, { "epoch": 0.83, "grad_norm": 4.566979913108158, "learning_rate": 1.4781696075023156e-06, "loss": 0.8819, "step": 5151 }, { "epoch": 0.83, "grad_norm": 1.6352567493303518, "learning_rate": 1.4754397186442947e-06, "loss": 0.9084, "step": 5152 }, { "epoch": 0.83, "grad_norm": 2.1663433958634695, "learning_rate": 1.4727121521114784e-06, "loss": 0.8605, "step": 5153 }, { "epoch": 0.83, "grad_norm": 3.069729582218404, "learning_rate": 1.4699869086469242e-06, "loss": 0.9133, "step": 5154 }, { "epoch": 0.83, "grad_norm": 2.021547355223622, "learning_rate": 1.4672639889930707e-06, "loss": 0.8895, "step": 5155 }, { "epoch": 0.83, "grad_norm": 2.4511027170584945, "learning_rate": 1.464543393891712e-06, "loss": 0.9398, "step": 5156 }, { "epoch": 0.83, "grad_norm": 3.20746656987645, "learning_rate": 1.461825124084012e-06, "loss": 0.8826, "step": 5157 }, { "epoch": 0.83, "grad_norm": 2.9189477628899887, "learning_rate": 1.4591091803105072e-06, "loss": 0.9622, "step": 5158 }, { "epoch": 0.83, "grad_norm": 4.204856544252046, "learning_rate": 1.4563955633110926e-06, "loss": 0.9082, "step": 5159 }, { "epoch": 0.83, "grad_norm": 3.4795134289714387, "learning_rate": 1.453684273825029e-06, "loss": 0.8367, "step": 5160 }, { "epoch": 0.83, "grad_norm": 3.5074511641394586, "learning_rate": 1.450975312590951e-06, "loss": 0.905, "step": 5161 }, { "epoch": 0.83, "grad_norm": 2.602751039385531, "learning_rate": 1.448268680346857e-06, "loss": 0.8602, "step": 5162 }, { "epoch": 0.83, "grad_norm": 2.3494814158514634, "learning_rate": 1.4455643778301e-06, "loss": 0.33, "step": 5163 }, { "epoch": 0.83, "grad_norm": 2.6074953904664357, "learning_rate": 1.442862405777411e-06, "loss": 0.8511, "step": 5164 }, { "epoch": 0.83, "grad_norm": 1.8152936912188695, "learning_rate": 1.440162764924884e-06, "loss": 0.3257, "step": 5165 }, { "epoch": 0.83, "grad_norm": 3.2184831823478826, "learning_rate": 1.4374654560079725e-06, "loss": 0.8131, "step": 5166 }, { "epoch": 0.83, "grad_norm": 3.4870962968924832, "learning_rate": 1.434770479761497e-06, "loss": 0.8367, "step": 5167 }, { "epoch": 0.83, "grad_norm": 3.0695444170367727, "learning_rate": 1.4320778369196443e-06, "loss": 0.8624, "step": 5168 }, { "epoch": 0.83, "grad_norm": 2.8215057300412774, "learning_rate": 1.4293875282159698e-06, "loss": 0.8992, "step": 5169 }, { "epoch": 0.83, "grad_norm": 2.0105976425173773, "learning_rate": 1.4266995543833772e-06, "loss": 0.3337, "step": 5170 }, { "epoch": 0.83, "grad_norm": 2.180957711209903, "learning_rate": 1.424013916154151e-06, "loss": 0.9135, "step": 5171 }, { "epoch": 0.83, "grad_norm": 3.0288993923925305, "learning_rate": 1.421330614259936e-06, "loss": 0.9001, "step": 5172 }, { "epoch": 0.83, "grad_norm": 3.8394695732221567, "learning_rate": 1.4186496494317325e-06, "loss": 0.7572, "step": 5173 }, { "epoch": 0.83, "grad_norm": 2.9388728079048403, "learning_rate": 1.415971022399909e-06, "loss": 0.8259, "step": 5174 }, { "epoch": 0.83, "grad_norm": 2.2447744724355694, "learning_rate": 1.4132947338942016e-06, "loss": 0.8705, "step": 5175 }, { "epoch": 0.83, "grad_norm": 3.433357100419924, "learning_rate": 1.4106207846437003e-06, "loss": 0.9484, "step": 5176 }, { "epoch": 0.83, "grad_norm": 2.8686200546720033, "learning_rate": 1.407949175376867e-06, "loss": 0.8541, "step": 5177 }, { "epoch": 0.83, "grad_norm": 3.9311695375683646, "learning_rate": 1.4052799068215206e-06, "loss": 0.902, "step": 5178 }, { "epoch": 0.83, "grad_norm": 2.4100168642269195, "learning_rate": 1.4026129797048393e-06, "loss": 0.8814, "step": 5179 }, { "epoch": 0.83, "grad_norm": 2.0898861889674554, "learning_rate": 1.399948394753372e-06, "loss": 0.8412, "step": 5180 }, { "epoch": 0.83, "grad_norm": 3.2465319102505217, "learning_rate": 1.3972861526930249e-06, "loss": 0.8959, "step": 5181 }, { "epoch": 0.83, "grad_norm": 2.245383613113323, "learning_rate": 1.394626254249063e-06, "loss": 0.8519, "step": 5182 }, { "epoch": 0.84, "grad_norm": 3.1258377478159973, "learning_rate": 1.391968700146118e-06, "loss": 0.8743, "step": 5183 }, { "epoch": 0.84, "grad_norm": 3.0213922093247607, "learning_rate": 1.389313491108184e-06, "loss": 0.8706, "step": 5184 }, { "epoch": 0.84, "grad_norm": 2.5897581336597697, "learning_rate": 1.386660627858607e-06, "loss": 0.8554, "step": 5185 }, { "epoch": 0.84, "grad_norm": 2.155956967110142, "learning_rate": 1.384010111120102e-06, "loss": 0.872, "step": 5186 }, { "epoch": 0.84, "grad_norm": 2.5949595744782363, "learning_rate": 1.3813619416147472e-06, "loss": 0.827, "step": 5187 }, { "epoch": 0.84, "grad_norm": 2.308027530433247, "learning_rate": 1.378716120063973e-06, "loss": 0.8537, "step": 5188 }, { "epoch": 0.84, "grad_norm": 3.7715167415105215, "learning_rate": 1.3760726471885722e-06, "loss": 0.923, "step": 5189 }, { "epoch": 0.84, "grad_norm": 3.776475650797242, "learning_rate": 1.3734315237087027e-06, "loss": 0.9347, "step": 5190 }, { "epoch": 0.84, "grad_norm": 2.618855141506927, "learning_rate": 1.3707927503438833e-06, "loss": 0.8808, "step": 5191 }, { "epoch": 0.84, "grad_norm": 3.3369124906255445, "learning_rate": 1.3681563278129794e-06, "loss": 0.8649, "step": 5192 }, { "epoch": 0.84, "grad_norm": 2.351990112763987, "learning_rate": 1.3655222568342308e-06, "loss": 0.8954, "step": 5193 }, { "epoch": 0.84, "grad_norm": 2.533855651248655, "learning_rate": 1.3628905381252322e-06, "loss": 0.8507, "step": 5194 }, { "epoch": 0.84, "grad_norm": 2.297325764754114, "learning_rate": 1.3602611724029335e-06, "loss": 0.9425, "step": 5195 }, { "epoch": 0.84, "grad_norm": 2.3943260316513926, "learning_rate": 1.3576341603836462e-06, "loss": 0.8561, "step": 5196 }, { "epoch": 0.84, "grad_norm": 2.4570085436525333, "learning_rate": 1.3550095027830435e-06, "loss": 0.3175, "step": 5197 }, { "epoch": 0.84, "grad_norm": 2.0025973144202367, "learning_rate": 1.352387200316152e-06, "loss": 0.8573, "step": 5198 }, { "epoch": 0.84, "grad_norm": 1.900277960673076, "learning_rate": 1.3497672536973594e-06, "loss": 0.8671, "step": 5199 }, { "epoch": 0.84, "grad_norm": 2.9926706427008627, "learning_rate": 1.3471496636404124e-06, "loss": 0.9045, "step": 5200 }, { "epoch": 0.84, "grad_norm": 3.2553477671960307, "learning_rate": 1.344534430858413e-06, "loss": 0.8314, "step": 5201 }, { "epoch": 0.84, "grad_norm": 1.990556954698529, "learning_rate": 1.3419215560638265e-06, "loss": 0.3384, "step": 5202 }, { "epoch": 0.84, "grad_norm": 2.7220331297181977, "learning_rate": 1.3393110399684695e-06, "loss": 0.9059, "step": 5203 }, { "epoch": 0.84, "grad_norm": 2.1678796698337335, "learning_rate": 1.3367028832835149e-06, "loss": 0.9101, "step": 5204 }, { "epoch": 0.84, "grad_norm": 2.668781117742507, "learning_rate": 1.3340970867195014e-06, "loss": 0.8301, "step": 5205 }, { "epoch": 0.84, "grad_norm": 3.8569370245130488, "learning_rate": 1.331493650986323e-06, "loss": 0.8746, "step": 5206 }, { "epoch": 0.84, "grad_norm": 3.49303519864699, "learning_rate": 1.328892576793217e-06, "loss": 0.8435, "step": 5207 }, { "epoch": 0.84, "grad_norm": 2.9840535095571576, "learning_rate": 1.3262938648487955e-06, "loss": 0.8513, "step": 5208 }, { "epoch": 0.84, "grad_norm": 3.3619996219705146, "learning_rate": 1.3236975158610178e-06, "loss": 0.8115, "step": 5209 }, { "epoch": 0.84, "grad_norm": 2.236348807559927, "learning_rate": 1.321103530537201e-06, "loss": 0.86, "step": 5210 }, { "epoch": 0.84, "grad_norm": 4.178401799188176, "learning_rate": 1.318511909584016e-06, "loss": 0.8571, "step": 5211 }, { "epoch": 0.84, "grad_norm": 2.8069587373272626, "learning_rate": 1.3159226537074933e-06, "loss": 0.8234, "step": 5212 }, { "epoch": 0.84, "grad_norm": 2.2116790714026235, "learning_rate": 1.3133357636130217e-06, "loss": 0.8621, "step": 5213 }, { "epoch": 0.84, "grad_norm": 3.1885716749787907, "learning_rate": 1.3107512400053335e-06, "loss": 0.9177, "step": 5214 }, { "epoch": 0.84, "grad_norm": 3.5394955132277226, "learning_rate": 1.3081690835885274e-06, "loss": 0.9347, "step": 5215 }, { "epoch": 0.84, "grad_norm": 2.75189022644807, "learning_rate": 1.3055892950660576e-06, "loss": 0.8332, "step": 5216 }, { "epoch": 0.84, "grad_norm": 3.3160713664751174, "learning_rate": 1.303011875140726e-06, "loss": 0.838, "step": 5217 }, { "epoch": 0.84, "grad_norm": 2.722330826476492, "learning_rate": 1.3004368245146913e-06, "loss": 0.8916, "step": 5218 }, { "epoch": 0.84, "grad_norm": 3.1577472723444058, "learning_rate": 1.2978641438894735e-06, "loss": 0.8282, "step": 5219 }, { "epoch": 0.84, "grad_norm": 1.8001458294393609, "learning_rate": 1.2952938339659382e-06, "loss": 0.334, "step": 5220 }, { "epoch": 0.84, "grad_norm": 1.5661795207101639, "learning_rate": 1.2927258954443066e-06, "loss": 0.3183, "step": 5221 }, { "epoch": 0.84, "grad_norm": 2.4726478612676126, "learning_rate": 1.2901603290241615e-06, "loss": 0.8736, "step": 5222 }, { "epoch": 0.84, "grad_norm": 2.9674061192973338, "learning_rate": 1.287597135404428e-06, "loss": 0.8652, "step": 5223 }, { "epoch": 0.84, "grad_norm": 1.3308713455042533, "learning_rate": 1.2850363152833955e-06, "loss": 0.8626, "step": 5224 }, { "epoch": 0.84, "grad_norm": 3.442240272979768, "learning_rate": 1.2824778693587014e-06, "loss": 0.8719, "step": 5225 }, { "epoch": 0.84, "grad_norm": 1.0389378408355603, "learning_rate": 1.2799217983273326e-06, "loss": 0.8625, "step": 5226 }, { "epoch": 0.84, "grad_norm": 3.596798872287071, "learning_rate": 1.2773681028856366e-06, "loss": 0.9163, "step": 5227 }, { "epoch": 0.84, "grad_norm": 3.406230541488644, "learning_rate": 1.2748167837293113e-06, "loss": 0.8633, "step": 5228 }, { "epoch": 0.84, "grad_norm": 3.2216984845711867, "learning_rate": 1.2722678415534062e-06, "loss": 0.8837, "step": 5229 }, { "epoch": 0.84, "grad_norm": 3.6917603015147575, "learning_rate": 1.2697212770523205e-06, "loss": 0.8085, "step": 5230 }, { "epoch": 0.84, "grad_norm": 3.0004164088854375, "learning_rate": 1.2671770909198122e-06, "loss": 0.8793, "step": 5231 }, { "epoch": 0.84, "grad_norm": 2.643668232548366, "learning_rate": 1.264635283848985e-06, "loss": 0.9246, "step": 5232 }, { "epoch": 0.84, "grad_norm": 2.7633397202672954, "learning_rate": 1.262095856532297e-06, "loss": 0.9028, "step": 5233 }, { "epoch": 0.84, "grad_norm": 2.698084105569869, "learning_rate": 1.2595588096615596e-06, "loss": 0.9145, "step": 5234 }, { "epoch": 0.84, "grad_norm": 1.3426849225087032, "learning_rate": 1.2570241439279386e-06, "loss": 0.9247, "step": 5235 }, { "epoch": 0.84, "grad_norm": 1.965842567617287, "learning_rate": 1.254491860021938e-06, "loss": 0.8682, "step": 5236 }, { "epoch": 0.84, "grad_norm": 3.99493540571985, "learning_rate": 1.2519619586334253e-06, "loss": 0.9261, "step": 5237 }, { "epoch": 0.84, "grad_norm": 1.4989138485491134, "learning_rate": 1.249434440451619e-06, "loss": 0.8604, "step": 5238 }, { "epoch": 0.84, "grad_norm": 2.9501468233974477, "learning_rate": 1.2469093061650816e-06, "loss": 0.8824, "step": 5239 }, { "epoch": 0.84, "grad_norm": 3.4776944150131803, "learning_rate": 1.2443865564617274e-06, "loss": 0.7694, "step": 5240 }, { "epoch": 0.84, "grad_norm": 1.5044957182326641, "learning_rate": 1.2418661920288278e-06, "loss": 0.8482, "step": 5241 }, { "epoch": 0.84, "grad_norm": 1.8609397138683306, "learning_rate": 1.2393482135529954e-06, "loss": 0.8354, "step": 5242 }, { "epoch": 0.84, "grad_norm": 3.856595787731233, "learning_rate": 1.2368326217201976e-06, "loss": 0.8294, "step": 5243 }, { "epoch": 0.84, "grad_norm": 1.5425458497539513, "learning_rate": 1.2343194172157535e-06, "loss": 0.8266, "step": 5244 }, { "epoch": 0.85, "grad_norm": 3.572649768762572, "learning_rate": 1.2318086007243257e-06, "loss": 0.8446, "step": 5245 }, { "epoch": 0.85, "grad_norm": 3.3881130494281866, "learning_rate": 1.2293001729299336e-06, "loss": 0.9229, "step": 5246 }, { "epoch": 0.85, "grad_norm": 2.5066372503252277, "learning_rate": 1.2267941345159385e-06, "loss": 0.9392, "step": 5247 }, { "epoch": 0.85, "grad_norm": 3.156722080693638, "learning_rate": 1.2242904861650574e-06, "loss": 0.8885, "step": 5248 }, { "epoch": 0.85, "grad_norm": 2.1844325766117607, "learning_rate": 1.2217892285593513e-06, "loss": 0.8824, "step": 5249 }, { "epoch": 0.85, "grad_norm": 2.9826955813338047, "learning_rate": 1.219290362380231e-06, "loss": 0.8837, "step": 5250 }, { "epoch": 0.85, "grad_norm": 2.8531518138949994, "learning_rate": 1.2167938883084595e-06, "loss": 0.9021, "step": 5251 }, { "epoch": 0.85, "grad_norm": 1.7408493445308222, "learning_rate": 1.2142998070241407e-06, "loss": 0.7938, "step": 5252 }, { "epoch": 0.85, "grad_norm": 1.9902087388713239, "learning_rate": 1.2118081192067365e-06, "loss": 0.8756, "step": 5253 }, { "epoch": 0.85, "grad_norm": 3.2133039307205413, "learning_rate": 1.2093188255350485e-06, "loss": 0.8722, "step": 5254 }, { "epoch": 0.85, "grad_norm": 3.8334047614920195, "learning_rate": 1.2068319266872264e-06, "loss": 0.8397, "step": 5255 }, { "epoch": 0.85, "grad_norm": 3.388418156252787, "learning_rate": 1.2043474233407737e-06, "loss": 0.8976, "step": 5256 }, { "epoch": 0.85, "grad_norm": 1.7004327055063986, "learning_rate": 1.2018653161725392e-06, "loss": 0.8356, "step": 5257 }, { "epoch": 0.85, "grad_norm": 3.6476532381118427, "learning_rate": 1.1993856058587117e-06, "loss": 0.9021, "step": 5258 }, { "epoch": 0.85, "grad_norm": 1.5323463718893267, "learning_rate": 1.1969082930748343e-06, "loss": 0.8612, "step": 5259 }, { "epoch": 0.85, "grad_norm": 2.3761437322382, "learning_rate": 1.1944333784957996e-06, "loss": 0.9105, "step": 5260 }, { "epoch": 0.85, "grad_norm": 3.412191546127054, "learning_rate": 1.191960862795839e-06, "loss": 0.8063, "step": 5261 }, { "epoch": 0.85, "grad_norm": 3.220340669037703, "learning_rate": 1.1894907466485317e-06, "loss": 0.8622, "step": 5262 }, { "epoch": 0.85, "grad_norm": 2.637566233883532, "learning_rate": 1.1870230307268116e-06, "loss": 0.866, "step": 5263 }, { "epoch": 0.85, "grad_norm": 3.5831448852902854, "learning_rate": 1.1845577157029474e-06, "loss": 0.8381, "step": 5264 }, { "epoch": 0.85, "grad_norm": 3.7842191278186617, "learning_rate": 1.1820948022485602e-06, "loss": 0.9051, "step": 5265 }, { "epoch": 0.85, "grad_norm": 1.900714654535901, "learning_rate": 1.1796342910346147e-06, "loss": 0.8379, "step": 5266 }, { "epoch": 0.85, "grad_norm": 4.270238372795838, "learning_rate": 1.1771761827314254e-06, "loss": 0.8128, "step": 5267 }, { "epoch": 0.85, "grad_norm": 2.6822188185394835, "learning_rate": 1.1747204780086462e-06, "loss": 0.8512, "step": 5268 }, { "epoch": 0.85, "grad_norm": 2.067232194043958, "learning_rate": 1.1722671775352778e-06, "loss": 0.8922, "step": 5269 }, { "epoch": 0.85, "grad_norm": 3.470192961370873, "learning_rate": 1.16981628197967e-06, "loss": 0.8442, "step": 5270 }, { "epoch": 0.85, "grad_norm": 3.619606478045397, "learning_rate": 1.1673677920095116e-06, "loss": 0.8574, "step": 5271 }, { "epoch": 0.85, "grad_norm": 2.4689831201123877, "learning_rate": 1.1649217082918385e-06, "loss": 0.8869, "step": 5272 }, { "epoch": 0.85, "grad_norm": 3.038287301718388, "learning_rate": 1.1624780314930339e-06, "loss": 0.9074, "step": 5273 }, { "epoch": 0.85, "grad_norm": 2.788071827760926, "learning_rate": 1.16003676227882e-06, "loss": 0.863, "step": 5274 }, { "epoch": 0.85, "grad_norm": 2.7256861811443103, "learning_rate": 1.157597901314268e-06, "loss": 0.8574, "step": 5275 }, { "epoch": 0.85, "grad_norm": 2.4475430683714645, "learning_rate": 1.1551614492637908e-06, "loss": 0.8867, "step": 5276 }, { "epoch": 0.85, "grad_norm": 3.1558367770283415, "learning_rate": 1.152727406791142e-06, "loss": 0.8643, "step": 5277 }, { "epoch": 0.85, "grad_norm": 3.035420331426925, "learning_rate": 1.1502957745594256e-06, "loss": 0.8562, "step": 5278 }, { "epoch": 0.85, "grad_norm": 3.5720401648968383, "learning_rate": 1.1478665532310839e-06, "loss": 0.867, "step": 5279 }, { "epoch": 0.85, "grad_norm": 3.070307840824232, "learning_rate": 1.1454397434679022e-06, "loss": 0.8392, "step": 5280 }, { "epoch": 0.85, "grad_norm": 2.9214895065513495, "learning_rate": 1.1430153459310112e-06, "loss": 0.9647, "step": 5281 }, { "epoch": 0.85, "grad_norm": 3.7511938102197617, "learning_rate": 1.1405933612808862e-06, "loss": 0.8522, "step": 5282 }, { "epoch": 0.85, "grad_norm": 1.7352578605294653, "learning_rate": 1.1381737901773405e-06, "loss": 0.8371, "step": 5283 }, { "epoch": 0.85, "grad_norm": 2.163433878795167, "learning_rate": 1.13575663327953e-06, "loss": 0.3014, "step": 5284 }, { "epoch": 0.85, "grad_norm": 3.9693502137484376, "learning_rate": 1.1333418912459593e-06, "loss": 0.9425, "step": 5285 }, { "epoch": 0.85, "grad_norm": 2.8554406953991194, "learning_rate": 1.1309295647344675e-06, "loss": 0.9066, "step": 5286 }, { "epoch": 0.85, "grad_norm": 3.704141083075816, "learning_rate": 1.1285196544022392e-06, "loss": 0.9609, "step": 5287 }, { "epoch": 0.85, "grad_norm": 3.8969749505165687, "learning_rate": 1.126112160905799e-06, "loss": 0.9067, "step": 5288 }, { "epoch": 0.85, "grad_norm": 2.454308194115083, "learning_rate": 1.12370708490102e-06, "loss": 0.8924, "step": 5289 }, { "epoch": 0.85, "grad_norm": 2.476763214738362, "learning_rate": 1.1213044270431062e-06, "loss": 0.8323, "step": 5290 }, { "epoch": 0.85, "grad_norm": 1.4184040841021748, "learning_rate": 1.1189041879866081e-06, "loss": 0.9441, "step": 5291 }, { "epoch": 0.85, "grad_norm": 2.543511445577826, "learning_rate": 1.1165063683854193e-06, "loss": 0.9031, "step": 5292 }, { "epoch": 0.85, "grad_norm": 2.692457815847522, "learning_rate": 1.1141109688927709e-06, "loss": 0.794, "step": 5293 }, { "epoch": 0.85, "grad_norm": 3.2579489114501383, "learning_rate": 1.1117179901612328e-06, "loss": 0.8294, "step": 5294 }, { "epoch": 0.85, "grad_norm": 2.6142624636620466, "learning_rate": 1.109327432842725e-06, "loss": 0.9162, "step": 5295 }, { "epoch": 0.85, "grad_norm": 3.1046986134776087, "learning_rate": 1.106939297588494e-06, "loss": 0.8685, "step": 5296 }, { "epoch": 0.85, "grad_norm": 3.436664132882882, "learning_rate": 1.1045535850491396e-06, "loss": 0.7777, "step": 5297 }, { "epoch": 0.85, "grad_norm": 2.4298235005772577, "learning_rate": 1.1021702958745917e-06, "loss": 0.8347, "step": 5298 }, { "epoch": 0.85, "grad_norm": 3.3526861267991586, "learning_rate": 1.0997894307141244e-06, "loss": 0.7841, "step": 5299 }, { "epoch": 0.85, "grad_norm": 4.153990855780406, "learning_rate": 1.0974109902163544e-06, "loss": 0.8974, "step": 5300 }, { "epoch": 0.85, "grad_norm": 2.8271735682712933, "learning_rate": 1.0950349750292311e-06, "loss": 0.8714, "step": 5301 }, { "epoch": 0.85, "grad_norm": 3.464211458918825, "learning_rate": 1.0926613858000456e-06, "loss": 0.8689, "step": 5302 }, { "epoch": 0.85, "grad_norm": 2.373953136813904, "learning_rate": 1.0902902231754309e-06, "loss": 0.8438, "step": 5303 }, { "epoch": 0.85, "grad_norm": 3.4931479547783497, "learning_rate": 1.087921487801359e-06, "loss": 0.8321, "step": 5304 }, { "epoch": 0.85, "grad_norm": 2.7079558671879598, "learning_rate": 1.0855551803231368e-06, "loss": 0.916, "step": 5305 }, { "epoch": 0.85, "grad_norm": 4.017036872571087, "learning_rate": 1.0831913013854101e-06, "loss": 0.8402, "step": 5306 }, { "epoch": 0.86, "grad_norm": 2.4962733148929193, "learning_rate": 1.080829851632167e-06, "loss": 0.9864, "step": 5307 }, { "epoch": 0.86, "grad_norm": 3.5244884742075375, "learning_rate": 1.0784708317067316e-06, "loss": 0.8612, "step": 5308 }, { "epoch": 0.86, "grad_norm": 3.2977840381653145, "learning_rate": 1.0761142422517623e-06, "loss": 0.9413, "step": 5309 }, { "epoch": 0.86, "grad_norm": 1.971371313412742, "learning_rate": 1.073760083909262e-06, "loss": 0.3168, "step": 5310 }, { "epoch": 0.86, "grad_norm": 3.0098731970454304, "learning_rate": 1.0714083573205702e-06, "loss": 0.8797, "step": 5311 }, { "epoch": 0.86, "grad_norm": 3.197089934371868, "learning_rate": 1.069059063126361e-06, "loss": 0.8767, "step": 5312 }, { "epoch": 0.86, "grad_norm": 2.539633255411294, "learning_rate": 1.066712201966642e-06, "loss": 0.7939, "step": 5313 }, { "epoch": 0.86, "grad_norm": 3.2469879277612574, "learning_rate": 1.06436777448077e-06, "loss": 0.8981, "step": 5314 }, { "epoch": 0.86, "grad_norm": 2.973638104213272, "learning_rate": 1.0620257813074274e-06, "loss": 0.8197, "step": 5315 }, { "epoch": 0.86, "grad_norm": 2.9379759058058497, "learning_rate": 1.0596862230846371e-06, "loss": 0.8446, "step": 5316 }, { "epoch": 0.86, "grad_norm": 3.141116583010176, "learning_rate": 1.0573491004497637e-06, "loss": 0.8634, "step": 5317 }, { "epoch": 0.86, "grad_norm": 3.7224578324203956, "learning_rate": 1.055014414039498e-06, "loss": 0.8661, "step": 5318 }, { "epoch": 0.86, "grad_norm": 3.6545442694183734, "learning_rate": 1.0526821644898777e-06, "loss": 0.8776, "step": 5319 }, { "epoch": 0.86, "grad_norm": 3.67146305451837, "learning_rate": 1.0503523524362701e-06, "loss": 0.8634, "step": 5320 }, { "epoch": 0.86, "grad_norm": 2.044509452613255, "learning_rate": 1.048024978513379e-06, "loss": 0.8793, "step": 5321 }, { "epoch": 0.86, "grad_norm": 3.41321251106504, "learning_rate": 1.0457000433552478e-06, "loss": 0.8512, "step": 5322 }, { "epoch": 0.86, "grad_norm": 3.4964643058247766, "learning_rate": 1.0433775475952511e-06, "loss": 0.9139, "step": 5323 }, { "epoch": 0.86, "grad_norm": 3.583034562067973, "learning_rate": 1.0410574918660998e-06, "loss": 0.9438, "step": 5324 }, { "epoch": 0.86, "grad_norm": 2.5885111284405458, "learning_rate": 1.0387398767998425e-06, "loss": 0.9172, "step": 5325 }, { "epoch": 0.86, "grad_norm": 3.8320541458923545, "learning_rate": 1.036424703027863e-06, "loss": 0.7901, "step": 5326 }, { "epoch": 0.86, "grad_norm": 3.136280299952963, "learning_rate": 1.0341119711808778e-06, "loss": 0.86, "step": 5327 }, { "epoch": 0.86, "grad_norm": 2.495342207166279, "learning_rate": 1.0318016818889343e-06, "loss": 0.9111, "step": 5328 }, { "epoch": 0.86, "grad_norm": 2.913017351428185, "learning_rate": 1.0294938357814254e-06, "loss": 0.8992, "step": 5329 }, { "epoch": 0.86, "grad_norm": 3.910385993968648, "learning_rate": 1.0271884334870685e-06, "loss": 0.8586, "step": 5330 }, { "epoch": 0.86, "grad_norm": 2.868009277988467, "learning_rate": 1.0248854756339176e-06, "loss": 0.8781, "step": 5331 }, { "epoch": 0.86, "grad_norm": 4.051916568465794, "learning_rate": 1.0225849628493634e-06, "loss": 0.8756, "step": 5332 }, { "epoch": 0.86, "grad_norm": 3.1242578769684357, "learning_rate": 1.020286895760132e-06, "loss": 0.8022, "step": 5333 }, { "epoch": 0.86, "grad_norm": 4.527249378869753, "learning_rate": 1.0179912749922772e-06, "loss": 0.9091, "step": 5334 }, { "epoch": 0.86, "grad_norm": 2.3525660201729646, "learning_rate": 1.0156981011711875e-06, "loss": 0.8746, "step": 5335 }, { "epoch": 0.86, "grad_norm": 2.6685863578707854, "learning_rate": 1.01340737492159e-06, "loss": 0.9186, "step": 5336 }, { "epoch": 0.86, "grad_norm": 3.5189185418914963, "learning_rate": 1.011119096867541e-06, "loss": 0.8749, "step": 5337 }, { "epoch": 0.86, "grad_norm": 3.770613156348593, "learning_rate": 1.0088332676324285e-06, "loss": 0.8368, "step": 5338 }, { "epoch": 0.86, "grad_norm": 3.1843428590479586, "learning_rate": 1.006549887838978e-06, "loss": 0.9119, "step": 5339 }, { "epoch": 0.86, "grad_norm": 2.040377956233763, "learning_rate": 1.0042689581092424e-06, "loss": 0.8589, "step": 5340 }, { "epoch": 0.86, "grad_norm": 3.1480680868984607, "learning_rate": 1.001990479064613e-06, "loss": 0.9096, "step": 5341 }, { "epoch": 0.86, "grad_norm": 2.49270442755638, "learning_rate": 9.997144513258095e-07, "loss": 0.8656, "step": 5342 }, { "epoch": 0.86, "grad_norm": 2.378539509683873, "learning_rate": 9.974408755128817e-07, "loss": 0.8732, "step": 5343 }, { "epoch": 0.86, "grad_norm": 3.305524426980645, "learning_rate": 9.951697522452175e-07, "loss": 0.8763, "step": 5344 }, { "epoch": 0.86, "grad_norm": 4.046655361740016, "learning_rate": 9.92901082141531e-07, "loss": 0.7814, "step": 5345 }, { "epoch": 0.86, "grad_norm": 1.5917213843295663, "learning_rate": 9.90634865819874e-07, "loss": 0.8572, "step": 5346 }, { "epoch": 0.86, "grad_norm": 2.390918090406478, "learning_rate": 9.883711038976218e-07, "loss": 0.833, "step": 5347 }, { "epoch": 0.86, "grad_norm": 2.7872322124954123, "learning_rate": 9.861097969914901e-07, "loss": 0.8728, "step": 5348 }, { "epoch": 0.86, "grad_norm": 3.238870049522344, "learning_rate": 9.838509457175183e-07, "loss": 0.8549, "step": 5349 }, { "epoch": 0.86, "grad_norm": 3.3588198180869324, "learning_rate": 9.815945506910795e-07, "loss": 0.8403, "step": 5350 }, { "epoch": 0.86, "grad_norm": 1.9275576926971985, "learning_rate": 9.793406125268801e-07, "loss": 0.8593, "step": 5351 }, { "epoch": 0.86, "grad_norm": 3.5219704561743614, "learning_rate": 9.770891318389542e-07, "loss": 0.9107, "step": 5352 }, { "epoch": 0.86, "grad_norm": 4.301476584344504, "learning_rate": 9.748401092406657e-07, "loss": 0.8554, "step": 5353 }, { "epoch": 0.86, "grad_norm": 2.2617927499633166, "learning_rate": 9.7259354534471e-07, "loss": 0.9804, "step": 5354 }, { "epoch": 0.86, "grad_norm": 2.416344116872388, "learning_rate": 9.703494407631176e-07, "loss": 0.8397, "step": 5355 }, { "epoch": 0.86, "grad_norm": 2.8267394829164156, "learning_rate": 9.681077961072405e-07, "loss": 0.8774, "step": 5356 }, { "epoch": 0.86, "grad_norm": 3.1571842737076614, "learning_rate": 9.658686119877636e-07, "loss": 0.8352, "step": 5357 }, { "epoch": 0.86, "grad_norm": 2.0626937312837184, "learning_rate": 9.636318890147057e-07, "loss": 0.8404, "step": 5358 }, { "epoch": 0.86, "grad_norm": 2.2088624092517333, "learning_rate": 9.613976277974101e-07, "loss": 0.869, "step": 5359 }, { "epoch": 0.86, "grad_norm": 4.114492732470996, "learning_rate": 9.591658289445504e-07, "loss": 0.8088, "step": 5360 }, { "epoch": 0.86, "grad_norm": 3.791082665598943, "learning_rate": 9.569364930641323e-07, "loss": 0.8717, "step": 5361 }, { "epoch": 0.86, "grad_norm": 2.3863265344286932, "learning_rate": 9.547096207634843e-07, "loss": 0.332, "step": 5362 }, { "epoch": 0.86, "grad_norm": 2.65134155935473, "learning_rate": 9.524852126492734e-07, "loss": 0.8906, "step": 5363 }, { "epoch": 0.86, "grad_norm": 2.750749572554176, "learning_rate": 9.50263269327486e-07, "loss": 0.8239, "step": 5364 }, { "epoch": 0.86, "grad_norm": 3.014474283568999, "learning_rate": 9.480437914034402e-07, "loss": 0.9197, "step": 5365 }, { "epoch": 0.86, "grad_norm": 2.6785533940752626, "learning_rate": 9.458267794817866e-07, "loss": 0.8609, "step": 5366 }, { "epoch": 0.86, "grad_norm": 2.015778646601235, "learning_rate": 9.436122341664955e-07, "loss": 0.8581, "step": 5367 }, { "epoch": 0.86, "grad_norm": 3.0992922990292118, "learning_rate": 9.414001560608743e-07, "loss": 0.9397, "step": 5368 }, { "epoch": 0.87, "grad_norm": 2.6484832309611885, "learning_rate": 9.391905457675499e-07, "loss": 0.914, "step": 5369 }, { "epoch": 0.87, "grad_norm": 2.0425875656426267, "learning_rate": 9.369834038884862e-07, "loss": 0.9257, "step": 5370 }, { "epoch": 0.87, "grad_norm": 3.1082298432031554, "learning_rate": 9.347787310249668e-07, "loss": 0.8909, "step": 5371 }, { "epoch": 0.87, "grad_norm": 3.7694672416035746, "learning_rate": 9.325765277776033e-07, "loss": 0.7787, "step": 5372 }, { "epoch": 0.87, "grad_norm": 1.8884548626697606, "learning_rate": 9.303767947463416e-07, "loss": 0.8902, "step": 5373 }, { "epoch": 0.87, "grad_norm": 2.728646342833349, "learning_rate": 9.281795325304454e-07, "loss": 0.811, "step": 5374 }, { "epoch": 0.87, "grad_norm": 3.7518215841481304, "learning_rate": 9.259847417285084e-07, "loss": 0.8332, "step": 5375 }, { "epoch": 0.87, "grad_norm": 3.895098829472421, "learning_rate": 9.237924229384554e-07, "loss": 0.8228, "step": 5376 }, { "epoch": 0.87, "grad_norm": 3.927308714425608, "learning_rate": 9.216025767575376e-07, "loss": 0.8223, "step": 5377 }, { "epoch": 0.87, "grad_norm": 2.234084423884472, "learning_rate": 9.194152037823211e-07, "loss": 0.8738, "step": 5378 }, { "epoch": 0.87, "grad_norm": 3.259634555858086, "learning_rate": 9.172303046087105e-07, "loss": 0.8284, "step": 5379 }, { "epoch": 0.87, "grad_norm": 3.6383721580886133, "learning_rate": 9.150478798319351e-07, "loss": 0.9221, "step": 5380 }, { "epoch": 0.87, "grad_norm": 1.1927769015174288, "learning_rate": 9.128679300465459e-07, "loss": 0.8886, "step": 5381 }, { "epoch": 0.87, "grad_norm": 2.1224745160517386, "learning_rate": 9.10690455846418e-07, "loss": 0.8496, "step": 5382 }, { "epoch": 0.87, "grad_norm": 2.426209637690892, "learning_rate": 9.085154578247613e-07, "loss": 0.9077, "step": 5383 }, { "epoch": 0.87, "grad_norm": 2.8557364237891005, "learning_rate": 9.063429365740995e-07, "loss": 0.8819, "step": 5384 }, { "epoch": 0.87, "grad_norm": 3.4429846271367803, "learning_rate": 9.041728926862914e-07, "loss": 0.8804, "step": 5385 }, { "epoch": 0.87, "grad_norm": 2.0786217117407437, "learning_rate": 9.020053267525142e-07, "loss": 0.3512, "step": 5386 }, { "epoch": 0.87, "grad_norm": 3.591470675567929, "learning_rate": 8.998402393632755e-07, "loss": 0.865, "step": 5387 }, { "epoch": 0.87, "grad_norm": 2.7749957797731493, "learning_rate": 8.976776311084024e-07, "loss": 0.8692, "step": 5388 }, { "epoch": 0.87, "grad_norm": 2.49457476369179, "learning_rate": 8.95517502577048e-07, "loss": 0.8535, "step": 5389 }, { "epoch": 0.87, "grad_norm": 2.671972351085355, "learning_rate": 8.933598543576938e-07, "loss": 0.8936, "step": 5390 }, { "epoch": 0.87, "grad_norm": 3.8170946294953785, "learning_rate": 8.912046870381397e-07, "loss": 0.8074, "step": 5391 }, { "epoch": 0.87, "grad_norm": 3.422984043664766, "learning_rate": 8.89052001205517e-07, "loss": 0.8612, "step": 5392 }, { "epoch": 0.87, "grad_norm": 3.3831138564708, "learning_rate": 8.869017974462735e-07, "loss": 0.8671, "step": 5393 }, { "epoch": 0.87, "grad_norm": 2.289761797747439, "learning_rate": 8.847540763461815e-07, "loss": 0.9001, "step": 5394 }, { "epoch": 0.87, "grad_norm": 3.1129128729970272, "learning_rate": 8.826088384903453e-07, "loss": 0.8099, "step": 5395 }, { "epoch": 0.87, "grad_norm": 2.4409422410619306, "learning_rate": 8.804660844631841e-07, "loss": 0.3321, "step": 5396 }, { "epoch": 0.87, "grad_norm": 3.5823879917095147, "learning_rate": 8.783258148484397e-07, "loss": 0.8825, "step": 5397 }, { "epoch": 0.87, "grad_norm": 2.6740924935789128, "learning_rate": 8.761880302291847e-07, "loss": 0.8569, "step": 5398 }, { "epoch": 0.87, "grad_norm": 2.111411460904475, "learning_rate": 8.740527311878133e-07, "loss": 0.9464, "step": 5399 }, { "epoch": 0.87, "grad_norm": 2.1802889086516055, "learning_rate": 8.719199183060323e-07, "loss": 0.8131, "step": 5400 }, { "epoch": 0.87, "grad_norm": 2.3918980219609987, "learning_rate": 8.697895921648824e-07, "loss": 0.8111, "step": 5401 }, { "epoch": 0.87, "grad_norm": 2.7060362733663896, "learning_rate": 8.676617533447251e-07, "loss": 0.8755, "step": 5402 }, { "epoch": 0.87, "grad_norm": 2.27280734528023, "learning_rate": 8.655364024252411e-07, "loss": 0.8411, "step": 5403 }, { "epoch": 0.87, "grad_norm": 3.4878848065244332, "learning_rate": 8.634135399854315e-07, "loss": 0.8955, "step": 5404 }, { "epoch": 0.87, "grad_norm": 3.441439781068911, "learning_rate": 8.612931666036262e-07, "loss": 0.8245, "step": 5405 }, { "epoch": 0.87, "grad_norm": 2.355431399832138, "learning_rate": 8.59175282857475e-07, "loss": 0.8694, "step": 5406 }, { "epoch": 0.87, "grad_norm": 3.155165382076958, "learning_rate": 8.570598893239413e-07, "loss": 0.8674, "step": 5407 }, { "epoch": 0.87, "grad_norm": 4.304256133743456, "learning_rate": 8.549469865793214e-07, "loss": 0.8399, "step": 5408 }, { "epoch": 0.87, "grad_norm": 2.829251902611391, "learning_rate": 8.528365751992284e-07, "loss": 0.9077, "step": 5409 }, { "epoch": 0.87, "grad_norm": 3.630734478965721, "learning_rate": 8.507286557585948e-07, "loss": 0.7957, "step": 5410 }, { "epoch": 0.87, "grad_norm": 3.4159998514412124, "learning_rate": 8.486232288316754e-07, "loss": 0.8165, "step": 5411 }, { "epoch": 0.87, "grad_norm": 3.8550128454644983, "learning_rate": 8.465202949920492e-07, "loss": 0.8497, "step": 5412 }, { "epoch": 0.87, "grad_norm": 1.6358157924204602, "learning_rate": 8.444198548126103e-07, "loss": 0.8955, "step": 5413 }, { "epoch": 0.87, "grad_norm": 3.6246456104551243, "learning_rate": 8.423219088655788e-07, "loss": 0.9093, "step": 5414 }, { "epoch": 0.87, "grad_norm": 2.196703487120451, "learning_rate": 8.402264577224928e-07, "loss": 0.8485, "step": 5415 }, { "epoch": 0.87, "grad_norm": 3.5014212311313506, "learning_rate": 8.381335019542091e-07, "loss": 0.9227, "step": 5416 }, { "epoch": 0.87, "grad_norm": 1.682449766432268, "learning_rate": 8.360430421309096e-07, "loss": 0.8936, "step": 5417 }, { "epoch": 0.87, "grad_norm": 4.727177048761956, "learning_rate": 8.339550788220907e-07, "loss": 0.8978, "step": 5418 }, { "epoch": 0.87, "grad_norm": 1.8550601549497137, "learning_rate": 8.318696125965698e-07, "loss": 0.7851, "step": 5419 }, { "epoch": 0.87, "grad_norm": 3.2001342208846877, "learning_rate": 8.297866440224889e-07, "loss": 0.8282, "step": 5420 }, { "epoch": 0.87, "grad_norm": 4.504050656970527, "learning_rate": 8.277061736673064e-07, "loss": 0.8623, "step": 5421 }, { "epoch": 0.87, "grad_norm": 4.170998699150532, "learning_rate": 8.256282020977957e-07, "loss": 0.789, "step": 5422 }, { "epoch": 0.87, "grad_norm": 1.9038692052819497, "learning_rate": 8.235527298800549e-07, "loss": 0.8363, "step": 5423 }, { "epoch": 0.87, "grad_norm": 2.7444750164644085, "learning_rate": 8.21479757579503e-07, "loss": 0.8029, "step": 5424 }, { "epoch": 0.87, "grad_norm": 1.1710786784132998, "learning_rate": 8.194092857608726e-07, "loss": 0.839, "step": 5425 }, { "epoch": 0.87, "grad_norm": 2.691539212999418, "learning_rate": 8.173413149882147e-07, "loss": 0.9222, "step": 5426 }, { "epoch": 0.87, "grad_norm": 2.5570618214747944, "learning_rate": 8.152758458249055e-07, "loss": 0.8556, "step": 5427 }, { "epoch": 0.87, "grad_norm": 3.3954162497541063, "learning_rate": 8.132128788336368e-07, "loss": 0.8489, "step": 5428 }, { "epoch": 0.87, "grad_norm": 3.585684707141826, "learning_rate": 8.111524145764116e-07, "loss": 0.8395, "step": 5429 }, { "epoch": 0.87, "grad_norm": 2.4587247073603975, "learning_rate": 8.090944536145606e-07, "loss": 0.9331, "step": 5430 }, { "epoch": 0.88, "grad_norm": 2.316246503239569, "learning_rate": 8.070389965087311e-07, "loss": 0.8564, "step": 5431 }, { "epoch": 0.88, "grad_norm": 2.9366880877518557, "learning_rate": 8.04986043818885e-07, "loss": 0.9416, "step": 5432 }, { "epoch": 0.88, "grad_norm": 3.917804566249283, "learning_rate": 8.029355961043006e-07, "loss": 0.9357, "step": 5433 }, { "epoch": 0.88, "grad_norm": 3.198297351623494, "learning_rate": 8.008876539235799e-07, "loss": 0.8418, "step": 5434 }, { "epoch": 0.88, "grad_norm": 3.4285625446295307, "learning_rate": 7.988422178346378e-07, "loss": 0.8214, "step": 5435 }, { "epoch": 0.88, "grad_norm": 2.8224201305296477, "learning_rate": 7.967992883947051e-07, "loss": 0.8362, "step": 5436 }, { "epoch": 0.88, "grad_norm": 2.2385482761953512, "learning_rate": 7.947588661603345e-07, "loss": 0.9031, "step": 5437 }, { "epoch": 0.88, "grad_norm": 3.7537544529418754, "learning_rate": 7.927209516873924e-07, "loss": 0.8444, "step": 5438 }, { "epoch": 0.88, "grad_norm": 3.241829358278409, "learning_rate": 7.906855455310647e-07, "loss": 0.8132, "step": 5439 }, { "epoch": 0.88, "grad_norm": 3.829995457419946, "learning_rate": 7.886526482458501e-07, "loss": 0.9195, "step": 5440 }, { "epoch": 0.88, "grad_norm": 4.5160304748750315, "learning_rate": 7.866222603855656e-07, "loss": 0.8005, "step": 5441 }, { "epoch": 0.88, "grad_norm": 2.877492239085717, "learning_rate": 7.845943825033442e-07, "loss": 0.8709, "step": 5442 }, { "epoch": 0.88, "grad_norm": 3.6648920417487525, "learning_rate": 7.825690151516418e-07, "loss": 0.9267, "step": 5443 }, { "epoch": 0.88, "grad_norm": 2.3308355950008255, "learning_rate": 7.805461588822161e-07, "loss": 0.8548, "step": 5444 }, { "epoch": 0.88, "grad_norm": 2.96573188355987, "learning_rate": 7.785258142461516e-07, "loss": 0.824, "step": 5445 }, { "epoch": 0.88, "grad_norm": 2.5319228985092264, "learning_rate": 7.765079817938493e-07, "loss": 0.8833, "step": 5446 }, { "epoch": 0.88, "grad_norm": 2.7063023410166926, "learning_rate": 7.744926620750193e-07, "loss": 0.9115, "step": 5447 }, { "epoch": 0.88, "grad_norm": 4.041354268903214, "learning_rate": 7.724798556386892e-07, "loss": 0.8862, "step": 5448 }, { "epoch": 0.88, "grad_norm": 3.4803659956948865, "learning_rate": 7.704695630332048e-07, "loss": 0.8778, "step": 5449 }, { "epoch": 0.88, "grad_norm": 2.651545498579343, "learning_rate": 7.684617848062281e-07, "loss": 0.8288, "step": 5450 }, { "epoch": 0.88, "grad_norm": 4.1124416266011785, "learning_rate": 7.664565215047259e-07, "loss": 0.8305, "step": 5451 }, { "epoch": 0.88, "grad_norm": 1.6423748312393405, "learning_rate": 7.644537736749924e-07, "loss": 0.3476, "step": 5452 }, { "epoch": 0.88, "grad_norm": 3.4123356203165525, "learning_rate": 7.624535418626323e-07, "loss": 0.9186, "step": 5453 }, { "epoch": 0.88, "grad_norm": 3.948274433495887, "learning_rate": 7.604558266125606e-07, "loss": 0.9149, "step": 5454 }, { "epoch": 0.88, "grad_norm": 1.9698862097161187, "learning_rate": 7.584606284690099e-07, "loss": 0.8849, "step": 5455 }, { "epoch": 0.88, "grad_norm": 0.8863781850572265, "learning_rate": 7.56467947975531e-07, "loss": 0.3254, "step": 5456 }, { "epoch": 0.88, "grad_norm": 2.1197227698024785, "learning_rate": 7.544777856749818e-07, "loss": 0.8316, "step": 5457 }, { "epoch": 0.88, "grad_norm": 3.5426315937892787, "learning_rate": 7.524901421095365e-07, "loss": 0.8778, "step": 5458 }, { "epoch": 0.88, "grad_norm": 3.6377495801321267, "learning_rate": 7.505050178206874e-07, "loss": 0.7827, "step": 5459 }, { "epoch": 0.88, "grad_norm": 3.1870817022177715, "learning_rate": 7.485224133492341e-07, "loss": 0.7818, "step": 5460 }, { "epoch": 0.88, "grad_norm": 2.852988359536519, "learning_rate": 7.465423292352947e-07, "loss": 0.8988, "step": 5461 }, { "epoch": 0.88, "grad_norm": 2.073488613438669, "learning_rate": 7.445647660182987e-07, "loss": 0.3374, "step": 5462 }, { "epoch": 0.88, "grad_norm": 1.0400002055901545, "learning_rate": 7.425897242369861e-07, "loss": 0.8393, "step": 5463 }, { "epoch": 0.88, "grad_norm": 2.4202580129992968, "learning_rate": 7.406172044294157e-07, "loss": 0.8901, "step": 5464 }, { "epoch": 0.88, "grad_norm": 3.3777177958711886, "learning_rate": 7.386472071329543e-07, "loss": 0.84, "step": 5465 }, { "epoch": 0.88, "grad_norm": 3.2403903622414125, "learning_rate": 7.366797328842856e-07, "loss": 0.8296, "step": 5466 }, { "epoch": 0.88, "grad_norm": 2.7792902696414394, "learning_rate": 7.347147822194012e-07, "loss": 0.8984, "step": 5467 }, { "epoch": 0.88, "grad_norm": 3.12298198391582, "learning_rate": 7.327523556736104e-07, "loss": 0.8122, "step": 5468 }, { "epoch": 0.88, "grad_norm": 2.5463720655888316, "learning_rate": 7.307924537815314e-07, "loss": 0.325, "step": 5469 }, { "epoch": 0.88, "grad_norm": 4.09285277806508, "learning_rate": 7.288350770770935e-07, "loss": 0.8258, "step": 5470 }, { "epoch": 0.88, "grad_norm": 2.8580198985448853, "learning_rate": 7.268802260935415e-07, "loss": 0.882, "step": 5471 }, { "epoch": 0.88, "grad_norm": 5.001404183625175, "learning_rate": 7.249279013634348e-07, "loss": 0.8475, "step": 5472 }, { "epoch": 0.88, "grad_norm": 2.6539664831280168, "learning_rate": 7.229781034186323e-07, "loss": 0.8824, "step": 5473 }, { "epoch": 0.88, "grad_norm": 3.5458583635163223, "learning_rate": 7.210308327903182e-07, "loss": 0.9341, "step": 5474 }, { "epoch": 0.88, "grad_norm": 3.399038397621218, "learning_rate": 7.190860900089824e-07, "loss": 0.8452, "step": 5475 }, { "epoch": 0.88, "grad_norm": 2.853934693945306, "learning_rate": 7.171438756044258e-07, "loss": 0.8578, "step": 5476 }, { "epoch": 0.88, "grad_norm": 2.6701749334284943, "learning_rate": 7.152041901057594e-07, "loss": 0.8869, "step": 5477 }, { "epoch": 0.88, "grad_norm": 4.119343665352087, "learning_rate": 7.132670340414106e-07, "loss": 0.7782, "step": 5478 }, { "epoch": 0.88, "grad_norm": 3.3137063762681382, "learning_rate": 7.113324079391115e-07, "loss": 0.8721, "step": 5479 }, { "epoch": 0.88, "grad_norm": 3.7303990861869085, "learning_rate": 7.094003123259063e-07, "loss": 0.7983, "step": 5480 }, { "epoch": 0.88, "grad_norm": 1.9321743774462021, "learning_rate": 7.07470747728155e-07, "loss": 0.2981, "step": 5481 }, { "epoch": 0.88, "grad_norm": 2.5640577838117427, "learning_rate": 7.055437146715194e-07, "loss": 0.323, "step": 5482 }, { "epoch": 0.88, "grad_norm": 3.547239041236695, "learning_rate": 7.036192136809816e-07, "loss": 0.8942, "step": 5483 }, { "epoch": 0.88, "grad_norm": 3.9257234331874367, "learning_rate": 7.016972452808246e-07, "loss": 0.8787, "step": 5484 }, { "epoch": 0.88, "grad_norm": 3.0617291784437417, "learning_rate": 6.997778099946495e-07, "loss": 0.9127, "step": 5485 }, { "epoch": 0.88, "grad_norm": 2.473897854249791, "learning_rate": 6.978609083453602e-07, "loss": 0.8584, "step": 5486 }, { "epoch": 0.88, "grad_norm": 3.177887128031885, "learning_rate": 6.959465408551724e-07, "loss": 0.8662, "step": 5487 }, { "epoch": 0.88, "grad_norm": 4.505141817458154, "learning_rate": 6.940347080456178e-07, "loss": 0.8542, "step": 5488 }, { "epoch": 0.88, "grad_norm": 2.5904645218973728, "learning_rate": 6.921254104375264e-07, "loss": 0.8483, "step": 5489 }, { "epoch": 0.88, "grad_norm": 2.3201699775054894, "learning_rate": 6.902186485510476e-07, "loss": 0.3378, "step": 5490 }, { "epoch": 0.88, "grad_norm": 4.450496285871021, "learning_rate": 6.883144229056349e-07, "loss": 0.8502, "step": 5491 }, { "epoch": 0.88, "grad_norm": 3.9044705419573464, "learning_rate": 6.864127340200499e-07, "loss": 0.8698, "step": 5492 }, { "epoch": 0.89, "grad_norm": 2.672150268652206, "learning_rate": 6.845135824123672e-07, "loss": 0.8508, "step": 5493 }, { "epoch": 0.89, "grad_norm": 2.010471349838875, "learning_rate": 6.826169685999707e-07, "loss": 0.9398, "step": 5494 }, { "epoch": 0.89, "grad_norm": 1.9889959763207643, "learning_rate": 6.807228930995436e-07, "loss": 0.8111, "step": 5495 }, { "epoch": 0.89, "grad_norm": 2.5875476998617937, "learning_rate": 6.788313564270877e-07, "loss": 0.8321, "step": 5496 }, { "epoch": 0.89, "grad_norm": 1.8529743976090387, "learning_rate": 6.769423590979107e-07, "loss": 0.7961, "step": 5497 }, { "epoch": 0.89, "grad_norm": 2.0835733911172594, "learning_rate": 6.750559016266278e-07, "loss": 0.951, "step": 5498 }, { "epoch": 0.89, "grad_norm": 2.5965623430965246, "learning_rate": 6.731719845271589e-07, "loss": 0.9237, "step": 5499 }, { "epoch": 0.89, "grad_norm": 1.7748024682826309, "learning_rate": 6.71290608312739e-07, "loss": 0.3147, "step": 5500 }, { "epoch": 0.89, "grad_norm": 2.9111264243786836, "learning_rate": 6.694117734959038e-07, "loss": 0.9025, "step": 5501 }, { "epoch": 0.89, "grad_norm": 3.6904635401331696, "learning_rate": 6.675354805885004e-07, "loss": 0.7945, "step": 5502 }, { "epoch": 0.89, "grad_norm": 2.9145729906408997, "learning_rate": 6.656617301016833e-07, "loss": 0.8695, "step": 5503 }, { "epoch": 0.89, "grad_norm": 2.360833935892944, "learning_rate": 6.637905225459129e-07, "loss": 0.9365, "step": 5504 }, { "epoch": 0.89, "grad_norm": 3.119949379290231, "learning_rate": 6.619218584309595e-07, "loss": 0.8907, "step": 5505 }, { "epoch": 0.89, "grad_norm": 3.5741717893089846, "learning_rate": 6.600557382658956e-07, "loss": 0.8958, "step": 5506 }, { "epoch": 0.89, "grad_norm": 1.342738458351216, "learning_rate": 6.58192162559107e-07, "loss": 0.8384, "step": 5507 }, { "epoch": 0.89, "grad_norm": 2.703017745343272, "learning_rate": 6.563311318182819e-07, "loss": 0.9305, "step": 5508 }, { "epoch": 0.89, "grad_norm": 2.015462114716661, "learning_rate": 6.544726465504159e-07, "loss": 0.8469, "step": 5509 }, { "epoch": 0.89, "grad_norm": 3.466719711949361, "learning_rate": 6.526167072618117e-07, "loss": 0.9612, "step": 5510 }, { "epoch": 0.89, "grad_norm": 3.050232900263455, "learning_rate": 6.507633144580783e-07, "loss": 0.9089, "step": 5511 }, { "epoch": 0.89, "grad_norm": 4.211659442113958, "learning_rate": 6.489124686441328e-07, "loss": 0.876, "step": 5512 }, { "epoch": 0.89, "grad_norm": 2.6494072233140624, "learning_rate": 6.470641703241942e-07, "loss": 0.8789, "step": 5513 }, { "epoch": 0.89, "grad_norm": 4.3024952147134155, "learning_rate": 6.452184200017897e-07, "loss": 0.8938, "step": 5514 }, { "epoch": 0.89, "grad_norm": 3.5864588539255373, "learning_rate": 6.433752181797548e-07, "loss": 0.8262, "step": 5515 }, { "epoch": 0.89, "grad_norm": 1.7775491784397557, "learning_rate": 6.415345653602278e-07, "loss": 0.947, "step": 5516 }, { "epoch": 0.89, "grad_norm": 2.658701046312298, "learning_rate": 6.396964620446522e-07, "loss": 0.8692, "step": 5517 }, { "epoch": 0.89, "grad_norm": 3.0432218120763985, "learning_rate": 6.378609087337773e-07, "loss": 0.8865, "step": 5518 }, { "epoch": 0.89, "grad_norm": 3.0872042989249646, "learning_rate": 6.360279059276619e-07, "loss": 0.8545, "step": 5519 }, { "epoch": 0.89, "grad_norm": 4.171501685880951, "learning_rate": 6.341974541256635e-07, "loss": 0.9243, "step": 5520 }, { "epoch": 0.89, "grad_norm": 4.512071101974398, "learning_rate": 6.323695538264474e-07, "loss": 0.8359, "step": 5521 }, { "epoch": 0.89, "grad_norm": 2.503258965154176, "learning_rate": 6.305442055279864e-07, "loss": 0.8966, "step": 5522 }, { "epoch": 0.89, "grad_norm": 1.934367385380907, "learning_rate": 6.287214097275551e-07, "loss": 0.3377, "step": 5523 }, { "epoch": 0.89, "grad_norm": 3.9211879903592104, "learning_rate": 6.269011669217306e-07, "loss": 0.8539, "step": 5524 }, { "epoch": 0.89, "grad_norm": 2.4102559617745025, "learning_rate": 6.250834776063996e-07, "loss": 0.9, "step": 5525 }, { "epoch": 0.89, "grad_norm": 3.6088606562890773, "learning_rate": 6.232683422767516e-07, "loss": 0.8938, "step": 5526 }, { "epoch": 0.89, "grad_norm": 2.6256561367528723, "learning_rate": 6.214557614272787e-07, "loss": 0.8717, "step": 5527 }, { "epoch": 0.89, "grad_norm": 3.171316764829427, "learning_rate": 6.196457355517749e-07, "loss": 0.9171, "step": 5528 }, { "epoch": 0.89, "grad_norm": 3.917271804054554, "learning_rate": 6.178382651433456e-07, "loss": 0.8721, "step": 5529 }, { "epoch": 0.89, "grad_norm": 4.079257851589058, "learning_rate": 6.160333506943939e-07, "loss": 0.8807, "step": 5530 }, { "epoch": 0.89, "grad_norm": 2.9778873568944078, "learning_rate": 6.142309926966273e-07, "loss": 0.8186, "step": 5531 }, { "epoch": 0.89, "grad_norm": 2.702415505499029, "learning_rate": 6.124311916410586e-07, "loss": 0.8257, "step": 5532 }, { "epoch": 0.89, "grad_norm": 1.983899998321141, "learning_rate": 6.106339480180023e-07, "loss": 0.3307, "step": 5533 }, { "epoch": 0.89, "grad_norm": 2.4339224163281856, "learning_rate": 6.088392623170802e-07, "loss": 0.3211, "step": 5534 }, { "epoch": 0.89, "grad_norm": 4.142603448325476, "learning_rate": 6.070471350272111e-07, "loss": 0.8919, "step": 5535 }, { "epoch": 0.89, "grad_norm": 2.047915710987115, "learning_rate": 6.052575666366189e-07, "loss": 0.85, "step": 5536 }, { "epoch": 0.89, "grad_norm": 2.089382798171489, "learning_rate": 6.034705576328348e-07, "loss": 0.8477, "step": 5537 }, { "epoch": 0.89, "grad_norm": 3.4055721063369018, "learning_rate": 6.016861085026881e-07, "loss": 0.9147, "step": 5538 }, { "epoch": 0.89, "grad_norm": 2.192956603818062, "learning_rate": 5.999042197323102e-07, "loss": 0.8408, "step": 5539 }, { "epoch": 0.89, "grad_norm": 3.227008040657819, "learning_rate": 5.98124891807137e-07, "loss": 0.8358, "step": 5540 }, { "epoch": 0.89, "grad_norm": 3.1846905463062534, "learning_rate": 5.963481252119096e-07, "loss": 0.9309, "step": 5541 }, { "epoch": 0.89, "grad_norm": 2.665210912635581, "learning_rate": 5.945739204306666e-07, "loss": 0.9204, "step": 5542 }, { "epoch": 0.89, "grad_norm": 2.6299374783993072, "learning_rate": 5.928022779467468e-07, "loss": 0.8738, "step": 5543 }, { "epoch": 0.89, "grad_norm": 4.496608939925526, "learning_rate": 5.910331982428008e-07, "loss": 0.8454, "step": 5544 }, { "epoch": 0.89, "grad_norm": 2.650396112311241, "learning_rate": 5.892666818007698e-07, "loss": 0.8676, "step": 5545 }, { "epoch": 0.89, "grad_norm": 2.6015141113180067, "learning_rate": 5.87502729101902e-07, "loss": 0.945, "step": 5546 }, { "epoch": 0.89, "grad_norm": 3.1915317543406716, "learning_rate": 5.857413406267476e-07, "loss": 0.8175, "step": 5547 }, { "epoch": 0.89, "grad_norm": 4.2164834044265715, "learning_rate": 5.839825168551594e-07, "loss": 0.8684, "step": 5548 }, { "epoch": 0.89, "grad_norm": 2.0445089861569006, "learning_rate": 5.822262582662874e-07, "loss": 0.3088, "step": 5549 }, { "epoch": 0.89, "grad_norm": 2.860501260379433, "learning_rate": 5.804725653385846e-07, "loss": 0.906, "step": 5550 }, { "epoch": 0.89, "grad_norm": 2.817489183027731, "learning_rate": 5.787214385498063e-07, "loss": 0.8189, "step": 5551 }, { "epoch": 0.89, "grad_norm": 1.638676530833672, "learning_rate": 5.769728783770068e-07, "loss": 0.8514, "step": 5552 }, { "epoch": 0.89, "grad_norm": 3.768296176795264, "learning_rate": 5.752268852965426e-07, "loss": 0.9052, "step": 5553 }, { "epoch": 0.89, "grad_norm": 2.7334639421132394, "learning_rate": 5.734834597840699e-07, "loss": 0.8679, "step": 5554 }, { "epoch": 0.9, "grad_norm": 3.3466586002946683, "learning_rate": 5.717426023145456e-07, "loss": 0.9431, "step": 5555 }, { "epoch": 0.9, "grad_norm": 2.759759145676444, "learning_rate": 5.70004313362229e-07, "loss": 0.7861, "step": 5556 }, { "epoch": 0.9, "grad_norm": 1.8708271003851538, "learning_rate": 5.682685934006771e-07, "loss": 0.3068, "step": 5557 }, { "epoch": 0.9, "grad_norm": 2.733961935178797, "learning_rate": 5.66535442902747e-07, "loss": 0.9125, "step": 5558 }, { "epoch": 0.9, "grad_norm": 2.1451591864785353, "learning_rate": 5.648048623405977e-07, "loss": 0.8055, "step": 5559 }, { "epoch": 0.9, "grad_norm": 3.4583773476124615, "learning_rate": 5.630768521856866e-07, "loss": 0.8369, "step": 5560 }, { "epoch": 0.9, "grad_norm": 4.046296892150737, "learning_rate": 5.613514129087693e-07, "loss": 0.8817, "step": 5561 }, { "epoch": 0.9, "grad_norm": 2.4902728627540474, "learning_rate": 5.596285449799055e-07, "loss": 0.87, "step": 5562 }, { "epoch": 0.9, "grad_norm": 3.1741659976407366, "learning_rate": 5.579082488684529e-07, "loss": 0.851, "step": 5563 }, { "epoch": 0.9, "grad_norm": 2.7760028848330762, "learning_rate": 5.561905250430665e-07, "loss": 0.8275, "step": 5564 }, { "epoch": 0.9, "grad_norm": 2.5977660191798018, "learning_rate": 5.54475373971699e-07, "loss": 0.877, "step": 5565 }, { "epoch": 0.9, "grad_norm": 2.746227364253648, "learning_rate": 5.527627961216087e-07, "loss": 0.8629, "step": 5566 }, { "epoch": 0.9, "grad_norm": 3.5042476764134305, "learning_rate": 5.510527919593478e-07, "loss": 0.8631, "step": 5567 }, { "epoch": 0.9, "grad_norm": 2.113752781411027, "learning_rate": 5.493453619507672e-07, "loss": 0.9488, "step": 5568 }, { "epoch": 0.9, "grad_norm": 4.9189264093777645, "learning_rate": 5.47640506561018e-07, "loss": 0.8307, "step": 5569 }, { "epoch": 0.9, "grad_norm": 4.296865678256934, "learning_rate": 5.45938226254552e-07, "loss": 0.7805, "step": 5570 }, { "epoch": 0.9, "grad_norm": 2.3109415318658306, "learning_rate": 5.442385214951151e-07, "loss": 0.9981, "step": 5571 }, { "epoch": 0.9, "grad_norm": 4.137517028957946, "learning_rate": 5.425413927457546e-07, "loss": 0.8674, "step": 5572 }, { "epoch": 0.9, "grad_norm": 2.9481441738013836, "learning_rate": 5.408468404688161e-07, "loss": 0.8046, "step": 5573 }, { "epoch": 0.9, "grad_norm": 3.0506335429091704, "learning_rate": 5.391548651259415e-07, "loss": 0.8785, "step": 5574 }, { "epoch": 0.9, "grad_norm": 4.130334352717026, "learning_rate": 5.374654671780688e-07, "loss": 0.8229, "step": 5575 }, { "epoch": 0.9, "grad_norm": 2.161868970233397, "learning_rate": 5.357786470854421e-07, "loss": 0.3158, "step": 5576 }, { "epoch": 0.9, "grad_norm": 3.9037324651054983, "learning_rate": 5.340944053075925e-07, "loss": 0.8205, "step": 5577 }, { "epoch": 0.9, "grad_norm": 3.1048978076771165, "learning_rate": 5.324127423033576e-07, "loss": 0.8673, "step": 5578 }, { "epoch": 0.9, "grad_norm": 3.3241193422544706, "learning_rate": 5.307336585308676e-07, "loss": 0.8045, "step": 5579 }, { "epoch": 0.9, "grad_norm": 2.1972291663641923, "learning_rate": 5.290571544475487e-07, "loss": 0.8905, "step": 5580 }, { "epoch": 0.9, "grad_norm": 2.160196645403831, "learning_rate": 5.273832305101312e-07, "loss": 0.8404, "step": 5581 }, { "epoch": 0.9, "grad_norm": 4.26594693153305, "learning_rate": 5.257118871746347e-07, "loss": 0.9244, "step": 5582 }, { "epoch": 0.9, "grad_norm": 4.29777134649494, "learning_rate": 5.240431248963807e-07, "loss": 0.8396, "step": 5583 }, { "epoch": 0.9, "grad_norm": 3.3960175415502625, "learning_rate": 5.223769441299842e-07, "loss": 0.8747, "step": 5584 }, { "epoch": 0.9, "grad_norm": 2.2630228818848312, "learning_rate": 5.207133453293633e-07, "loss": 0.9204, "step": 5585 }, { "epoch": 0.9, "grad_norm": 2.7038731063491195, "learning_rate": 5.190523289477234e-07, "loss": 0.898, "step": 5586 }, { "epoch": 0.9, "grad_norm": 3.5019572098937415, "learning_rate": 5.173938954375734e-07, "loss": 0.8119, "step": 5587 }, { "epoch": 0.9, "grad_norm": 3.9726483280249183, "learning_rate": 5.157380452507166e-07, "loss": 0.8132, "step": 5588 }, { "epoch": 0.9, "grad_norm": 3.633037338682367, "learning_rate": 5.140847788382508e-07, "loss": 0.7695, "step": 5589 }, { "epoch": 0.9, "grad_norm": 1.311099440492172, "learning_rate": 5.124340966505715e-07, "loss": 0.862, "step": 5590 }, { "epoch": 0.9, "grad_norm": 3.2714374413610994, "learning_rate": 5.107859991373698e-07, "loss": 0.8133, "step": 5591 }, { "epoch": 0.9, "grad_norm": 2.242402176100144, "learning_rate": 5.091404867476368e-07, "loss": 0.8638, "step": 5592 }, { "epoch": 0.9, "grad_norm": 2.414715626087388, "learning_rate": 5.074975599296494e-07, "loss": 0.9139, "step": 5593 }, { "epoch": 0.9, "grad_norm": 3.876629640550554, "learning_rate": 5.058572191309896e-07, "loss": 0.8157, "step": 5594 }, { "epoch": 0.9, "grad_norm": 2.725932138584877, "learning_rate": 5.042194647985311e-07, "loss": 0.8726, "step": 5595 }, { "epoch": 0.9, "grad_norm": 1.667046964490691, "learning_rate": 5.025842973784445e-07, "loss": 0.3558, "step": 5596 }, { "epoch": 0.9, "grad_norm": 2.8521448664207596, "learning_rate": 5.009517173161904e-07, "loss": 0.8496, "step": 5597 }, { "epoch": 0.9, "grad_norm": 3.3253595028754797, "learning_rate": 4.993217250565341e-07, "loss": 0.8589, "step": 5598 }, { "epoch": 0.9, "grad_norm": 2.6017547628022437, "learning_rate": 4.976943210435247e-07, "loss": 0.938, "step": 5599 }, { "epoch": 0.9, "grad_norm": 1.9067367182491994, "learning_rate": 4.960695057205178e-07, "loss": 0.8752, "step": 5600 }, { "epoch": 0.9, "grad_norm": 2.5580701918407707, "learning_rate": 4.944472795301535e-07, "loss": 0.8669, "step": 5601 }, { "epoch": 0.9, "grad_norm": 1.9436583935901677, "learning_rate": 4.928276429143719e-07, "loss": 0.8633, "step": 5602 }, { "epoch": 0.9, "grad_norm": 3.0949929562983067, "learning_rate": 4.912105963144076e-07, "loss": 0.8388, "step": 5603 }, { "epoch": 0.9, "grad_norm": 2.9755237599140405, "learning_rate": 4.895961401707882e-07, "loss": 0.9632, "step": 5604 }, { "epoch": 0.9, "grad_norm": 2.372424083978394, "learning_rate": 4.879842749233366e-07, "loss": 0.8831, "step": 5605 }, { "epoch": 0.9, "grad_norm": 3.5255886370983656, "learning_rate": 4.863750010111667e-07, "loss": 0.926, "step": 5606 }, { "epoch": 0.9, "grad_norm": 2.879932815774782, "learning_rate": 4.847683188726938e-07, "loss": 0.8947, "step": 5607 }, { "epoch": 0.9, "grad_norm": 4.153100218458561, "learning_rate": 4.831642289456184e-07, "loss": 0.8466, "step": 5608 }, { "epoch": 0.9, "grad_norm": 3.6713651262519216, "learning_rate": 4.815627316669403e-07, "loss": 0.9168, "step": 5609 }, { "epoch": 0.9, "grad_norm": 4.023814833641042, "learning_rate": 4.799638274729513e-07, "loss": 0.8244, "step": 5610 }, { "epoch": 0.9, "grad_norm": 2.9597199284304816, "learning_rate": 4.783675167992385e-07, "loss": 0.8838, "step": 5611 }, { "epoch": 0.9, "grad_norm": 2.5563150523519402, "learning_rate": 4.767738000806765e-07, "loss": 0.854, "step": 5612 }, { "epoch": 0.9, "grad_norm": 3.1337860192997065, "learning_rate": 4.7518267775144233e-07, "loss": 0.8929, "step": 5613 }, { "epoch": 0.9, "grad_norm": 4.296932927521462, "learning_rate": 4.7359415024500143e-07, "loss": 0.8682, "step": 5614 }, { "epoch": 0.9, "grad_norm": 3.2140422577468577, "learning_rate": 4.7200821799410767e-07, "loss": 0.8536, "step": 5615 }, { "epoch": 0.9, "grad_norm": 4.2200864582335065, "learning_rate": 4.7042488143081766e-07, "loss": 0.8642, "step": 5616 }, { "epoch": 0.91, "grad_norm": 3.535673628404041, "learning_rate": 4.6884414098647415e-07, "loss": 0.9278, "step": 5617 }, { "epoch": 0.91, "grad_norm": 2.538602253208229, "learning_rate": 4.6726599709171483e-07, "loss": 0.8719, "step": 5618 }, { "epoch": 0.91, "grad_norm": 2.394612197378979, "learning_rate": 4.6569045017646807e-07, "loss": 0.8076, "step": 5619 }, { "epoch": 0.91, "grad_norm": 3.5282636860163277, "learning_rate": 4.641175006699594e-07, "loss": 0.9381, "step": 5620 }, { "epoch": 0.91, "grad_norm": 2.8077079638857616, "learning_rate": 4.625471490007005e-07, "loss": 0.7749, "step": 5621 }, { "epoch": 0.91, "grad_norm": 3.664993916047213, "learning_rate": 4.609793955964992e-07, "loss": 0.8081, "step": 5622 }, { "epoch": 0.91, "grad_norm": 1.9037744676065866, "learning_rate": 4.5941424088445485e-07, "loss": 0.331, "step": 5623 }, { "epoch": 0.91, "grad_norm": 3.461071278761459, "learning_rate": 4.578516852909609e-07, "loss": 0.8928, "step": 5624 }, { "epoch": 0.91, "grad_norm": 2.8705519392041916, "learning_rate": 4.5629172924169793e-07, "loss": 0.8595, "step": 5625 }, { "epoch": 0.91, "grad_norm": 1.7367492096529724, "learning_rate": 4.547343731616405e-07, "loss": 0.3444, "step": 5626 }, { "epoch": 0.91, "grad_norm": 2.2818907791465617, "learning_rate": 4.5317961747505803e-07, "loss": 0.8328, "step": 5627 }, { "epoch": 0.91, "grad_norm": 2.9998070336906477, "learning_rate": 4.5162746260550614e-07, "loss": 0.889, "step": 5628 }, { "epoch": 0.91, "grad_norm": 3.7992458398234796, "learning_rate": 4.500779089758378e-07, "loss": 0.791, "step": 5629 }, { "epoch": 0.91, "grad_norm": 3.681195705253055, "learning_rate": 4.4853095700819196e-07, "loss": 0.8068, "step": 5630 }, { "epoch": 0.91, "grad_norm": 4.248172367040091, "learning_rate": 4.469866071239992e-07, "loss": 0.8908, "step": 5631 }, { "epoch": 0.91, "grad_norm": 2.156425579496595, "learning_rate": 4.4544485974398757e-07, "loss": 0.8416, "step": 5632 }, { "epoch": 0.91, "grad_norm": 2.6563358966178203, "learning_rate": 4.439057152881676e-07, "loss": 0.8882, "step": 5633 }, { "epoch": 0.91, "grad_norm": 2.2533649078767817, "learning_rate": 4.4236917417584513e-07, "loss": 0.9012, "step": 5634 }, { "epoch": 0.91, "grad_norm": 2.532452144792004, "learning_rate": 4.4083523682561747e-07, "loss": 0.7942, "step": 5635 }, { "epoch": 0.91, "grad_norm": 1.9762332928582884, "learning_rate": 4.393039036553748e-07, "loss": 0.3362, "step": 5636 }, { "epoch": 0.91, "grad_norm": 3.760208095919903, "learning_rate": 4.377751750822867e-07, "loss": 0.7991, "step": 5637 }, { "epoch": 0.91, "grad_norm": 2.9556576802799825, "learning_rate": 4.362490515228257e-07, "loss": 0.8533, "step": 5638 }, { "epoch": 0.91, "grad_norm": 3.1308774989240704, "learning_rate": 4.3472553339275136e-07, "loss": 0.9069, "step": 5639 }, { "epoch": 0.91, "grad_norm": 3.346257490649156, "learning_rate": 4.3320462110710946e-07, "loss": 0.7941, "step": 5640 }, { "epoch": 0.91, "grad_norm": 3.2607927846853073, "learning_rate": 4.316863150802375e-07, "loss": 0.8947, "step": 5641 }, { "epoch": 0.91, "grad_norm": 2.4997489803177455, "learning_rate": 4.301706157257657e-07, "loss": 0.8241, "step": 5642 }, { "epoch": 0.91, "grad_norm": 3.4965794060810036, "learning_rate": 4.286575234566148e-07, "loss": 0.8954, "step": 5643 }, { "epoch": 0.91, "grad_norm": 2.484024311001993, "learning_rate": 4.271470386849874e-07, "loss": 0.8174, "step": 5644 }, { "epoch": 0.91, "grad_norm": 1.7474089923785754, "learning_rate": 4.256391618223843e-07, "loss": 0.8222, "step": 5645 }, { "epoch": 0.91, "grad_norm": 2.9022758958567993, "learning_rate": 4.241338932795935e-07, "loss": 0.8712, "step": 5646 }, { "epoch": 0.91, "grad_norm": 3.35150046835806, "learning_rate": 4.226312334666904e-07, "loss": 0.8335, "step": 5647 }, { "epoch": 0.91, "grad_norm": 4.215646011605577, "learning_rate": 4.211311827930398e-07, "loss": 0.879, "step": 5648 }, { "epoch": 0.91, "grad_norm": 2.935859221937032, "learning_rate": 4.196337416672991e-07, "loss": 0.8405, "step": 5649 }, { "epoch": 0.91, "grad_norm": 2.7223677844966025, "learning_rate": 4.1813891049740986e-07, "loss": 0.8669, "step": 5650 }, { "epoch": 0.91, "grad_norm": 2.115426545294442, "learning_rate": 4.166466896906085e-07, "loss": 0.8845, "step": 5651 }, { "epoch": 0.91, "grad_norm": 2.5140905024247484, "learning_rate": 4.1515707965341547e-07, "loss": 0.9171, "step": 5652 }, { "epoch": 0.91, "grad_norm": 2.463421346187314, "learning_rate": 4.136700807916405e-07, "loss": 0.9197, "step": 5653 }, { "epoch": 0.91, "grad_norm": 1.5679779158905456, "learning_rate": 4.121856935103863e-07, "loss": 0.3335, "step": 5654 }, { "epoch": 0.91, "grad_norm": 2.440169608678096, "learning_rate": 4.107039182140382e-07, "loss": 0.8147, "step": 5655 }, { "epoch": 0.91, "grad_norm": 2.8225726003576153, "learning_rate": 4.0922475530627224e-07, "loss": 0.8143, "step": 5656 }, { "epoch": 0.91, "grad_norm": 2.958825477891213, "learning_rate": 4.0774820519005385e-07, "loss": 0.8795, "step": 5657 }, { "epoch": 0.91, "grad_norm": 1.741598671250544, "learning_rate": 4.0627426826763903e-07, "loss": 0.3122, "step": 5658 }, { "epoch": 0.91, "grad_norm": 2.4396007972654687, "learning_rate": 4.048029449405633e-07, "loss": 0.792, "step": 5659 }, { "epoch": 0.91, "grad_norm": 3.50223592455626, "learning_rate": 4.033342356096592e-07, "loss": 0.8868, "step": 5660 }, { "epoch": 0.91, "grad_norm": 3.06325724056518, "learning_rate": 4.0186814067504356e-07, "loss": 0.8604, "step": 5661 }, { "epoch": 0.91, "grad_norm": 4.7057925545315085, "learning_rate": 4.0040466053612006e-07, "loss": 0.8016, "step": 5662 }, { "epoch": 0.91, "grad_norm": 2.4066947426923866, "learning_rate": 3.9894379559158094e-07, "loss": 0.8725, "step": 5663 }, { "epoch": 0.91, "grad_norm": 3.597204124048398, "learning_rate": 3.974855462394067e-07, "loss": 0.8323, "step": 5664 }, { "epoch": 0.91, "grad_norm": 3.2840437848219137, "learning_rate": 3.960299128768663e-07, "loss": 0.9502, "step": 5665 }, { "epoch": 0.91, "grad_norm": 1.482688105905595, "learning_rate": 3.945768959005114e-07, "loss": 0.8111, "step": 5666 }, { "epoch": 0.91, "grad_norm": 2.215500937621569, "learning_rate": 3.931264957061842e-07, "loss": 0.7986, "step": 5667 }, { "epoch": 0.91, "grad_norm": 2.488276363350039, "learning_rate": 3.9167871268901536e-07, "loss": 0.8682, "step": 5668 }, { "epoch": 0.91, "grad_norm": 4.523763852378885, "learning_rate": 3.9023354724342157e-07, "loss": 0.873, "step": 5669 }, { "epoch": 0.91, "grad_norm": 1.931488864286921, "learning_rate": 3.887909997631012e-07, "loss": 0.8849, "step": 5670 }, { "epoch": 0.91, "grad_norm": 2.7263668831768313, "learning_rate": 3.8735107064104994e-07, "loss": 0.9057, "step": 5671 }, { "epoch": 0.91, "grad_norm": 2.508997556962686, "learning_rate": 3.8591376026954063e-07, "loss": 0.7922, "step": 5672 }, { "epoch": 0.91, "grad_norm": 2.3213854020684552, "learning_rate": 3.844790690401357e-07, "loss": 0.9097, "step": 5673 }, { "epoch": 0.91, "grad_norm": 2.009571299597629, "learning_rate": 3.830469973436868e-07, "loss": 0.8716, "step": 5674 }, { "epoch": 0.91, "grad_norm": 2.296272042988408, "learning_rate": 3.816175455703264e-07, "loss": 0.8561, "step": 5675 }, { "epoch": 0.91, "grad_norm": 3.288681334384409, "learning_rate": 3.8019071410948183e-07, "loss": 0.9054, "step": 5676 }, { "epoch": 0.91, "grad_norm": 2.300349403876144, "learning_rate": 3.7876650334985776e-07, "loss": 0.9137, "step": 5677 }, { "epoch": 0.91, "grad_norm": 3.3933454578035662, "learning_rate": 3.7734491367944716e-07, "loss": 0.8423, "step": 5678 }, { "epoch": 0.92, "grad_norm": 3.4229621727848407, "learning_rate": 3.7592594548553354e-07, "loss": 0.9046, "step": 5679 }, { "epoch": 0.92, "grad_norm": 4.380578762120103, "learning_rate": 3.745095991546821e-07, "loss": 0.7912, "step": 5680 }, { "epoch": 0.92, "grad_norm": 3.2823387156344532, "learning_rate": 3.7309587507274313e-07, "loss": 0.9096, "step": 5681 }, { "epoch": 0.92, "grad_norm": 2.8042440674875397, "learning_rate": 3.716847736248541e-07, "loss": 0.9245, "step": 5682 }, { "epoch": 0.92, "grad_norm": 3.043942806082542, "learning_rate": 3.7027629519544085e-07, "loss": 0.9007, "step": 5683 }, { "epoch": 0.92, "grad_norm": 2.532803094318112, "learning_rate": 3.688704401682086e-07, "loss": 0.7711, "step": 5684 }, { "epoch": 0.92, "grad_norm": 4.480289951205725, "learning_rate": 3.6746720892615216e-07, "loss": 0.8233, "step": 5685 }, { "epoch": 0.92, "grad_norm": 2.683124262402902, "learning_rate": 3.6606660185154906e-07, "loss": 0.83, "step": 5686 }, { "epoch": 0.92, "grad_norm": 2.722294218274735, "learning_rate": 3.646686193259674e-07, "loss": 0.9428, "step": 5687 }, { "epoch": 0.92, "grad_norm": 4.186286864415964, "learning_rate": 3.632732617302515e-07, "loss": 0.8391, "step": 5688 }, { "epoch": 0.92, "grad_norm": 2.9988168926657233, "learning_rate": 3.61880529444536e-07, "loss": 0.9316, "step": 5689 }, { "epoch": 0.92, "grad_norm": 2.6786676807366834, "learning_rate": 3.604904228482431e-07, "loss": 0.869, "step": 5690 }, { "epoch": 0.92, "grad_norm": 1.9287459203710322, "learning_rate": 3.5910294232007206e-07, "loss": 0.3103, "step": 5691 }, { "epoch": 0.92, "grad_norm": 3.773690477819782, "learning_rate": 3.5771808823801266e-07, "loss": 0.8591, "step": 5692 }, { "epoch": 0.92, "grad_norm": 2.4915877909173965, "learning_rate": 3.563358609793377e-07, "loss": 0.9167, "step": 5693 }, { "epoch": 0.92, "grad_norm": 1.10407031136672, "learning_rate": 3.5495626092060367e-07, "loss": 0.8617, "step": 5694 }, { "epoch": 0.92, "grad_norm": 2.552057341358925, "learning_rate": 3.535792884376499e-07, "loss": 0.8488, "step": 5695 }, { "epoch": 0.92, "grad_norm": 3.038539812106845, "learning_rate": 3.5220494390560414e-07, "loss": 0.8682, "step": 5696 }, { "epoch": 0.92, "grad_norm": 2.7740216566806755, "learning_rate": 3.508332276988735e-07, "loss": 0.8069, "step": 5697 }, { "epoch": 0.92, "grad_norm": 2.0149729065694615, "learning_rate": 3.494641401911536e-07, "loss": 0.9062, "step": 5698 }, { "epoch": 0.92, "grad_norm": 3.8933213783324474, "learning_rate": 3.4809768175542046e-07, "loss": 0.8126, "step": 5699 }, { "epoch": 0.92, "grad_norm": 3.118820642194923, "learning_rate": 3.467338527639341e-07, "loss": 0.8624, "step": 5700 }, { "epoch": 0.92, "grad_norm": 3.33083551916592, "learning_rate": 3.453726535882418e-07, "loss": 0.8093, "step": 5701 }, { "epoch": 0.92, "grad_norm": 2.614298851879501, "learning_rate": 3.44014084599168e-07, "loss": 0.8278, "step": 5702 }, { "epoch": 0.92, "grad_norm": 1.5869726551683092, "learning_rate": 3.4265814616682766e-07, "loss": 0.8755, "step": 5703 }, { "epoch": 0.92, "grad_norm": 2.818184512100722, "learning_rate": 3.4130483866061327e-07, "loss": 0.8846, "step": 5704 }, { "epoch": 0.92, "grad_norm": 2.878469612622481, "learning_rate": 3.3995416244920643e-07, "loss": 0.861, "step": 5705 }, { "epoch": 0.92, "grad_norm": 1.2506038161070236, "learning_rate": 3.386061179005651e-07, "loss": 0.8638, "step": 5706 }, { "epoch": 0.92, "grad_norm": 2.1352350917626675, "learning_rate": 3.372607053819355e-07, "loss": 0.8401, "step": 5707 }, { "epoch": 0.92, "grad_norm": 2.3466808050806223, "learning_rate": 3.3591792525984324e-07, "loss": 0.8145, "step": 5708 }, { "epoch": 0.92, "grad_norm": 2.4231754872137294, "learning_rate": 3.345777779001036e-07, "loss": 0.9222, "step": 5709 }, { "epoch": 0.92, "grad_norm": 2.3696247307524256, "learning_rate": 3.3324026366780224e-07, "loss": 0.873, "step": 5710 }, { "epoch": 0.92, "grad_norm": 2.7022975471034005, "learning_rate": 3.3190538292732e-07, "loss": 0.8681, "step": 5711 }, { "epoch": 0.92, "grad_norm": 3.1982602277640058, "learning_rate": 3.305731360423159e-07, "loss": 0.9278, "step": 5712 }, { "epoch": 0.92, "grad_norm": 3.7160527040092703, "learning_rate": 3.2924352337572743e-07, "loss": 0.8689, "step": 5713 }, { "epoch": 0.92, "grad_norm": 3.181789893482838, "learning_rate": 3.2791654528977924e-07, "loss": 0.7778, "step": 5714 }, { "epoch": 0.92, "grad_norm": 3.323444352458997, "learning_rate": 3.2659220214597666e-07, "loss": 0.8502, "step": 5715 }, { "epoch": 0.92, "grad_norm": 3.3875114806709137, "learning_rate": 3.2527049430510883e-07, "loss": 0.8391, "step": 5716 }, { "epoch": 0.92, "grad_norm": 3.196723309420474, "learning_rate": 3.239514221272411e-07, "loss": 0.8763, "step": 5717 }, { "epoch": 0.92, "grad_norm": 3.18257119074425, "learning_rate": 3.226349859717293e-07, "loss": 0.8446, "step": 5718 }, { "epoch": 0.92, "grad_norm": 2.4762407448839734, "learning_rate": 3.2132118619720545e-07, "loss": 0.954, "step": 5719 }, { "epoch": 0.92, "grad_norm": 2.4151067851528647, "learning_rate": 3.2001002316158434e-07, "loss": 0.8877, "step": 5720 }, { "epoch": 0.92, "grad_norm": 2.009579604486366, "learning_rate": 3.1870149722206366e-07, "loss": 0.8251, "step": 5721 }, { "epoch": 0.92, "grad_norm": 2.9244331136572232, "learning_rate": 3.1739560873512155e-07, "loss": 0.8588, "step": 5722 }, { "epoch": 0.92, "grad_norm": 3.330026862233733, "learning_rate": 3.1609235805651896e-07, "loss": 0.8336, "step": 5723 }, { "epoch": 0.92, "grad_norm": 1.9534762257441807, "learning_rate": 3.147917455412952e-07, "loss": 0.3175, "step": 5724 }, { "epoch": 0.92, "grad_norm": 3.192151285004359, "learning_rate": 3.134937715437758e-07, "loss": 0.8042, "step": 5725 }, { "epoch": 0.92, "grad_norm": 3.493475145368662, "learning_rate": 3.121984364175612e-07, "loss": 0.891, "step": 5726 }, { "epoch": 0.92, "grad_norm": 2.143673139701272, "learning_rate": 3.109057405155402e-07, "loss": 0.894, "step": 5727 }, { "epoch": 0.92, "grad_norm": 3.7666532133644233, "learning_rate": 3.0961568418987673e-07, "loss": 0.8692, "step": 5728 }, { "epoch": 0.92, "grad_norm": 2.89105287426601, "learning_rate": 3.0832826779201633e-07, "loss": 0.8276, "step": 5729 }, { "epoch": 0.92, "grad_norm": 4.334609919578373, "learning_rate": 3.070434916726905e-07, "loss": 0.8612, "step": 5730 }, { "epoch": 0.92, "grad_norm": 2.557655873102569, "learning_rate": 3.0576135618190393e-07, "loss": 0.8802, "step": 5731 }, { "epoch": 0.92, "grad_norm": 3.2364146118026573, "learning_rate": 3.044818616689471e-07, "loss": 0.904, "step": 5732 }, { "epoch": 0.92, "grad_norm": 1.8874307556587648, "learning_rate": 3.032050084823901e-07, "loss": 0.827, "step": 5733 }, { "epoch": 0.92, "grad_norm": 2.3610077317986566, "learning_rate": 3.019307969700824e-07, "loss": 0.8332, "step": 5734 }, { "epoch": 0.92, "grad_norm": 2.2676732768647887, "learning_rate": 3.006592274791553e-07, "loss": 0.832, "step": 5735 }, { "epoch": 0.92, "grad_norm": 1.3846818605384323, "learning_rate": 2.993903003560172e-07, "loss": 0.337, "step": 5736 }, { "epoch": 0.92, "grad_norm": 3.0753702142426493, "learning_rate": 2.981240159463616e-07, "loss": 0.8532, "step": 5737 }, { "epoch": 0.92, "grad_norm": 2.70211023235437, "learning_rate": 2.9686037459515707e-07, "loss": 0.8467, "step": 5738 }, { "epoch": 0.92, "grad_norm": 3.181302047333706, "learning_rate": 2.9559937664665474e-07, "loss": 0.8671, "step": 5739 }, { "epoch": 0.92, "grad_norm": 3.1207171273761936, "learning_rate": 2.9434102244438544e-07, "loss": 0.8148, "step": 5740 }, { "epoch": 0.92, "grad_norm": 2.448901385563702, "learning_rate": 2.9308531233115947e-07, "loss": 0.9057, "step": 5741 }, { "epoch": 0.93, "grad_norm": 2.794549838615621, "learning_rate": 2.918322466490686e-07, "loss": 0.8565, "step": 5742 }, { "epoch": 0.93, "grad_norm": 3.465312732493905, "learning_rate": 2.905818257394799e-07, "loss": 0.8601, "step": 5743 }, { "epoch": 0.93, "grad_norm": 2.576473099896633, "learning_rate": 2.8933404994304417e-07, "loss": 0.8871, "step": 5744 }, { "epoch": 0.93, "grad_norm": 2.8398214179293, "learning_rate": 2.8808891959968946e-07, "loss": 0.8178, "step": 5745 }, { "epoch": 0.93, "grad_norm": 3.677107957680068, "learning_rate": 2.868464350486222e-07, "loss": 0.8538, "step": 5746 }, { "epoch": 0.93, "grad_norm": 1.396022779030209, "learning_rate": 2.856065966283317e-07, "loss": 0.858, "step": 5747 }, { "epoch": 0.93, "grad_norm": 3.408950295293801, "learning_rate": 2.8436940467658213e-07, "loss": 0.8831, "step": 5748 }, { "epoch": 0.93, "grad_norm": 1.880528310950607, "learning_rate": 2.831348595304206e-07, "loss": 0.8973, "step": 5749 }, { "epoch": 0.93, "grad_norm": 1.8290532029862467, "learning_rate": 2.8190296152617035e-07, "loss": 0.8989, "step": 5750 }, { "epoch": 0.93, "grad_norm": 3.135822020696232, "learning_rate": 2.8067371099943286e-07, "loss": 0.8785, "step": 5751 }, { "epoch": 0.93, "grad_norm": 3.6680393395014685, "learning_rate": 2.794471082850936e-07, "loss": 0.8831, "step": 5752 }, { "epoch": 0.93, "grad_norm": 3.1650856153530267, "learning_rate": 2.7822315371730965e-07, "loss": 0.9393, "step": 5753 }, { "epoch": 0.93, "grad_norm": 2.769159590690757, "learning_rate": 2.7700184762952e-07, "loss": 0.9, "step": 5754 }, { "epoch": 0.93, "grad_norm": 2.604291338161581, "learning_rate": 2.7578319035444277e-07, "loss": 0.8876, "step": 5755 }, { "epoch": 0.93, "grad_norm": 2.4925313969416694, "learning_rate": 2.7456718222407584e-07, "loss": 0.8256, "step": 5756 }, { "epoch": 0.93, "grad_norm": 3.2026063080414113, "learning_rate": 2.7335382356969196e-07, "loss": 0.8336, "step": 5757 }, { "epoch": 0.93, "grad_norm": 3.7391091190401697, "learning_rate": 2.721431147218412e-07, "loss": 0.8594, "step": 5758 }, { "epoch": 0.93, "grad_norm": 3.879352463241464, "learning_rate": 2.709350560103574e-07, "loss": 0.8071, "step": 5759 }, { "epoch": 0.93, "grad_norm": 1.11435323936795, "learning_rate": 2.697296477643474e-07, "loss": 0.8699, "step": 5760 }, { "epoch": 0.93, "grad_norm": 1.8727604524715136, "learning_rate": 2.6852689031219626e-07, "loss": 0.3373, "step": 5761 }, { "epoch": 0.93, "grad_norm": 2.0380676882867688, "learning_rate": 2.6732678398157077e-07, "loss": 0.296, "step": 5762 }, { "epoch": 0.93, "grad_norm": 2.4725746756019773, "learning_rate": 2.6612932909941267e-07, "loss": 0.9017, "step": 5763 }, { "epoch": 0.93, "grad_norm": 3.549189547587188, "learning_rate": 2.6493452599194115e-07, "loss": 0.9099, "step": 5764 }, { "epoch": 0.93, "grad_norm": 2.4718411565072156, "learning_rate": 2.637423749846524e-07, "loss": 0.9073, "step": 5765 }, { "epoch": 0.93, "grad_norm": 3.4710884803390254, "learning_rate": 2.625528764023222e-07, "loss": 0.897, "step": 5766 }, { "epoch": 0.93, "grad_norm": 2.7194823232456047, "learning_rate": 2.6136603056900356e-07, "loss": 0.8968, "step": 5767 }, { "epoch": 0.93, "grad_norm": 3.619927375865843, "learning_rate": 2.601818378080245e-07, "loss": 0.768, "step": 5768 }, { "epoch": 0.93, "grad_norm": 1.741669855921014, "learning_rate": 2.590002984419937e-07, "loss": 0.9547, "step": 5769 }, { "epoch": 0.93, "grad_norm": 3.389090898534631, "learning_rate": 2.578214127927925e-07, "loss": 0.8369, "step": 5770 }, { "epoch": 0.93, "grad_norm": 2.240299400177419, "learning_rate": 2.56645181181584e-07, "loss": 0.8314, "step": 5771 }, { "epoch": 0.93, "grad_norm": 3.4398960432712475, "learning_rate": 2.5547160392880523e-07, "loss": 0.8618, "step": 5772 }, { "epoch": 0.93, "grad_norm": 2.6836850790727467, "learning_rate": 2.543006813541704e-07, "loss": 0.8575, "step": 5773 }, { "epoch": 0.93, "grad_norm": 3.8565751394644767, "learning_rate": 2.531324137766722e-07, "loss": 0.8274, "step": 5774 }, { "epoch": 0.93, "grad_norm": 2.022509387811439, "learning_rate": 2.5196680151457933e-07, "loss": 0.3251, "step": 5775 }, { "epoch": 0.93, "grad_norm": 3.547169274041431, "learning_rate": 2.508038448854344e-07, "loss": 0.8001, "step": 5776 }, { "epoch": 0.93, "grad_norm": 3.586064355139782, "learning_rate": 2.496435442060607e-07, "loss": 0.9081, "step": 5777 }, { "epoch": 0.93, "grad_norm": 4.34492597510314, "learning_rate": 2.484858997925566e-07, "loss": 0.9395, "step": 5778 }, { "epoch": 0.93, "grad_norm": 3.232610248712419, "learning_rate": 2.47330911960294e-07, "loss": 0.8947, "step": 5779 }, { "epoch": 0.93, "grad_norm": 3.335624161953005, "learning_rate": 2.461785810239259e-07, "loss": 0.7911, "step": 5780 }, { "epoch": 0.93, "grad_norm": 3.5472586672138844, "learning_rate": 2.4502890729737773e-07, "loss": 0.921, "step": 5781 }, { "epoch": 0.93, "grad_norm": 2.52074967132539, "learning_rate": 2.4388189109385227e-07, "loss": 0.2988, "step": 5782 }, { "epoch": 0.93, "grad_norm": 4.1148566175396875, "learning_rate": 2.427375327258286e-07, "loss": 0.852, "step": 5783 }, { "epoch": 0.93, "grad_norm": 3.038183560790898, "learning_rate": 2.4159583250506157e-07, "loss": 0.8646, "step": 5784 }, { "epoch": 0.93, "grad_norm": 2.856354832968714, "learning_rate": 2.4045679074258253e-07, "loss": 0.83, "step": 5785 }, { "epoch": 0.93, "grad_norm": 2.308417660473149, "learning_rate": 2.393204077486966e-07, "loss": 0.9185, "step": 5786 }, { "epoch": 0.93, "grad_norm": 3.368685572756553, "learning_rate": 2.3818668383298605e-07, "loss": 0.8404, "step": 5787 }, { "epoch": 0.93, "grad_norm": 2.298838496286078, "learning_rate": 2.3705561930430942e-07, "loss": 0.8181, "step": 5788 }, { "epoch": 0.93, "grad_norm": 3.9334971350706343, "learning_rate": 2.3592721447079912e-07, "loss": 0.8653, "step": 5789 }, { "epoch": 0.93, "grad_norm": 3.6133158254257904, "learning_rate": 2.348014696398626e-07, "loss": 0.8051, "step": 5790 }, { "epoch": 0.93, "grad_norm": 1.853727145732566, "learning_rate": 2.3367838511818675e-07, "loss": 0.8767, "step": 5791 }, { "epoch": 0.93, "grad_norm": 2.8394054720043496, "learning_rate": 2.325579612117279e-07, "loss": 0.9009, "step": 5792 }, { "epoch": 0.93, "grad_norm": 3.713632990002652, "learning_rate": 2.3144019822572194e-07, "loss": 0.8663, "step": 5793 }, { "epoch": 0.93, "grad_norm": 2.366112594040745, "learning_rate": 2.303250964646786e-07, "loss": 0.958, "step": 5794 }, { "epoch": 0.93, "grad_norm": 3.1457564152962387, "learning_rate": 2.2921265623238042e-07, "loss": 0.9408, "step": 5795 }, { "epoch": 0.93, "grad_norm": 2.550134083083735, "learning_rate": 2.2810287783188833e-07, "loss": 0.9487, "step": 5796 }, { "epoch": 0.93, "grad_norm": 2.240738988980162, "learning_rate": 2.2699576156553715e-07, "loss": 0.8429, "step": 5797 }, { "epoch": 0.93, "grad_norm": 4.413975213462191, "learning_rate": 2.258913077349334e-07, "loss": 0.8553, "step": 5798 }, { "epoch": 0.93, "grad_norm": 3.2307810695836507, "learning_rate": 2.2478951664096305e-07, "loss": 0.9333, "step": 5799 }, { "epoch": 0.93, "grad_norm": 2.9918421453867845, "learning_rate": 2.236903885837849e-07, "loss": 0.8856, "step": 5800 }, { "epoch": 0.93, "grad_norm": 2.488257104106747, "learning_rate": 2.2259392386282829e-07, "loss": 0.8597, "step": 5801 }, { "epoch": 0.93, "grad_norm": 2.7776277077721194, "learning_rate": 2.215001227768032e-07, "loss": 0.9151, "step": 5802 }, { "epoch": 0.93, "grad_norm": 2.359362267466161, "learning_rate": 2.204089856236913e-07, "loss": 0.9048, "step": 5803 }, { "epoch": 0.94, "grad_norm": 2.9628347795186114, "learning_rate": 2.1932051270074807e-07, "loss": 0.8621, "step": 5804 }, { "epoch": 0.94, "grad_norm": 1.6927582796993197, "learning_rate": 2.182347043045019e-07, "loss": 0.8043, "step": 5805 }, { "epoch": 0.94, "grad_norm": 2.204324057812137, "learning_rate": 2.1715156073075838e-07, "loss": 0.8786, "step": 5806 }, { "epoch": 0.94, "grad_norm": 2.8834671062976573, "learning_rate": 2.1607108227459594e-07, "loss": 0.8682, "step": 5807 }, { "epoch": 0.94, "grad_norm": 2.0325899839482333, "learning_rate": 2.1499326923036688e-07, "loss": 0.7673, "step": 5808 }, { "epoch": 0.94, "grad_norm": 3.0907357684922223, "learning_rate": 2.1391812189169526e-07, "loss": 0.8354, "step": 5809 }, { "epoch": 0.94, "grad_norm": 2.1333780621766874, "learning_rate": 2.1284564055148337e-07, "loss": 0.9401, "step": 5810 }, { "epoch": 0.94, "grad_norm": 2.822194662818947, "learning_rate": 2.1177582550190313e-07, "loss": 0.9161, "step": 5811 }, { "epoch": 0.94, "grad_norm": 2.3023568475646496, "learning_rate": 2.1070867703440135e-07, "loss": 0.8946, "step": 5812 }, { "epoch": 0.94, "grad_norm": 1.8074522432637266, "learning_rate": 2.0964419543970104e-07, "loss": 0.891, "step": 5813 }, { "epoch": 0.94, "grad_norm": 3.179881846794046, "learning_rate": 2.0858238100779248e-07, "loss": 0.8544, "step": 5814 }, { "epoch": 0.94, "grad_norm": 2.7305793794369, "learning_rate": 2.075232340279465e-07, "loss": 0.9096, "step": 5815 }, { "epoch": 0.94, "grad_norm": 2.057562379690973, "learning_rate": 2.0646675478870337e-07, "loss": 0.8659, "step": 5816 }, { "epoch": 0.94, "grad_norm": 4.246651845294314, "learning_rate": 2.0541294357787512e-07, "loss": 0.8146, "step": 5817 }, { "epoch": 0.94, "grad_norm": 1.9380013832347174, "learning_rate": 2.0436180068255207e-07, "loss": 0.8115, "step": 5818 }, { "epoch": 0.94, "grad_norm": 2.7123458677879464, "learning_rate": 2.0331332638909184e-07, "loss": 0.3092, "step": 5819 }, { "epoch": 0.94, "grad_norm": 2.0958155222364088, "learning_rate": 2.022675209831282e-07, "loss": 0.7702, "step": 5820 }, { "epoch": 0.94, "grad_norm": 3.1790872893083626, "learning_rate": 2.0122438474956764e-07, "loss": 0.8425, "step": 5821 }, { "epoch": 0.94, "grad_norm": 2.2008842815186775, "learning_rate": 2.0018391797259063e-07, "loss": 0.9458, "step": 5822 }, { "epoch": 0.94, "grad_norm": 2.5302123780504178, "learning_rate": 1.9914612093564822e-07, "loss": 0.8138, "step": 5823 }, { "epoch": 0.94, "grad_norm": 2.9460895029470846, "learning_rate": 1.9811099392146427e-07, "loss": 0.8777, "step": 5824 }, { "epoch": 0.94, "grad_norm": 4.081728457669427, "learning_rate": 1.970785372120354e-07, "loss": 0.9129, "step": 5825 }, { "epoch": 0.94, "grad_norm": 2.892739012659339, "learning_rate": 1.960487510886333e-07, "loss": 0.8812, "step": 5826 }, { "epoch": 0.94, "grad_norm": 2.6155866139283055, "learning_rate": 1.95021635831798e-07, "loss": 0.8833, "step": 5827 }, { "epoch": 0.94, "grad_norm": 2.639840274660612, "learning_rate": 1.9399719172134458e-07, "loss": 0.8667, "step": 5828 }, { "epoch": 0.94, "grad_norm": 2.2594508790165633, "learning_rate": 1.9297541903636196e-07, "loss": 0.9117, "step": 5829 }, { "epoch": 0.94, "grad_norm": 2.245157859536719, "learning_rate": 1.9195631805520642e-07, "loss": 0.8508, "step": 5830 }, { "epoch": 0.94, "grad_norm": 3.1664931768442934, "learning_rate": 1.909398890555092e-07, "loss": 0.8483, "step": 5831 }, { "epoch": 0.94, "grad_norm": 3.111412415976069, "learning_rate": 1.8992613231417546e-07, "loss": 0.8468, "step": 5832 }, { "epoch": 0.94, "grad_norm": 2.521610886216749, "learning_rate": 1.889150481073798e-07, "loss": 0.8203, "step": 5833 }, { "epoch": 0.94, "grad_norm": 2.047997436275964, "learning_rate": 1.8790663671056863e-07, "loss": 0.8682, "step": 5834 }, { "epoch": 0.94, "grad_norm": 3.6337059973121577, "learning_rate": 1.8690089839846215e-07, "loss": 0.8734, "step": 5835 }, { "epoch": 0.94, "grad_norm": 2.340942939789664, "learning_rate": 1.8589783344504897e-07, "loss": 0.8489, "step": 5836 }, { "epoch": 0.94, "grad_norm": 2.9940286653990067, "learning_rate": 1.8489744212359495e-07, "loss": 0.8419, "step": 5837 }, { "epoch": 0.94, "grad_norm": 1.8585199265252068, "learning_rate": 1.8389972470663208e-07, "loss": 0.8898, "step": 5838 }, { "epoch": 0.94, "grad_norm": 2.730388678384909, "learning_rate": 1.829046814659663e-07, "loss": 0.8833, "step": 5839 }, { "epoch": 0.94, "grad_norm": 3.7312228254545934, "learning_rate": 1.8191231267267629e-07, "loss": 0.8096, "step": 5840 }, { "epoch": 0.94, "grad_norm": 3.664183266933011, "learning_rate": 1.8092261859710802e-07, "loss": 0.9295, "step": 5841 }, { "epoch": 0.94, "grad_norm": 3.101680724535581, "learning_rate": 1.799355995088836e-07, "loss": 0.9312, "step": 5842 }, { "epoch": 0.94, "grad_norm": 3.5679873317955484, "learning_rate": 1.7895125567689354e-07, "loss": 0.8165, "step": 5843 }, { "epoch": 0.94, "grad_norm": 3.0994496872263406, "learning_rate": 1.7796958736929992e-07, "loss": 0.8891, "step": 5844 }, { "epoch": 0.94, "grad_norm": 3.0026652894273833, "learning_rate": 1.7699059485353775e-07, "loss": 0.8856, "step": 5845 }, { "epoch": 0.94, "grad_norm": 2.5825483811343513, "learning_rate": 1.7601427839630814e-07, "loss": 0.8701, "step": 5846 }, { "epoch": 0.94, "grad_norm": 2.5755433074207192, "learning_rate": 1.7504063826359053e-07, "loss": 0.832, "step": 5847 }, { "epoch": 0.94, "grad_norm": 3.678293535103377, "learning_rate": 1.740696747206294e-07, "loss": 0.845, "step": 5848 }, { "epoch": 0.94, "grad_norm": 2.127518563822746, "learning_rate": 1.731013880319421e-07, "loss": 0.9054, "step": 5849 }, { "epoch": 0.94, "grad_norm": 2.6872140599213736, "learning_rate": 1.7213577846131647e-07, "loss": 0.8325, "step": 5850 }, { "epoch": 0.94, "grad_norm": 2.5832485267090792, "learning_rate": 1.7117284627181207e-07, "loss": 0.8529, "step": 5851 }, { "epoch": 0.94, "grad_norm": 3.4265032956652877, "learning_rate": 1.7021259172575688e-07, "loss": 0.8635, "step": 5852 }, { "epoch": 0.94, "grad_norm": 2.242878132965752, "learning_rate": 1.6925501508475162e-07, "loss": 0.8839, "step": 5853 }, { "epoch": 0.94, "grad_norm": 3.440237169296015, "learning_rate": 1.6830011660966648e-07, "loss": 0.7929, "step": 5854 }, { "epoch": 0.94, "grad_norm": 2.145886491530643, "learning_rate": 1.673478965606423e-07, "loss": 0.8998, "step": 5855 }, { "epoch": 0.94, "grad_norm": 4.334642921533693, "learning_rate": 1.6639835519708826e-07, "loss": 0.8229, "step": 5856 }, { "epoch": 0.94, "grad_norm": 2.6578306095463926, "learning_rate": 1.6545149277768845e-07, "loss": 0.93, "step": 5857 }, { "epoch": 0.94, "grad_norm": 2.5666275277394464, "learning_rate": 1.6450730956039328e-07, "loss": 0.8672, "step": 5858 }, { "epoch": 0.94, "grad_norm": 3.442407607401838, "learning_rate": 1.6356580580242253e-07, "loss": 0.8603, "step": 5859 }, { "epoch": 0.94, "grad_norm": 1.9286284841948196, "learning_rate": 1.626269817602699e-07, "loss": 0.9206, "step": 5860 }, { "epoch": 0.94, "grad_norm": 3.156222503844495, "learning_rate": 1.616908376896964e-07, "loss": 0.9415, "step": 5861 }, { "epoch": 0.94, "grad_norm": 2.4091272367745025, "learning_rate": 1.6075737384573354e-07, "loss": 0.8059, "step": 5862 }, { "epoch": 0.94, "grad_norm": 2.7480638797670394, "learning_rate": 1.5982659048268124e-07, "loss": 0.8931, "step": 5863 }, { "epoch": 0.94, "grad_norm": 2.596774991493355, "learning_rate": 1.588984878541133e-07, "loss": 0.9221, "step": 5864 }, { "epoch": 0.94, "grad_norm": 3.1160539368159204, "learning_rate": 1.5797306621286757e-07, "loss": 0.8807, "step": 5865 }, { "epoch": 0.95, "grad_norm": 4.027966011881318, "learning_rate": 1.5705032581105563e-07, "loss": 0.8882, "step": 5866 }, { "epoch": 0.95, "grad_norm": 3.406525347018626, "learning_rate": 1.561302669000586e-07, "loss": 0.8075, "step": 5867 }, { "epoch": 0.95, "grad_norm": 4.608250988482676, "learning_rate": 1.5521288973052274e-07, "loss": 0.8591, "step": 5868 }, { "epoch": 0.95, "grad_norm": 3.9902579166354615, "learning_rate": 1.5429819455237137e-07, "loss": 0.8343, "step": 5869 }, { "epoch": 0.95, "grad_norm": 2.6086649585383253, "learning_rate": 1.5338618161478857e-07, "loss": 0.8458, "step": 5870 }, { "epoch": 0.95, "grad_norm": 2.353167521585226, "learning_rate": 1.5247685116623335e-07, "loss": 0.8102, "step": 5871 }, { "epoch": 0.95, "grad_norm": 2.0145215703207544, "learning_rate": 1.5157020345443195e-07, "loss": 0.8628, "step": 5872 }, { "epoch": 0.95, "grad_norm": 2.825422565960535, "learning_rate": 1.5066623872638242e-07, "loss": 0.8306, "step": 5873 }, { "epoch": 0.95, "grad_norm": 3.279182000388939, "learning_rate": 1.497649572283466e-07, "loss": 0.8812, "step": 5874 }, { "epoch": 0.95, "grad_norm": 2.263108743789892, "learning_rate": 1.4886635920586036e-07, "loss": 0.8379, "step": 5875 }, { "epoch": 0.95, "grad_norm": 3.112063678354729, "learning_rate": 1.479704449037256e-07, "loss": 0.8553, "step": 5876 }, { "epoch": 0.95, "grad_norm": 2.590280533403619, "learning_rate": 1.4707721456601486e-07, "loss": 0.8282, "step": 5877 }, { "epoch": 0.95, "grad_norm": 2.9692897908216103, "learning_rate": 1.46186668436068e-07, "loss": 0.8712, "step": 5878 }, { "epoch": 0.95, "grad_norm": 2.5941696746674756, "learning_rate": 1.4529880675649534e-07, "loss": 0.8975, "step": 5879 }, { "epoch": 0.95, "grad_norm": 2.5814544448985393, "learning_rate": 1.444136297691734e-07, "loss": 0.8203, "step": 5880 }, { "epoch": 0.95, "grad_norm": 2.4440025257653204, "learning_rate": 1.435311377152493e-07, "loss": 0.9159, "step": 5881 }, { "epoch": 0.95, "grad_norm": 1.8332805481449879, "learning_rate": 1.426513308351385e-07, "loss": 0.9288, "step": 5882 }, { "epoch": 0.95, "grad_norm": 2.2417304340732613, "learning_rate": 1.4177420936852482e-07, "loss": 0.9299, "step": 5883 }, { "epoch": 0.95, "grad_norm": 1.9342801195580124, "learning_rate": 1.4089977355436045e-07, "loss": 0.8133, "step": 5884 }, { "epoch": 0.95, "grad_norm": 1.9242134444194199, "learning_rate": 1.4002802363086486e-07, "loss": 0.9398, "step": 5885 }, { "epoch": 0.95, "grad_norm": 2.6296285059403637, "learning_rate": 1.3915895983552806e-07, "loss": 0.866, "step": 5886 }, { "epoch": 0.95, "grad_norm": 3.585046063157819, "learning_rate": 1.3829258240510624e-07, "loss": 0.8822, "step": 5887 }, { "epoch": 0.95, "grad_norm": 2.690301809716163, "learning_rate": 1.374288915756228e-07, "loss": 0.9436, "step": 5888 }, { "epoch": 0.95, "grad_norm": 3.016143754209525, "learning_rate": 1.3656788758237504e-07, "loss": 0.8654, "step": 5889 }, { "epoch": 0.95, "grad_norm": 3.8258445954176095, "learning_rate": 1.3570957065991987e-07, "loss": 0.8702, "step": 5890 }, { "epoch": 0.95, "grad_norm": 2.918741596217994, "learning_rate": 1.3485394104209015e-07, "loss": 0.9184, "step": 5891 }, { "epoch": 0.95, "grad_norm": 3.01041480111633, "learning_rate": 1.340009989619806e-07, "loss": 0.9021, "step": 5892 }, { "epoch": 0.95, "grad_norm": 3.7938324468411344, "learning_rate": 1.3315074465195533e-07, "loss": 0.8874, "step": 5893 }, { "epoch": 0.95, "grad_norm": 2.9456979514545636, "learning_rate": 1.3230317834365013e-07, "loss": 0.8624, "step": 5894 }, { "epoch": 0.95, "grad_norm": 3.7852499746523307, "learning_rate": 1.3145830026796368e-07, "loss": 0.877, "step": 5895 }, { "epoch": 0.95, "grad_norm": 2.9633605222309347, "learning_rate": 1.3061611065506409e-07, "loss": 0.8848, "step": 5896 }, { "epoch": 0.95, "grad_norm": 1.987346856796916, "learning_rate": 1.2977660973438667e-07, "loss": 0.9123, "step": 5897 }, { "epoch": 0.95, "grad_norm": 3.0106064539728745, "learning_rate": 1.2893979773463516e-07, "loss": 0.9147, "step": 5898 }, { "epoch": 0.95, "grad_norm": 3.000841817366254, "learning_rate": 1.2810567488378055e-07, "loss": 0.8953, "step": 5899 }, { "epoch": 0.95, "grad_norm": 3.1187270719797704, "learning_rate": 1.2727424140905998e-07, "loss": 0.8492, "step": 5900 }, { "epoch": 0.95, "grad_norm": 4.503964479174218, "learning_rate": 1.264454975369789e-07, "loss": 0.8456, "step": 5901 }, { "epoch": 0.95, "grad_norm": 2.9171182192077825, "learning_rate": 1.2561944349331223e-07, "loss": 0.8725, "step": 5902 }, { "epoch": 0.95, "grad_norm": 3.1496217833630222, "learning_rate": 1.247960795030967e-07, "loss": 0.8418, "step": 5903 }, { "epoch": 0.95, "grad_norm": 2.647062231510437, "learning_rate": 1.239754057906406e-07, "loss": 0.8602, "step": 5904 }, { "epoch": 0.95, "grad_norm": 3.8590950188643998, "learning_rate": 1.2315742257951847e-07, "loss": 0.8057, "step": 5905 }, { "epoch": 0.95, "grad_norm": 2.085562454396939, "learning_rate": 1.22342130092572e-07, "loss": 0.8709, "step": 5906 }, { "epoch": 0.95, "grad_norm": 2.668625847030263, "learning_rate": 1.215295285519069e-07, "loss": 0.9186, "step": 5907 }, { "epoch": 0.95, "grad_norm": 2.9362122980119443, "learning_rate": 1.2071961817890053e-07, "loss": 0.8752, "step": 5908 }, { "epoch": 0.95, "grad_norm": 3.0086180561899556, "learning_rate": 1.1991239919419529e-07, "loss": 0.8987, "step": 5909 }, { "epoch": 0.95, "grad_norm": 3.220457346405869, "learning_rate": 1.1910787181769745e-07, "loss": 0.8361, "step": 5910 }, { "epoch": 0.95, "grad_norm": 3.5685488562342895, "learning_rate": 1.1830603626858394e-07, "loss": 0.8614, "step": 5911 }, { "epoch": 0.95, "grad_norm": 2.3333383855310874, "learning_rate": 1.175068927652967e-07, "loss": 0.8854, "step": 5912 }, { "epoch": 0.95, "grad_norm": 2.8539082950863532, "learning_rate": 1.1671044152554378e-07, "loss": 0.8495, "step": 5913 }, { "epoch": 0.95, "grad_norm": 1.5274587914734912, "learning_rate": 1.1591668276630274e-07, "loss": 0.8796, "step": 5914 }, { "epoch": 0.95, "grad_norm": 2.8637881383266994, "learning_rate": 1.1512561670381172e-07, "loss": 0.9139, "step": 5915 }, { "epoch": 0.95, "grad_norm": 2.8908145326904924, "learning_rate": 1.1433724355358167e-07, "loss": 0.8971, "step": 5916 }, { "epoch": 0.95, "grad_norm": 2.8635272119480364, "learning_rate": 1.1355156353038743e-07, "loss": 0.3042, "step": 5917 }, { "epoch": 0.95, "grad_norm": 2.7698418318007545, "learning_rate": 1.127685768482667e-07, "loss": 0.8253, "step": 5918 }, { "epoch": 0.95, "grad_norm": 2.157999186136483, "learning_rate": 1.1198828372052994e-07, "loss": 0.851, "step": 5919 }, { "epoch": 0.95, "grad_norm": 3.168605829755085, "learning_rate": 1.1121068435974935e-07, "loss": 0.931, "step": 5920 }, { "epoch": 0.95, "grad_norm": 2.4237318242666896, "learning_rate": 1.1043577897776547e-07, "loss": 0.8845, "step": 5921 }, { "epoch": 0.95, "grad_norm": 3.4854878243712646, "learning_rate": 1.0966356778568055e-07, "loss": 0.8433, "step": 5922 }, { "epoch": 0.95, "grad_norm": 2.9285775892345747, "learning_rate": 1.0889405099386962e-07, "loss": 0.9294, "step": 5923 }, { "epoch": 0.95, "grad_norm": 2.2409909341910477, "learning_rate": 1.0812722881197058e-07, "loss": 0.8795, "step": 5924 }, { "epoch": 0.95, "grad_norm": 2.508390270951434, "learning_rate": 1.0736310144888296e-07, "loss": 0.8486, "step": 5925 }, { "epoch": 0.95, "grad_norm": 3.295701817051862, "learning_rate": 1.0660166911277914e-07, "loss": 0.8483, "step": 5926 }, { "epoch": 0.95, "grad_norm": 2.5622914857463948, "learning_rate": 1.0584293201109541e-07, "loss": 0.8836, "step": 5927 }, { "epoch": 0.96, "grad_norm": 3.1783984521423765, "learning_rate": 1.0508689035052977e-07, "loss": 0.9085, "step": 5928 }, { "epoch": 0.96, "grad_norm": 3.8959080626040423, "learning_rate": 1.0433354433705078e-07, "loss": 0.8601, "step": 5929 }, { "epoch": 0.96, "grad_norm": 3.3716722259500793, "learning_rate": 1.0358289417588874e-07, "loss": 0.7818, "step": 5930 }, { "epoch": 0.96, "grad_norm": 2.6033439150345674, "learning_rate": 1.0283494007154448e-07, "loss": 0.8024, "step": 5931 }, { "epoch": 0.96, "grad_norm": 3.2784480165773413, "learning_rate": 1.0208968222777838e-07, "loss": 0.8742, "step": 5932 }, { "epoch": 0.96, "grad_norm": 3.8003765020069835, "learning_rate": 1.0134712084762022e-07, "loss": 0.8675, "step": 5933 }, { "epoch": 0.96, "grad_norm": 3.519018273434318, "learning_rate": 1.0060725613336375e-07, "loss": 0.7861, "step": 5934 }, { "epoch": 0.96, "grad_norm": 3.689393414104558, "learning_rate": 9.987008828656997e-08, "loss": 0.9215, "step": 5935 }, { "epoch": 0.96, "grad_norm": 2.2077573948899283, "learning_rate": 9.913561750806378e-08, "loss": 0.7954, "step": 5936 }, { "epoch": 0.96, "grad_norm": 2.482791323898592, "learning_rate": 9.84038439979329e-08, "loss": 0.859, "step": 5937 }, { "epoch": 0.96, "grad_norm": 2.562881906474511, "learning_rate": 9.767476795553454e-08, "loss": 0.9282, "step": 5938 }, { "epoch": 0.96, "grad_norm": 3.0338182089752617, "learning_rate": 9.69483895794876e-08, "loss": 0.8424, "step": 5939 }, { "epoch": 0.96, "grad_norm": 2.6086004329663197, "learning_rate": 9.622470906767933e-08, "loss": 0.8436, "step": 5940 }, { "epoch": 0.96, "grad_norm": 2.6094642669576, "learning_rate": 9.550372661725982e-08, "loss": 0.8737, "step": 5941 }, { "epoch": 0.96, "grad_norm": 2.785204615447211, "learning_rate": 9.478544242464415e-08, "loss": 0.9162, "step": 5942 }, { "epoch": 0.96, "grad_norm": 2.716028978437573, "learning_rate": 9.40698566855125e-08, "loss": 0.8498, "step": 5943 }, { "epoch": 0.96, "grad_norm": 2.749243632250331, "learning_rate": 9.335696959481e-08, "loss": 0.8588, "step": 5944 }, { "epoch": 0.96, "grad_norm": 2.819834660762593, "learning_rate": 9.264678134674687e-08, "loss": 0.8978, "step": 5945 }, { "epoch": 0.96, "grad_norm": 3.44801594724751, "learning_rate": 9.193929213480057e-08, "loss": 0.8939, "step": 5946 }, { "epoch": 0.96, "grad_norm": 4.14367747523849, "learning_rate": 9.123450215170693e-08, "loss": 0.8703, "step": 5947 }, { "epoch": 0.96, "grad_norm": 2.5338304341463633, "learning_rate": 9.053241158947123e-08, "loss": 0.8562, "step": 5948 }, { "epoch": 0.96, "grad_norm": 1.9860895514336927, "learning_rate": 8.983302063936272e-08, "loss": 0.892, "step": 5949 }, { "epoch": 0.96, "grad_norm": 2.239519186358017, "learning_rate": 8.913632949191564e-08, "loss": 0.2934, "step": 5950 }, { "epoch": 0.96, "grad_norm": 3.5560557655965166, "learning_rate": 8.844233833692595e-08, "loss": 0.7787, "step": 5951 }, { "epoch": 0.96, "grad_norm": 3.0635431809579963, "learning_rate": 8.775104736345796e-08, "loss": 0.8888, "step": 5952 }, { "epoch": 0.96, "grad_norm": 3.085834433850463, "learning_rate": 8.70624567598366e-08, "loss": 0.8893, "step": 5953 }, { "epoch": 0.96, "grad_norm": 3.0094170588231113, "learning_rate": 8.637656671365402e-08, "loss": 0.848, "step": 5954 }, { "epoch": 0.96, "grad_norm": 3.2482024870646007, "learning_rate": 8.56933774117652e-08, "loss": 0.8889, "step": 5955 }, { "epoch": 0.96, "grad_norm": 2.074607338578423, "learning_rate": 8.501288904029014e-08, "loss": 0.9051, "step": 5956 }, { "epoch": 0.96, "grad_norm": 3.252926462584712, "learning_rate": 8.433510178461168e-08, "loss": 0.844, "step": 5957 }, { "epoch": 0.96, "grad_norm": 1.671926479794824, "learning_rate": 8.366001582937988e-08, "loss": 0.8566, "step": 5958 }, { "epoch": 0.96, "grad_norm": 2.298042882759897, "learning_rate": 8.29876313585043e-08, "loss": 0.8557, "step": 5959 }, { "epoch": 0.96, "grad_norm": 3.8060520866065293, "learning_rate": 8.231794855516173e-08, "loss": 0.8975, "step": 5960 }, { "epoch": 0.96, "grad_norm": 2.3511858160008376, "learning_rate": 8.165096760179181e-08, "loss": 0.8425, "step": 5961 }, { "epoch": 0.96, "grad_norm": 2.7502625080008345, "learning_rate": 8.09866886801014e-08, "loss": 0.7873, "step": 5962 }, { "epoch": 0.96, "grad_norm": 2.677533855206385, "learning_rate": 8.032511197105353e-08, "loss": 0.8279, "step": 5963 }, { "epoch": 0.96, "grad_norm": 3.9966188684256054, "learning_rate": 7.966623765488513e-08, "loss": 0.8419, "step": 5964 }, { "epoch": 0.96, "grad_norm": 2.7574421144913464, "learning_rate": 7.901006591108817e-08, "loss": 0.8907, "step": 5965 }, { "epoch": 0.96, "grad_norm": 1.7674522734461353, "learning_rate": 7.83565969184219e-08, "loss": 0.3315, "step": 5966 }, { "epoch": 0.96, "grad_norm": 3.073012080031342, "learning_rate": 7.770583085491168e-08, "loss": 0.8074, "step": 5967 }, { "epoch": 0.96, "grad_norm": 2.0078975199350997, "learning_rate": 7.705776789784237e-08, "loss": 0.8558, "step": 5968 }, { "epoch": 0.96, "grad_norm": 3.543826906691731, "learning_rate": 7.641240822376495e-08, "loss": 0.8577, "step": 5969 }, { "epoch": 0.96, "grad_norm": 2.208274612605723, "learning_rate": 7.576975200849212e-08, "loss": 0.3474, "step": 5970 }, { "epoch": 0.96, "grad_norm": 3.2420263043512567, "learning_rate": 7.512979942710163e-08, "loss": 0.9024, "step": 5971 }, { "epoch": 0.96, "grad_norm": 2.561101206026854, "learning_rate": 7.449255065393624e-08, "loss": 0.7891, "step": 5972 }, { "epoch": 0.96, "grad_norm": 2.9782543430480826, "learning_rate": 7.385800586259595e-08, "loss": 0.8732, "step": 5973 }, { "epoch": 0.96, "grad_norm": 4.022999445206625, "learning_rate": 7.32261652259525e-08, "loss": 0.8367, "step": 5974 }, { "epoch": 0.96, "grad_norm": 2.3654893884124473, "learning_rate": 7.259702891613374e-08, "loss": 0.9075, "step": 5975 }, { "epoch": 0.96, "grad_norm": 4.707678937869028, "learning_rate": 7.19705971045348e-08, "loss": 0.9287, "step": 5976 }, { "epoch": 0.96, "grad_norm": 2.907440269952221, "learning_rate": 7.134686996181361e-08, "loss": 0.9094, "step": 5977 }, { "epoch": 0.96, "grad_norm": 2.8862410009350423, "learning_rate": 7.07258476578887e-08, "loss": 0.9378, "step": 5978 }, { "epoch": 0.96, "grad_norm": 1.0904961277588208, "learning_rate": 7.010753036194584e-08, "loss": 0.3159, "step": 5979 }, { "epoch": 0.96, "grad_norm": 3.1470114078902864, "learning_rate": 6.949191824243028e-08, "loss": 0.8467, "step": 5980 }, { "epoch": 0.96, "grad_norm": 2.295596850539641, "learning_rate": 6.887901146705344e-08, "loss": 0.3135, "step": 5981 }, { "epoch": 0.96, "grad_norm": 1.9268633605229335, "learning_rate": 6.82688102027862e-08, "loss": 0.8434, "step": 5982 }, { "epoch": 0.96, "grad_norm": 4.250228651291226, "learning_rate": 6.766131461586445e-08, "loss": 0.9471, "step": 5983 }, { "epoch": 0.96, "grad_norm": 3.1512514323042535, "learning_rate": 6.705652487178693e-08, "loss": 0.8821, "step": 5984 }, { "epoch": 0.96, "grad_norm": 2.1243879895487923, "learning_rate": 6.645444113531519e-08, "loss": 0.8626, "step": 5985 }, { "epoch": 0.96, "grad_norm": 3.559501992657116, "learning_rate": 6.585506357047466e-08, "loss": 0.7385, "step": 5986 }, { "epoch": 0.96, "grad_norm": 2.305485370696294, "learning_rate": 6.525839234055032e-08, "loss": 0.8159, "step": 5987 }, { "epoch": 0.96, "grad_norm": 1.9955926852263663, "learning_rate": 6.46644276080921e-08, "loss": 0.925, "step": 5988 }, { "epoch": 0.96, "grad_norm": 2.887815192388215, "learning_rate": 6.407316953491393e-08, "loss": 0.8559, "step": 5989 }, { "epoch": 0.97, "grad_norm": 3.6106676041260504, "learning_rate": 6.348461828208919e-08, "loss": 0.9524, "step": 5990 }, { "epoch": 0.97, "grad_norm": 2.417981371130227, "learning_rate": 6.289877400995625e-08, "loss": 0.9044, "step": 5991 }, { "epoch": 0.97, "grad_norm": 2.8962374252026595, "learning_rate": 6.231563687811526e-08, "loss": 0.9005, "step": 5992 }, { "epoch": 0.97, "grad_norm": 3.5840324045232017, "learning_rate": 6.173520704542802e-08, "loss": 0.8489, "step": 5993 }, { "epoch": 0.97, "grad_norm": 4.116731158741072, "learning_rate": 6.115748467002136e-08, "loss": 0.8272, "step": 5994 }, { "epoch": 0.97, "grad_norm": 2.5685459547842195, "learning_rate": 6.058246990928051e-08, "loss": 0.9069, "step": 5995 }, { "epoch": 0.97, "grad_norm": 3.1293518282529744, "learning_rate": 6.001016291985795e-08, "loss": 0.8515, "step": 5996 }, { "epoch": 0.97, "grad_norm": 2.96670751079203, "learning_rate": 5.944056385766339e-08, "loss": 0.8256, "step": 5997 }, { "epoch": 0.97, "grad_norm": 3.2249694290486444, "learning_rate": 5.887367287787271e-08, "loss": 0.893, "step": 5998 }, { "epoch": 0.97, "grad_norm": 4.041178224812926, "learning_rate": 5.8309490134921265e-08, "loss": 0.8896, "step": 5999 }, { "epoch": 0.97, "grad_norm": 1.5049257464166739, "learning_rate": 5.774801578251055e-08, "loss": 0.9188, "step": 6000 }, { "epoch": 0.97, "grad_norm": 2.4385236889093513, "learning_rate": 5.718924997359932e-08, "loss": 0.9089, "step": 6001 }, { "epoch": 0.97, "grad_norm": 2.9978234024509707, "learning_rate": 5.663319286041136e-08, "loss": 0.8138, "step": 6002 }, { "epoch": 0.97, "grad_norm": 3.7673146109731475, "learning_rate": 5.6079844594433276e-08, "loss": 0.848, "step": 6003 }, { "epoch": 0.97, "grad_norm": 3.431495554880417, "learning_rate": 5.552920532641004e-08, "loss": 0.8911, "step": 6004 }, { "epoch": 0.97, "grad_norm": 2.5463829267259634, "learning_rate": 5.498127520635277e-08, "loss": 0.7866, "step": 6005 }, { "epoch": 0.97, "grad_norm": 2.5406003080437376, "learning_rate": 5.4436054383532054e-08, "loss": 0.8726, "step": 6006 }, { "epoch": 0.97, "grad_norm": 2.459463010810325, "learning_rate": 5.389354300648131e-08, "loss": 0.8559, "step": 6007 }, { "epoch": 0.97, "grad_norm": 3.479340752607296, "learning_rate": 5.3353741222995634e-08, "loss": 0.8545, "step": 6008 }, { "epoch": 0.97, "grad_norm": 2.8948363273353723, "learning_rate": 5.281664918013185e-08, "loss": 0.8647, "step": 6009 }, { "epoch": 0.97, "grad_norm": 3.139474079042692, "learning_rate": 5.2282267024207355e-08, "loss": 0.8417, "step": 6010 }, { "epoch": 0.97, "grad_norm": 2.531319134556945, "learning_rate": 5.1750594900805697e-08, "loss": 0.9028, "step": 6011 }, { "epoch": 0.97, "grad_norm": 2.920863175251042, "learning_rate": 5.1221632954765455e-08, "loss": 0.8194, "step": 6012 }, { "epoch": 0.97, "grad_norm": 2.9803613965860336, "learning_rate": 5.069538133019247e-08, "loss": 0.8642, "step": 6013 }, { "epoch": 0.97, "grad_norm": 4.05824056489311, "learning_rate": 5.017184017045207e-08, "loss": 0.8084, "step": 6014 }, { "epoch": 0.97, "grad_norm": 3.6415969795408945, "learning_rate": 4.965100961817126e-08, "loss": 0.9242, "step": 6015 }, { "epoch": 0.97, "grad_norm": 2.442804092016021, "learning_rate": 4.913288981523878e-08, "loss": 0.9411, "step": 6016 }, { "epoch": 0.97, "grad_norm": 2.5792631469166696, "learning_rate": 4.8617480902802826e-08, "loss": 0.8148, "step": 6017 }, { "epoch": 0.97, "grad_norm": 3.451882531528935, "learning_rate": 4.8104783021277746e-08, "loss": 0.9336, "step": 6018 }, { "epoch": 0.97, "grad_norm": 3.010572242456703, "learning_rate": 4.759479631033514e-08, "loss": 0.899, "step": 6019 }, { "epoch": 0.97, "grad_norm": 3.9557772096282617, "learning_rate": 4.708752090890944e-08, "loss": 0.8018, "step": 6020 }, { "epoch": 0.97, "grad_norm": 2.8671061740743973, "learning_rate": 4.6582956955196765e-08, "loss": 0.8888, "step": 6021 }, { "epoch": 0.97, "grad_norm": 3.727493649636583, "learning_rate": 4.608110458665382e-08, "loss": 0.8958, "step": 6022 }, { "epoch": 0.97, "grad_norm": 4.0009940819499885, "learning_rate": 4.558196394000014e-08, "loss": 0.889, "step": 6023 }, { "epoch": 0.97, "grad_norm": 3.17576443338106, "learning_rate": 4.508553515121472e-08, "loss": 0.8575, "step": 6024 }, { "epoch": 0.97, "grad_norm": 1.9660243588410278, "learning_rate": 4.4591818355538276e-08, "loss": 0.8951, "step": 6025 }, { "epoch": 0.97, "grad_norm": 2.673737885963565, "learning_rate": 4.41008136874721e-08, "loss": 0.8157, "step": 6026 }, { "epoch": 0.97, "grad_norm": 2.4971926185155415, "learning_rate": 4.361252128078031e-08, "loss": 0.8646, "step": 6027 }, { "epoch": 0.97, "grad_norm": 1.7084652531869198, "learning_rate": 4.31269412684876e-08, "loss": 0.8121, "step": 6028 }, { "epoch": 0.97, "grad_norm": 3.5065930802150973, "learning_rate": 4.264407378287927e-08, "loss": 0.8749, "step": 6029 }, { "epoch": 0.97, "grad_norm": 2.8953933564541163, "learning_rate": 4.216391895550121e-08, "loss": 0.9388, "step": 6030 }, { "epoch": 0.97, "grad_norm": 3.6891985068804507, "learning_rate": 4.168647691716099e-08, "loss": 0.9354, "step": 6031 }, { "epoch": 0.97, "grad_norm": 3.332694485162585, "learning_rate": 4.12117477979257e-08, "loss": 0.9195, "step": 6032 }, { "epoch": 0.97, "grad_norm": 2.70090009379853, "learning_rate": 4.0739731727127416e-08, "loss": 0.8953, "step": 6033 }, { "epoch": 0.97, "grad_norm": 3.716829204569064, "learning_rate": 4.027042883335441e-08, "loss": 0.8597, "step": 6034 }, { "epoch": 0.97, "grad_norm": 2.9419577032247552, "learning_rate": 3.980383924445774e-08, "loss": 0.939, "step": 6035 }, { "epoch": 0.97, "grad_norm": 3.5937624723798516, "learning_rate": 3.933996308755017e-08, "loss": 0.9177, "step": 6036 }, { "epoch": 0.97, "grad_norm": 2.088907021191315, "learning_rate": 3.887880048900394e-08, "loss": 0.8588, "step": 6037 }, { "epoch": 0.97, "grad_norm": 4.046493217693543, "learning_rate": 3.8420351574453005e-08, "loss": 0.8752, "step": 6038 }, { "epoch": 0.97, "grad_norm": 1.962505245809165, "learning_rate": 3.796461646878968e-08, "loss": 0.8479, "step": 6039 }, { "epoch": 0.97, "grad_norm": 3.5488427707199857, "learning_rate": 3.751159529617021e-08, "loss": 0.8606, "step": 6040 }, { "epoch": 0.97, "grad_norm": 3.812339529193596, "learning_rate": 3.706128818001031e-08, "loss": 0.8754, "step": 6041 }, { "epoch": 0.97, "grad_norm": 3.3602915378520883, "learning_rate": 3.6613695242984085e-08, "loss": 0.8545, "step": 6042 }, { "epoch": 0.97, "grad_norm": 2.556679138872676, "learning_rate": 3.616881660703064e-08, "loss": 0.8761, "step": 6043 }, { "epoch": 0.97, "grad_norm": 2.374560968075399, "learning_rate": 3.5726652393346385e-08, "loss": 0.8323, "step": 6044 }, { "epoch": 0.97, "grad_norm": 2.477128021461687, "learning_rate": 3.528720272238828e-08, "loss": 0.8976, "step": 6045 }, { "epoch": 0.97, "grad_norm": 2.091886345724584, "learning_rate": 3.485046771387612e-08, "loss": 0.8795, "step": 6046 }, { "epoch": 0.97, "grad_norm": 2.873442020381589, "learning_rate": 3.441644748678585e-08, "loss": 0.8111, "step": 6047 }, { "epoch": 0.97, "grad_norm": 4.264231695310885, "learning_rate": 3.398514215935955e-08, "loss": 0.8228, "step": 6048 }, { "epoch": 0.97, "grad_norm": 3.87057969022299, "learning_rate": 3.355655184909545e-08, "loss": 0.8809, "step": 6049 }, { "epoch": 0.97, "grad_norm": 3.0923352330073044, "learning_rate": 3.313067667275238e-08, "loss": 0.9066, "step": 6050 }, { "epoch": 0.97, "grad_norm": 2.5400511709450435, "learning_rate": 3.270751674635197e-08, "loss": 0.9255, "step": 6051 }, { "epoch": 0.98, "grad_norm": 3.600889196782834, "learning_rate": 3.228707218517313e-08, "loss": 0.8872, "step": 6052 }, { "epoch": 0.98, "grad_norm": 2.703259415123547, "learning_rate": 3.186934310375866e-08, "loss": 0.8852, "step": 6053 }, { "epoch": 0.98, "grad_norm": 2.299892493513549, "learning_rate": 3.1454329615907554e-08, "loss": 0.7734, "step": 6054 }, { "epoch": 0.98, "grad_norm": 2.3759615106258742, "learning_rate": 3.104203183468157e-08, "loss": 0.8376, "step": 6055 }, { "epoch": 0.98, "grad_norm": 3.113589703489335, "learning_rate": 3.0632449872401994e-08, "loss": 0.8692, "step": 6056 }, { "epoch": 0.98, "grad_norm": 2.037814990075609, "learning_rate": 3.0225583840650665e-08, "loss": 0.8749, "step": 6057 }, { "epoch": 0.98, "grad_norm": 3.71872589359965, "learning_rate": 2.982143385026892e-08, "loss": 0.8682, "step": 6058 }, { "epoch": 0.98, "grad_norm": 3.504854378062772, "learning_rate": 2.9420000011357585e-08, "loss": 0.8433, "step": 6059 }, { "epoch": 0.98, "grad_norm": 3.7155645495503102, "learning_rate": 2.9021282433279173e-08, "loss": 0.8434, "step": 6060 }, { "epoch": 0.98, "grad_norm": 2.802182579882784, "learning_rate": 2.8625281224654578e-08, "loss": 0.9325, "step": 6061 }, { "epoch": 0.98, "grad_norm": 2.8680136007627572, "learning_rate": 2.8231996493366387e-08, "loss": 0.8831, "step": 6062 }, { "epoch": 0.98, "grad_norm": 2.7825599757171693, "learning_rate": 2.7841428346556676e-08, "loss": 0.9238, "step": 6063 }, { "epoch": 0.98, "grad_norm": 4.165305919975437, "learning_rate": 2.745357689062478e-08, "loss": 0.7728, "step": 6064 }, { "epoch": 0.98, "grad_norm": 4.275186965155575, "learning_rate": 2.706844223123395e-08, "loss": 0.8179, "step": 6065 }, { "epoch": 0.98, "grad_norm": 2.00423483729328, "learning_rate": 2.6686024473304706e-08, "loss": 0.8659, "step": 6066 }, { "epoch": 0.98, "grad_norm": 3.301990752421507, "learning_rate": 2.6306323721018156e-08, "loss": 0.8869, "step": 6067 }, { "epoch": 0.98, "grad_norm": 2.48684454481982, "learning_rate": 2.5929340077816e-08, "loss": 0.8282, "step": 6068 }, { "epoch": 0.98, "grad_norm": 3.688210629023642, "learning_rate": 2.555507364639831e-08, "loss": 0.8268, "step": 6069 }, { "epoch": 0.98, "grad_norm": 2.7910286093557315, "learning_rate": 2.5183524528725744e-08, "loss": 0.8593, "step": 6070 }, { "epoch": 0.98, "grad_norm": 3.349446373357186, "learning_rate": 2.481469282601845e-08, "loss": 0.8232, "step": 6071 }, { "epoch": 0.98, "grad_norm": 3.771448720944236, "learning_rate": 2.444857863875605e-08, "loss": 0.8311, "step": 6072 }, { "epoch": 0.98, "grad_norm": 2.6160422481135113, "learning_rate": 2.408518206667876e-08, "loss": 0.9057, "step": 6073 }, { "epoch": 0.98, "grad_norm": 2.080675438739082, "learning_rate": 2.3724503208786276e-08, "loss": 0.8278, "step": 6074 }, { "epoch": 0.98, "grad_norm": 3.1274829918317346, "learning_rate": 2.3366542163336668e-08, "loss": 0.8449, "step": 6075 }, { "epoch": 0.98, "grad_norm": 2.4228323736203548, "learning_rate": 2.3011299027847488e-08, "loss": 0.8546, "step": 6076 }, { "epoch": 0.98, "grad_norm": 2.8039511561418204, "learning_rate": 2.2658773899097986e-08, "loss": 0.8308, "step": 6077 }, { "epoch": 0.98, "grad_norm": 3.1362433542399355, "learning_rate": 2.230896687312578e-08, "loss": 0.889, "step": 6078 }, { "epoch": 0.98, "grad_norm": 3.039314789094251, "learning_rate": 2.196187804522798e-08, "loss": 0.8744, "step": 6079 }, { "epoch": 0.98, "grad_norm": 2.6822124185552405, "learning_rate": 2.1617507509960058e-08, "loss": 0.863, "step": 6080 }, { "epoch": 0.98, "grad_norm": 3.049713064986206, "learning_rate": 2.1275855361140297e-08, "loss": 0.8294, "step": 6081 }, { "epoch": 0.98, "grad_norm": 3.646078323125643, "learning_rate": 2.0936921691842028e-08, "loss": 0.7864, "step": 6082 }, { "epoch": 0.98, "grad_norm": 3.5798719381092, "learning_rate": 2.0600706594400278e-08, "loss": 0.836, "step": 6083 }, { "epoch": 0.98, "grad_norm": 2.784370472023447, "learning_rate": 2.0267210160409557e-08, "loss": 0.9152, "step": 6084 }, { "epoch": 0.98, "grad_norm": 2.85566362167427, "learning_rate": 1.9936432480723854e-08, "loss": 0.8802, "step": 6085 }, { "epoch": 0.98, "grad_norm": 2.9195217509085833, "learning_rate": 1.9608373645456648e-08, "loss": 0.8749, "step": 6086 }, { "epoch": 0.98, "grad_norm": 3.4209426236708858, "learning_rate": 1.9283033743978663e-08, "loss": 0.8442, "step": 6087 }, { "epoch": 0.98, "grad_norm": 2.5216120208177775, "learning_rate": 1.896041286492345e-08, "loss": 0.8585, "step": 6088 }, { "epoch": 0.98, "grad_norm": 2.5674134575838417, "learning_rate": 1.86405110961807e-08, "loss": 0.8647, "step": 6089 }, { "epoch": 0.98, "grad_norm": 1.6390913561670974, "learning_rate": 1.8323328524899597e-08, "loss": 0.9172, "step": 6090 }, { "epoch": 0.98, "grad_norm": 2.907253133276293, "learning_rate": 1.8008865237491014e-08, "loss": 0.8276, "step": 6091 }, { "epoch": 0.98, "grad_norm": 1.6514901916785487, "learning_rate": 1.7697121319621978e-08, "loss": 0.8951, "step": 6092 }, { "epoch": 0.98, "grad_norm": 2.787082001004324, "learning_rate": 1.7388096856221227e-08, "loss": 0.87, "step": 6093 }, { "epoch": 0.98, "grad_norm": 1.9756691463067229, "learning_rate": 1.7081791931475855e-08, "loss": 0.8446, "step": 6094 }, { "epoch": 0.98, "grad_norm": 2.8474318595148294, "learning_rate": 1.677820662883134e-08, "loss": 0.9072, "step": 6095 }, { "epoch": 0.98, "grad_norm": 3.306013269728968, "learning_rate": 1.647734103099152e-08, "loss": 0.8908, "step": 6096 }, { "epoch": 0.98, "grad_norm": 1.76880467468517, "learning_rate": 1.6179195219921952e-08, "loss": 0.8712, "step": 6097 }, { "epoch": 0.98, "grad_norm": 3.4865146790073314, "learning_rate": 1.588376927684432e-08, "loss": 0.8545, "step": 6098 }, { "epoch": 0.98, "grad_norm": 2.834092898561764, "learning_rate": 1.5591063282242026e-08, "loss": 0.8539, "step": 6099 }, { "epoch": 0.98, "grad_norm": 2.8152282198175307, "learning_rate": 1.530107731585684e-08, "loss": 0.8658, "step": 6100 }, { "epoch": 0.98, "grad_norm": 2.725177489633676, "learning_rate": 1.5013811456687787e-08, "loss": 0.7972, "step": 6101 }, { "epoch": 0.98, "grad_norm": 3.3694923106755503, "learning_rate": 1.4729265782993384e-08, "loss": 0.8411, "step": 6102 }, { "epoch": 0.98, "grad_norm": 1.0912185802502736, "learning_rate": 1.4447440372292732e-08, "loss": 0.8656, "step": 6103 }, { "epoch": 0.98, "grad_norm": 3.3824144089520902, "learning_rate": 1.41683353013633e-08, "loss": 0.8232, "step": 6104 }, { "epoch": 0.98, "grad_norm": 2.9334411175027797, "learning_rate": 1.3891950646239827e-08, "loss": 0.8712, "step": 6105 }, { "epoch": 0.98, "grad_norm": 3.1348675387444414, "learning_rate": 1.3618286482218745e-08, "loss": 0.8499, "step": 6106 }, { "epoch": 0.98, "grad_norm": 3.300115213405879, "learning_rate": 1.3347342883851532e-08, "loss": 0.8839, "step": 6107 }, { "epoch": 0.98, "grad_norm": 3.0892534041664192, "learning_rate": 1.3079119924952477e-08, "loss": 0.9021, "step": 6108 }, { "epoch": 0.98, "grad_norm": 2.8172318390008613, "learning_rate": 1.2813617678592017e-08, "loss": 0.7187, "step": 6109 }, { "epoch": 0.98, "grad_norm": 2.298568101565712, "learning_rate": 1.2550836217101182e-08, "loss": 0.8189, "step": 6110 }, { "epoch": 0.98, "grad_norm": 4.482289111595954, "learning_rate": 1.2290775612067151e-08, "loss": 0.8422, "step": 6111 }, { "epoch": 0.98, "grad_norm": 3.2328645431879837, "learning_rate": 1.2033435934338811e-08, "loss": 0.914, "step": 6112 }, { "epoch": 0.98, "grad_norm": 1.2823965013766858, "learning_rate": 1.1778817254022301e-08, "loss": 0.3204, "step": 6113 }, { "epoch": 0.99, "grad_norm": 2.8166173678566317, "learning_rate": 1.1526919640483247e-08, "loss": 0.9145, "step": 6114 }, { "epoch": 0.99, "grad_norm": 2.0914266420723253, "learning_rate": 1.1277743162345644e-08, "loss": 0.3054, "step": 6115 }, { "epoch": 0.99, "grad_norm": 3.2146983895980017, "learning_rate": 1.103128788749075e-08, "loss": 0.8953, "step": 6116 }, { "epoch": 0.99, "grad_norm": 3.7060793909749794, "learning_rate": 1.0787553883061519e-08, "loss": 0.91, "step": 6117 }, { "epoch": 0.99, "grad_norm": 3.304529064519911, "learning_rate": 1.0546541215455952e-08, "loss": 0.898, "step": 6118 }, { "epoch": 0.99, "grad_norm": 3.5438616215317884, "learning_rate": 1.0308249950333749e-08, "loss": 0.8395, "step": 6119 }, { "epoch": 0.99, "grad_norm": 1.0855396008325011, "learning_rate": 1.0072680152611869e-08, "loss": 0.8698, "step": 6120 }, { "epoch": 0.99, "grad_norm": 1.6677869210757494, "learning_rate": 9.839831886465644e-09, "loss": 0.3498, "step": 6121 }, { "epoch": 0.99, "grad_norm": 2.542716721381938, "learning_rate": 9.60970521532878e-09, "loss": 0.8647, "step": 6122 }, { "epoch": 0.99, "grad_norm": 3.849254109843677, "learning_rate": 9.382300201896676e-09, "loss": 0.8544, "step": 6123 }, { "epoch": 0.99, "grad_norm": 1.3765646095562811, "learning_rate": 9.157616908117562e-09, "loss": 0.3242, "step": 6124 }, { "epoch": 0.99, "grad_norm": 2.0427882039978646, "learning_rate": 8.935655395203579e-09, "loss": 0.8527, "step": 6125 }, { "epoch": 0.99, "grad_norm": 4.412685807506195, "learning_rate": 8.716415723621918e-09, "loss": 0.8741, "step": 6126 }, { "epoch": 0.99, "grad_norm": 2.9218005818564547, "learning_rate": 8.499897953100355e-09, "loss": 0.8334, "step": 6127 }, { "epoch": 0.99, "grad_norm": 2.5685299892832023, "learning_rate": 8.28610214262393e-09, "loss": 0.8855, "step": 6128 }, { "epoch": 0.99, "grad_norm": 1.791205442929911, "learning_rate": 8.075028350436054e-09, "loss": 0.8442, "step": 6129 }, { "epoch": 0.99, "grad_norm": 1.0511168897544763, "learning_rate": 7.866676634039617e-09, "loss": 0.8907, "step": 6130 }, { "epoch": 0.99, "grad_norm": 3.4012471828708386, "learning_rate": 7.66104705019588e-09, "loss": 0.919, "step": 6131 }, { "epoch": 0.99, "grad_norm": 2.662027157628498, "learning_rate": 7.45813965492337e-09, "loss": 0.7835, "step": 6132 }, { "epoch": 0.99, "grad_norm": 2.973812565123322, "learning_rate": 7.257954503498976e-09, "loss": 0.8583, "step": 6133 }, { "epoch": 0.99, "grad_norm": 1.9741099826935027, "learning_rate": 7.060491650459078e-09, "loss": 0.3129, "step": 6134 }, { "epoch": 0.99, "grad_norm": 1.7983456108421307, "learning_rate": 6.8657511495984205e-09, "loss": 0.9305, "step": 6135 }, { "epoch": 0.99, "grad_norm": 3.000267175857157, "learning_rate": 6.673733053970122e-09, "loss": 0.8213, "step": 6136 }, { "epoch": 0.99, "grad_norm": 2.3459227599675234, "learning_rate": 6.4844374158834485e-09, "loss": 0.8689, "step": 6137 }, { "epoch": 0.99, "grad_norm": 1.9444290054556068, "learning_rate": 6.297864286910482e-09, "loss": 0.8685, "step": 6138 }, { "epoch": 0.99, "grad_norm": 1.9746280295738823, "learning_rate": 6.114013717876121e-09, "loss": 0.9159, "step": 6139 }, { "epoch": 0.99, "grad_norm": 2.386430239057633, "learning_rate": 5.9328857588680785e-09, "loss": 0.8741, "step": 6140 }, { "epoch": 0.99, "grad_norm": 2.5863631436953463, "learning_rate": 5.754480459229106e-09, "loss": 0.8127, "step": 6141 }, { "epoch": 0.99, "grad_norm": 1.8117194632610645, "learning_rate": 5.5787978675636566e-09, "loss": 0.8067, "step": 6142 }, { "epoch": 0.99, "grad_norm": 2.580684801343294, "learning_rate": 5.405838031731225e-09, "loss": 0.9336, "step": 6143 }, { "epoch": 0.99, "grad_norm": 2.0906778827815637, "learning_rate": 5.235600998850787e-09, "loss": 0.8549, "step": 6144 }, { "epoch": 0.99, "grad_norm": 2.3565724888301407, "learning_rate": 5.068086815300799e-09, "loss": 0.8938, "step": 6145 }, { "epoch": 0.99, "grad_norm": 1.898847621997337, "learning_rate": 4.9032955267158675e-09, "loss": 0.3447, "step": 6146 }, { "epoch": 0.99, "grad_norm": 3.8352824659903613, "learning_rate": 4.741227177988972e-09, "loss": 0.8021, "step": 6147 }, { "epoch": 0.99, "grad_norm": 3.151752553570084, "learning_rate": 4.581881813272571e-09, "loss": 0.9502, "step": 6148 }, { "epoch": 0.99, "grad_norm": 2.662073640310645, "learning_rate": 4.4252594759774945e-09, "loss": 0.9312, "step": 6149 }, { "epoch": 0.99, "grad_norm": 3.4576571312847686, "learning_rate": 4.271360208770725e-09, "loss": 0.8688, "step": 6150 }, { "epoch": 0.99, "grad_norm": 2.7258758117419877, "learning_rate": 4.120184053579834e-09, "loss": 0.9123, "step": 6151 }, { "epoch": 0.99, "grad_norm": 4.276272518541356, "learning_rate": 3.971731051588545e-09, "loss": 0.8437, "step": 6152 }, { "epoch": 0.99, "grad_norm": 3.133929350825311, "learning_rate": 3.826001243240063e-09, "loss": 0.8659, "step": 6153 }, { "epoch": 0.99, "grad_norm": 1.8931391369709603, "learning_rate": 3.682994668234852e-09, "loss": 0.8009, "step": 6154 }, { "epoch": 0.99, "grad_norm": 3.1216035314374744, "learning_rate": 3.542711365531748e-09, "loss": 0.9007, "step": 6155 }, { "epoch": 0.99, "grad_norm": 3.0764070160625696, "learning_rate": 3.405151373347959e-09, "loss": 0.8187, "step": 6156 }, { "epoch": 0.99, "grad_norm": 2.7848482322774517, "learning_rate": 3.270314729159063e-09, "loss": 0.9231, "step": 6157 }, { "epoch": 0.99, "grad_norm": 2.332875570171568, "learning_rate": 3.138201469697899e-09, "loss": 0.8158, "step": 6158 }, { "epoch": 0.99, "grad_norm": 2.5916823453112077, "learning_rate": 3.008811630955677e-09, "loss": 0.8341, "step": 6159 }, { "epoch": 0.99, "grad_norm": 3.980060230738678, "learning_rate": 2.882145248181978e-09, "loss": 0.8527, "step": 6160 }, { "epoch": 0.99, "grad_norm": 2.177369228250926, "learning_rate": 2.7582023558847537e-09, "loss": 0.8811, "step": 6161 }, { "epoch": 0.99, "grad_norm": 4.706874430089968, "learning_rate": 2.6369829878281074e-09, "loss": 0.9104, "step": 6162 }, { "epoch": 0.99, "grad_norm": 5.141888216872713, "learning_rate": 2.5184871770356224e-09, "loss": 0.7705, "step": 6163 }, { "epoch": 0.99, "grad_norm": 1.595570963845963, "learning_rate": 2.4027149557903638e-09, "loss": 0.8869, "step": 6164 }, { "epoch": 0.99, "grad_norm": 3.486806525737384, "learning_rate": 2.2896663556304378e-09, "loss": 0.7803, "step": 6165 }, { "epoch": 0.99, "grad_norm": 1.6320699801307372, "learning_rate": 2.1793414073545407e-09, "loss": 0.9302, "step": 6166 }, { "epoch": 0.99, "grad_norm": 3.3429584457548893, "learning_rate": 2.0717401410164097e-09, "loss": 0.9134, "step": 6167 }, { "epoch": 0.99, "grad_norm": 3.670304502281157, "learning_rate": 1.9668625859314838e-09, "loss": 0.8797, "step": 6168 }, { "epoch": 0.99, "grad_norm": 2.3872297523495574, "learning_rate": 1.8647087706702425e-09, "loss": 0.9742, "step": 6169 }, { "epoch": 0.99, "grad_norm": 2.076833567497343, "learning_rate": 1.7652787230637569e-09, "loss": 0.8671, "step": 6170 }, { "epoch": 0.99, "grad_norm": 3.3092095208513266, "learning_rate": 1.6685724701970274e-09, "loss": 0.817, "step": 6171 }, { "epoch": 0.99, "grad_norm": 3.5802629904875265, "learning_rate": 1.5745900384167567e-09, "loss": 0.869, "step": 6172 }, { "epoch": 0.99, "grad_norm": 4.275903858313701, "learning_rate": 1.4833314533269084e-09, "loss": 0.8245, "step": 6173 }, { "epoch": 0.99, "grad_norm": 4.0954833110422255, "learning_rate": 1.3947967397887064e-09, "loss": 0.8461, "step": 6174 }, { "epoch": 0.99, "grad_norm": 2.8125271266052927, "learning_rate": 1.3089859219195256e-09, "loss": 0.9167, "step": 6175 }, { "epoch": 1.0, "grad_norm": 2.949250277925436, "learning_rate": 1.2258990230995527e-09, "loss": 0.8903, "step": 6176 }, { "epoch": 1.0, "grad_norm": 2.4436242854180574, "learning_rate": 1.1455360659617942e-09, "loss": 0.9263, "step": 6177 }, { "epoch": 1.0, "grad_norm": 3.5172264690096027, "learning_rate": 1.0678970724009585e-09, "loss": 0.8855, "step": 6178 }, { "epoch": 1.0, "grad_norm": 2.4893629756007845, "learning_rate": 9.92982063565684e-10, "loss": 0.9236, "step": 6179 }, { "epoch": 1.0, "grad_norm": 3.0835327049612946, "learning_rate": 9.207910598674208e-10, "loss": 0.9303, "step": 6180 }, { "epoch": 1.0, "grad_norm": 2.119394988514432, "learning_rate": 8.513240809715495e-10, "loss": 0.2993, "step": 6181 }, { "epoch": 1.0, "grad_norm": 2.3415452887232067, "learning_rate": 7.845811458029317e-10, "loss": 0.9192, "step": 6182 }, { "epoch": 1.0, "grad_norm": 2.0050020846691377, "learning_rate": 7.205622725448003e-10, "loss": 0.3202, "step": 6183 }, { "epoch": 1.0, "grad_norm": 3.059818884781584, "learning_rate": 6.592674786376485e-10, "loss": 0.9016, "step": 6184 }, { "epoch": 1.0, "grad_norm": 4.6409979776329955, "learning_rate": 6.006967807781206e-10, "loss": 0.864, "step": 6185 }, { "epoch": 1.0, "grad_norm": 2.4092876532291028, "learning_rate": 5.448501949256724e-10, "loss": 0.8435, "step": 6186 }, { "epoch": 1.0, "grad_norm": 2.8714278876568833, "learning_rate": 4.917277362914696e-10, "loss": 0.8626, "step": 6187 }, { "epoch": 1.0, "grad_norm": 2.209221919042844, "learning_rate": 4.413294193483797e-10, "loss": 0.7608, "step": 6188 }, { "epoch": 1.0, "grad_norm": 3.246588016343276, "learning_rate": 3.936552578276409e-10, "loss": 0.928, "step": 6189 }, { "epoch": 1.0, "grad_norm": 2.3737058627186896, "learning_rate": 3.48705264715532e-10, "loss": 0.8737, "step": 6190 }, { "epoch": 1.0, "grad_norm": 3.3948225767716917, "learning_rate": 3.0647945225781294e-10, "loss": 0.8765, "step": 6191 }, { "epoch": 1.0, "grad_norm": 2.962670294780324, "learning_rate": 2.669778319586147e-10, "loss": 0.8811, "step": 6192 }, { "epoch": 1.0, "grad_norm": 2.3352701005592302, "learning_rate": 2.3020041457821885e-10, "loss": 0.8975, "step": 6193 }, { "epoch": 1.0, "grad_norm": 1.8147945349757522, "learning_rate": 1.9614721013749838e-10, "loss": 0.8824, "step": 6194 }, { "epoch": 1.0, "grad_norm": 3.809401581994583, "learning_rate": 1.6481822791125646e-10, "loss": 0.8699, "step": 6195 }, { "epoch": 1.0, "grad_norm": 2.3311974649927394, "learning_rate": 1.3621347643710814e-10, "loss": 0.9328, "step": 6196 }, { "epoch": 1.0, "grad_norm": 2.982332819100557, "learning_rate": 1.1033296350548839e-10, "loss": 0.8597, "step": 6197 }, { "epoch": 1.0, "grad_norm": 3.8294849704836493, "learning_rate": 8.717669616742364e-11, "loss": 0.9127, "step": 6198 }, { "epoch": 1.0, "grad_norm": 2.2060131864456234, "learning_rate": 6.674468073231133e-11, "loss": 0.8326, "step": 6199 }, { "epoch": 1.0, "grad_norm": 2.6595692259960053, "learning_rate": 4.903692276569949e-11, "loss": 0.9079, "step": 6200 }, { "epoch": 1.0, "grad_norm": 2.5557888818272296, "learning_rate": 3.405342709150716e-11, "loss": 0.8926, "step": 6201 }, { "epoch": 1.0, "grad_norm": 3.5447535955667906, "learning_rate": 2.1794197790914183e-11, "loss": 0.928, "step": 6202 }, { "epoch": 1.0, "grad_norm": 2.3382899454033526, "learning_rate": 1.2259238205691858e-11, "loss": 0.8979, "step": 6203 }, { "epoch": 1.0, "grad_norm": 2.2591421046857185, "learning_rate": 5.4485509326518415e-12, "loss": 0.9474, "step": 6204 }, { "epoch": 1.0, "grad_norm": 2.641509500475263, "learning_rate": 1.362137825866583e-12, "loss": 0.8338, "step": 6205 }, { "epoch": 1.0, "grad_norm": 7.775977374401064, "learning_rate": 0.0, "loss": 0.5315, "step": 6206 }, { "epoch": 1.0, "step": 6206, "total_flos": 2.654469138789748e+19, "train_loss": 0.8955248313020439, "train_runtime": 50646.3432, "train_samples_per_second": 15.686, "train_steps_per_second": 0.123 } ], "logging_steps": 1.0, "max_steps": 6206, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 20000, "total_flos": 2.654469138789748e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }