{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.2278725729175424, "eval_steps": 500, "global_step": 4800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007263977481669807, "grad_norm": 63.2759549108439, "learning_rate": 3.333333333333334e-08, "loss": 7.3622, "step": 1 }, { "epoch": 0.0014527954963339613, "grad_norm": 63.41948831357221, "learning_rate": 6.666666666666668e-08, "loss": 7.3563, "step": 2 }, { "epoch": 0.002179193244500942, "grad_norm": 63.20335169729121, "learning_rate": 1.0000000000000001e-07, "loss": 7.3613, "step": 3 }, { "epoch": 0.0029055909926679227, "grad_norm": 63.088082938073725, "learning_rate": 1.3333333333333336e-07, "loss": 7.3498, "step": 4 }, { "epoch": 0.0036319887408349035, "grad_norm": 66.62993398865497, "learning_rate": 1.6666666666666668e-07, "loss": 7.4933, "step": 5 }, { "epoch": 0.004358386489001884, "grad_norm": 64.28207217419981, "learning_rate": 2.0000000000000002e-07, "loss": 7.3739, "step": 6 }, { "epoch": 0.005084784237168865, "grad_norm": 64.86547058325266, "learning_rate": 2.3333333333333336e-07, "loss": 7.4356, "step": 7 }, { "epoch": 0.005811181985335845, "grad_norm": 62.520037864532036, "learning_rate": 2.666666666666667e-07, "loss": 7.3069, "step": 8 }, { "epoch": 0.006537579733502826, "grad_norm": 64.86566194865063, "learning_rate": 3.0000000000000004e-07, "loss": 7.4228, "step": 9 }, { "epoch": 0.007263977481669807, "grad_norm": 62.970846908905436, "learning_rate": 3.3333333333333335e-07, "loss": 7.3558, "step": 10 }, { "epoch": 0.007990375229836788, "grad_norm": 63.631610429531904, "learning_rate": 3.666666666666667e-07, "loss": 7.3535, "step": 11 }, { "epoch": 0.008716772978003768, "grad_norm": 63.25811371761601, "learning_rate": 4.0000000000000003e-07, "loss": 7.3819, "step": 12 }, { "epoch": 0.00944317072617075, "grad_norm": 63.66259077849509, "learning_rate": 4.333333333333334e-07, "loss": 7.3795, "step": 13 }, { "epoch": 0.01016956847433773, "grad_norm": 64.15289940053654, "learning_rate": 4.666666666666667e-07, "loss": 7.3683, "step": 14 }, { "epoch": 0.01089596622250471, "grad_norm": 64.59626603451477, "learning_rate": 5.000000000000001e-07, "loss": 7.3799, "step": 15 }, { "epoch": 0.01162236397067169, "grad_norm": 63.26977989702329, "learning_rate": 5.333333333333335e-07, "loss": 7.3623, "step": 16 }, { "epoch": 0.012348761718838672, "grad_norm": 60.067823922331, "learning_rate": 5.666666666666667e-07, "loss": 7.1842, "step": 17 }, { "epoch": 0.013075159467005652, "grad_norm": 63.99871264593463, "learning_rate": 6.000000000000001e-07, "loss": 7.236, "step": 18 }, { "epoch": 0.013801557215172632, "grad_norm": 59.629646935909086, "learning_rate": 6.333333333333334e-07, "loss": 7.1603, "step": 19 }, { "epoch": 0.014527954963339614, "grad_norm": 62.1060954445667, "learning_rate": 6.666666666666667e-07, "loss": 7.283, "step": 20 }, { "epoch": 0.015254352711506594, "grad_norm": 60.05206589269812, "learning_rate": 7.000000000000001e-07, "loss": 7.186, "step": 21 }, { "epoch": 0.015980750459673575, "grad_norm": 59.645318014317375, "learning_rate": 7.333333333333334e-07, "loss": 7.1921, "step": 22 }, { "epoch": 0.016707148207840555, "grad_norm": 57.48650185086973, "learning_rate": 7.666666666666667e-07, "loss": 6.9704, "step": 23 }, { "epoch": 0.017433545956007535, "grad_norm": 53.42583321263901, "learning_rate": 8.000000000000001e-07, "loss": 6.8416, "step": 24 }, { "epoch": 0.01815994370417452, "grad_norm": 53.633682206139234, "learning_rate": 8.333333333333333e-07, "loss": 6.8657, "step": 25 }, { "epoch": 0.0188863414523415, "grad_norm": 53.611957520088204, "learning_rate": 8.666666666666668e-07, "loss": 6.8391, "step": 26 }, { "epoch": 0.01961273920050848, "grad_norm": 53.90927959387481, "learning_rate": 9.000000000000001e-07, "loss": 6.8457, "step": 27 }, { "epoch": 0.02033913694867546, "grad_norm": 52.34299208362781, "learning_rate": 9.333333333333334e-07, "loss": 6.7246, "step": 28 }, { "epoch": 0.021065534696842438, "grad_norm": 52.44583643778421, "learning_rate": 9.666666666666668e-07, "loss": 6.7693, "step": 29 }, { "epoch": 0.02179193244500942, "grad_norm": 50.904404917626735, "learning_rate": 1.0000000000000002e-06, "loss": 6.6598, "step": 30 }, { "epoch": 0.0225183301931764, "grad_norm": 51.76963606843732, "learning_rate": 1.0333333333333333e-06, "loss": 6.5898, "step": 31 }, { "epoch": 0.02324472794134338, "grad_norm": 51.843062752094205, "learning_rate": 1.066666666666667e-06, "loss": 6.4639, "step": 32 }, { "epoch": 0.02397112568951036, "grad_norm": 48.38768669965513, "learning_rate": 1.1e-06, "loss": 6.1057, "step": 33 }, { "epoch": 0.024697523437677345, "grad_norm": 47.75728492688787, "learning_rate": 1.1333333333333334e-06, "loss": 6.0672, "step": 34 }, { "epoch": 0.025423921185844325, "grad_norm": 47.1983740094125, "learning_rate": 1.1666666666666668e-06, "loss": 5.9977, "step": 35 }, { "epoch": 0.026150318934011305, "grad_norm": 47.80807555427671, "learning_rate": 1.2000000000000002e-06, "loss": 6.0389, "step": 36 }, { "epoch": 0.026876716682178284, "grad_norm": 46.78515259105002, "learning_rate": 1.2333333333333335e-06, "loss": 5.9493, "step": 37 }, { "epoch": 0.027603114430345264, "grad_norm": 46.44941548286642, "learning_rate": 1.2666666666666669e-06, "loss": 5.9096, "step": 38 }, { "epoch": 0.028329512178512248, "grad_norm": 47.87743744982369, "learning_rate": 1.3e-06, "loss": 5.957, "step": 39 }, { "epoch": 0.029055909926679228, "grad_norm": 47.69020729829381, "learning_rate": 1.3333333333333334e-06, "loss": 5.9052, "step": 40 }, { "epoch": 0.029782307674846208, "grad_norm": 45.65840416434142, "learning_rate": 1.3666666666666668e-06, "loss": 5.7094, "step": 41 }, { "epoch": 0.030508705423013188, "grad_norm": 46.52695760405757, "learning_rate": 1.4000000000000001e-06, "loss": 5.6376, "step": 42 }, { "epoch": 0.03123510317118017, "grad_norm": 47.27139026128274, "learning_rate": 1.4333333333333335e-06, "loss": 5.5706, "step": 43 }, { "epoch": 0.03196150091934715, "grad_norm": 46.74277727155727, "learning_rate": 1.4666666666666669e-06, "loss": 5.4399, "step": 44 }, { "epoch": 0.03268789866751413, "grad_norm": 46.31302781518561, "learning_rate": 1.5e-06, "loss": 5.305, "step": 45 }, { "epoch": 0.03341429641568111, "grad_norm": 47.29508573577366, "learning_rate": 1.5333333333333334e-06, "loss": 5.2508, "step": 46 }, { "epoch": 0.034140694163848094, "grad_norm": 46.5497242137799, "learning_rate": 1.566666666666667e-06, "loss": 5.1042, "step": 47 }, { "epoch": 0.03486709191201507, "grad_norm": 47.310997403673, "learning_rate": 1.6000000000000001e-06, "loss": 5.0298, "step": 48 }, { "epoch": 0.035593489660182054, "grad_norm": 47.85952489756574, "learning_rate": 1.6333333333333335e-06, "loss": 4.9936, "step": 49 }, { "epoch": 0.03631988740834904, "grad_norm": 48.28390256638122, "learning_rate": 1.6666666666666667e-06, "loss": 4.9282, "step": 50 }, { "epoch": 0.037046285156516014, "grad_norm": 47.36438783340036, "learning_rate": 1.7000000000000002e-06, "loss": 4.7947, "step": 51 }, { "epoch": 0.037772682904683, "grad_norm": 47.97444500743773, "learning_rate": 1.7333333333333336e-06, "loss": 4.7598, "step": 52 }, { "epoch": 0.038499080652849973, "grad_norm": 48.032965049328915, "learning_rate": 1.7666666666666668e-06, "loss": 4.7174, "step": 53 }, { "epoch": 0.03922547840101696, "grad_norm": 48.54449670206776, "learning_rate": 1.8000000000000001e-06, "loss": 4.6641, "step": 54 }, { "epoch": 0.03995187614918394, "grad_norm": 48.098423617340245, "learning_rate": 1.8333333333333333e-06, "loss": 4.5518, "step": 55 }, { "epoch": 0.04067827389735092, "grad_norm": 48.398816364766674, "learning_rate": 1.8666666666666669e-06, "loss": 4.4794, "step": 56 }, { "epoch": 0.0414046716455179, "grad_norm": 49.27527728561581, "learning_rate": 1.9000000000000002e-06, "loss": 4.4303, "step": 57 }, { "epoch": 0.042131069393684877, "grad_norm": 48.371954013728626, "learning_rate": 1.9333333333333336e-06, "loss": 4.3185, "step": 58 }, { "epoch": 0.04285746714185186, "grad_norm": 48.5608446730172, "learning_rate": 1.9666666666666668e-06, "loss": 4.2362, "step": 59 }, { "epoch": 0.04358386489001884, "grad_norm": 48.368964459448684, "learning_rate": 2.0000000000000003e-06, "loss": 4.1383, "step": 60 }, { "epoch": 0.04431026263818582, "grad_norm": 48.57285815251858, "learning_rate": 2.0333333333333335e-06, "loss": 4.0723, "step": 61 }, { "epoch": 0.0450366603863528, "grad_norm": 48.76734862112584, "learning_rate": 2.0666666666666666e-06, "loss": 4.0063, "step": 62 }, { "epoch": 0.04576305813451978, "grad_norm": 49.26145502427768, "learning_rate": 2.1000000000000002e-06, "loss": 3.957, "step": 63 }, { "epoch": 0.04648945588268676, "grad_norm": 49.50243947973019, "learning_rate": 2.133333333333334e-06, "loss": 3.8785, "step": 64 }, { "epoch": 0.047215853630853746, "grad_norm": 49.828107092810185, "learning_rate": 2.166666666666667e-06, "loss": 3.8176, "step": 65 }, { "epoch": 0.04794225137902072, "grad_norm": 49.26466685923202, "learning_rate": 2.2e-06, "loss": 3.7495, "step": 66 }, { "epoch": 0.048668649127187706, "grad_norm": 48.62847142711153, "learning_rate": 2.2333333333333333e-06, "loss": 3.6826, "step": 67 }, { "epoch": 0.04939504687535469, "grad_norm": 49.05074040352798, "learning_rate": 2.266666666666667e-06, "loss": 3.6296, "step": 68 }, { "epoch": 0.050121444623521666, "grad_norm": 49.862009737363984, "learning_rate": 2.3000000000000004e-06, "loss": 3.5896, "step": 69 }, { "epoch": 0.05084784237168865, "grad_norm": 50.69198753633318, "learning_rate": 2.3333333333333336e-06, "loss": 3.5724, "step": 70 }, { "epoch": 0.051574240119855626, "grad_norm": 50.03195157652246, "learning_rate": 2.3666666666666667e-06, "loss": 3.5113, "step": 71 }, { "epoch": 0.05230063786802261, "grad_norm": 49.23258661177582, "learning_rate": 2.4000000000000003e-06, "loss": 3.4314, "step": 72 }, { "epoch": 0.05302703561618959, "grad_norm": 50.580583101959995, "learning_rate": 2.4333333333333335e-06, "loss": 3.411, "step": 73 }, { "epoch": 0.05375343336435657, "grad_norm": 50.72488608293446, "learning_rate": 2.466666666666667e-06, "loss": 3.3533, "step": 74 }, { "epoch": 0.05447983111252355, "grad_norm": 50.6273218587908, "learning_rate": 2.5e-06, "loss": 3.3145, "step": 75 }, { "epoch": 0.05520622886069053, "grad_norm": 50.73713708166495, "learning_rate": 2.5333333333333338e-06, "loss": 3.2674, "step": 76 }, { "epoch": 0.05593262660885751, "grad_norm": 50.698736069610405, "learning_rate": 2.566666666666667e-06, "loss": 3.2224, "step": 77 }, { "epoch": 0.056659024357024496, "grad_norm": 49.95915915848869, "learning_rate": 2.6e-06, "loss": 3.1458, "step": 78 }, { "epoch": 0.05738542210519147, "grad_norm": 50.41144470850246, "learning_rate": 2.6333333333333332e-06, "loss": 3.1436, "step": 79 }, { "epoch": 0.058111819853358455, "grad_norm": 50.69282224043935, "learning_rate": 2.666666666666667e-06, "loss": 3.0908, "step": 80 }, { "epoch": 0.05883821760152543, "grad_norm": 49.800949375863084, "learning_rate": 2.7000000000000004e-06, "loss": 3.0296, "step": 81 }, { "epoch": 0.059564615349692415, "grad_norm": 51.039912830570024, "learning_rate": 2.7333333333333336e-06, "loss": 2.9943, "step": 82 }, { "epoch": 0.0602910130978594, "grad_norm": 49.70036616878881, "learning_rate": 2.766666666666667e-06, "loss": 2.9383, "step": 83 }, { "epoch": 0.061017410846026375, "grad_norm": 50.93003291238077, "learning_rate": 2.8000000000000003e-06, "loss": 2.9363, "step": 84 }, { "epoch": 0.06174380859419336, "grad_norm": 50.34453682192858, "learning_rate": 2.8333333333333335e-06, "loss": 2.8795, "step": 85 }, { "epoch": 0.06247020634236034, "grad_norm": 50.87658557775924, "learning_rate": 2.866666666666667e-06, "loss": 2.8499, "step": 86 }, { "epoch": 0.06319660409052733, "grad_norm": 49.93087461716983, "learning_rate": 2.9e-06, "loss": 2.8086, "step": 87 }, { "epoch": 0.0639230018386943, "grad_norm": 50.674279843425246, "learning_rate": 2.9333333333333338e-06, "loss": 2.7729, "step": 88 }, { "epoch": 0.06464939958686128, "grad_norm": 50.663286782284274, "learning_rate": 2.9666666666666673e-06, "loss": 2.7499, "step": 89 }, { "epoch": 0.06537579733502825, "grad_norm": 50.35183232947615, "learning_rate": 3e-06, "loss": 2.7004, "step": 90 }, { "epoch": 0.06610219508319524, "grad_norm": 50.447339204931495, "learning_rate": 3.0333333333333337e-06, "loss": 2.6782, "step": 91 }, { "epoch": 0.06682859283136222, "grad_norm": 50.08671877556182, "learning_rate": 3.066666666666667e-06, "loss": 2.6386, "step": 92 }, { "epoch": 0.0675549905795292, "grad_norm": 50.85016696537039, "learning_rate": 3.1000000000000004e-06, "loss": 2.6105, "step": 93 }, { "epoch": 0.06828138832769619, "grad_norm": 50.432484997573745, "learning_rate": 3.133333333333334e-06, "loss": 2.579, "step": 94 }, { "epoch": 0.06900778607586316, "grad_norm": 49.822703178081824, "learning_rate": 3.1666666666666667e-06, "loss": 2.5516, "step": 95 }, { "epoch": 0.06973418382403014, "grad_norm": 49.52642010085431, "learning_rate": 3.2000000000000003e-06, "loss": 2.4905, "step": 96 }, { "epoch": 0.07046058157219713, "grad_norm": 49.393946360680815, "learning_rate": 3.2333333333333334e-06, "loss": 2.4803, "step": 97 }, { "epoch": 0.07118697932036411, "grad_norm": 48.897469338046086, "learning_rate": 3.266666666666667e-06, "loss": 2.4483, "step": 98 }, { "epoch": 0.07191337706853108, "grad_norm": 49.378935763876314, "learning_rate": 3.3000000000000006e-06, "loss": 2.4292, "step": 99 }, { "epoch": 0.07263977481669807, "grad_norm": 50.41695005032472, "learning_rate": 3.3333333333333333e-06, "loss": 2.4022, "step": 100 }, { "epoch": 0.07336617256486505, "grad_norm": 50.2049353156204, "learning_rate": 3.366666666666667e-06, "loss": 2.3749, "step": 101 }, { "epoch": 0.07409257031303203, "grad_norm": 49.693566201239186, "learning_rate": 3.4000000000000005e-06, "loss": 2.3459, "step": 102 }, { "epoch": 0.074818968061199, "grad_norm": 48.84685424655744, "learning_rate": 3.4333333333333336e-06, "loss": 2.334, "step": 103 }, { "epoch": 0.075545365809366, "grad_norm": 47.62798075643798, "learning_rate": 3.4666666666666672e-06, "loss": 2.2763, "step": 104 }, { "epoch": 0.07627176355753297, "grad_norm": 48.19573541336029, "learning_rate": 3.5e-06, "loss": 2.265, "step": 105 }, { "epoch": 0.07699816130569995, "grad_norm": 49.45197671503261, "learning_rate": 3.5333333333333335e-06, "loss": 2.2395, "step": 106 }, { "epoch": 0.07772455905386694, "grad_norm": 47.40823487468658, "learning_rate": 3.566666666666667e-06, "loss": 2.2103, "step": 107 }, { "epoch": 0.07845095680203391, "grad_norm": 47.671328668080086, "learning_rate": 3.6000000000000003e-06, "loss": 2.1863, "step": 108 }, { "epoch": 0.07917735455020089, "grad_norm": 47.279964152668605, "learning_rate": 3.633333333333334e-06, "loss": 2.1527, "step": 109 }, { "epoch": 0.07990375229836788, "grad_norm": 46.06802031513997, "learning_rate": 3.6666666666666666e-06, "loss": 2.1353, "step": 110 }, { "epoch": 0.08063015004653486, "grad_norm": 46.73470181620622, "learning_rate": 3.7e-06, "loss": 2.1074, "step": 111 }, { "epoch": 0.08135654779470183, "grad_norm": 47.34253122512511, "learning_rate": 3.7333333333333337e-06, "loss": 2.0864, "step": 112 }, { "epoch": 0.08208294554286882, "grad_norm": 46.64992121387836, "learning_rate": 3.766666666666667e-06, "loss": 2.072, "step": 113 }, { "epoch": 0.0828093432910358, "grad_norm": 46.30918717601416, "learning_rate": 3.8000000000000005e-06, "loss": 2.0391, "step": 114 }, { "epoch": 0.08353574103920278, "grad_norm": 46.61354559014084, "learning_rate": 3.833333333333334e-06, "loss": 2.014, "step": 115 }, { "epoch": 0.08426213878736975, "grad_norm": 45.95154255629814, "learning_rate": 3.866666666666667e-06, "loss": 2.0111, "step": 116 }, { "epoch": 0.08498853653553674, "grad_norm": 45.17776725607547, "learning_rate": 3.900000000000001e-06, "loss": 1.9856, "step": 117 }, { "epoch": 0.08571493428370372, "grad_norm": 45.131837668001495, "learning_rate": 3.9333333333333335e-06, "loss": 1.9698, "step": 118 }, { "epoch": 0.0864413320318707, "grad_norm": 44.877128843538, "learning_rate": 3.966666666666667e-06, "loss": 1.9418, "step": 119 }, { "epoch": 0.08716772978003769, "grad_norm": 43.42147842225612, "learning_rate": 4.000000000000001e-06, "loss": 1.9271, "step": 120 }, { "epoch": 0.08789412752820466, "grad_norm": 43.9251239135038, "learning_rate": 4.033333333333333e-06, "loss": 1.9074, "step": 121 }, { "epoch": 0.08862052527637164, "grad_norm": 45.52313461713586, "learning_rate": 4.066666666666667e-06, "loss": 1.8972, "step": 122 }, { "epoch": 0.08934692302453863, "grad_norm": 43.96000925438749, "learning_rate": 4.1e-06, "loss": 1.867, "step": 123 }, { "epoch": 0.0900733207727056, "grad_norm": 43.27066380698389, "learning_rate": 4.133333333333333e-06, "loss": 1.8483, "step": 124 }, { "epoch": 0.09079971852087258, "grad_norm": 42.70094664578436, "learning_rate": 4.166666666666667e-06, "loss": 1.8224, "step": 125 }, { "epoch": 0.09152611626903956, "grad_norm": 41.5753156714769, "learning_rate": 4.2000000000000004e-06, "loss": 1.8262, "step": 126 }, { "epoch": 0.09225251401720655, "grad_norm": 41.898261769934656, "learning_rate": 4.233333333333334e-06, "loss": 1.787, "step": 127 }, { "epoch": 0.09297891176537353, "grad_norm": 42.0872491446625, "learning_rate": 4.266666666666668e-06, "loss": 1.7713, "step": 128 }, { "epoch": 0.0937053095135405, "grad_norm": 41.19380423975876, "learning_rate": 4.3e-06, "loss": 1.7568, "step": 129 }, { "epoch": 0.09443170726170749, "grad_norm": 40.680968528405074, "learning_rate": 4.333333333333334e-06, "loss": 1.7495, "step": 130 }, { "epoch": 0.09515810500987447, "grad_norm": 40.16939074899239, "learning_rate": 4.366666666666667e-06, "loss": 1.7404, "step": 131 }, { "epoch": 0.09588450275804145, "grad_norm": 39.72263478772674, "learning_rate": 4.4e-06, "loss": 1.7212, "step": 132 }, { "epoch": 0.09661090050620844, "grad_norm": 39.49181775364284, "learning_rate": 4.433333333333334e-06, "loss": 1.6969, "step": 133 }, { "epoch": 0.09733729825437541, "grad_norm": 39.04439034347531, "learning_rate": 4.4666666666666665e-06, "loss": 1.6875, "step": 134 }, { "epoch": 0.09806369600254239, "grad_norm": 38.443498008849005, "learning_rate": 4.5e-06, "loss": 1.693, "step": 135 }, { "epoch": 0.09879009375070938, "grad_norm": 38.36227491759409, "learning_rate": 4.533333333333334e-06, "loss": 1.659, "step": 136 }, { "epoch": 0.09951649149887636, "grad_norm": 37.3629590490201, "learning_rate": 4.566666666666667e-06, "loss": 1.6459, "step": 137 }, { "epoch": 0.10024288924704333, "grad_norm": 37.54793761307316, "learning_rate": 4.600000000000001e-06, "loss": 1.6365, "step": 138 }, { "epoch": 0.10096928699521031, "grad_norm": 36.18933616176955, "learning_rate": 4.633333333333334e-06, "loss": 1.631, "step": 139 }, { "epoch": 0.1016956847433773, "grad_norm": 36.69815961845787, "learning_rate": 4.666666666666667e-06, "loss": 1.6211, "step": 140 }, { "epoch": 0.10242208249154428, "grad_norm": 36.12103329793505, "learning_rate": 4.7e-06, "loss": 1.6008, "step": 141 }, { "epoch": 0.10314848023971125, "grad_norm": 34.75085594638176, "learning_rate": 4.7333333333333335e-06, "loss": 1.5941, "step": 142 }, { "epoch": 0.10387487798787824, "grad_norm": 35.264396011673654, "learning_rate": 4.766666666666667e-06, "loss": 1.5712, "step": 143 }, { "epoch": 0.10460127573604522, "grad_norm": 35.15237015020466, "learning_rate": 4.800000000000001e-06, "loss": 1.5638, "step": 144 }, { "epoch": 0.1053276734842122, "grad_norm": 34.263602593586796, "learning_rate": 4.833333333333333e-06, "loss": 1.554, "step": 145 }, { "epoch": 0.10605407123237919, "grad_norm": 33.753618127877616, "learning_rate": 4.866666666666667e-06, "loss": 1.5283, "step": 146 }, { "epoch": 0.10678046898054616, "grad_norm": 33.839298156814806, "learning_rate": 4.9000000000000005e-06, "loss": 1.5213, "step": 147 }, { "epoch": 0.10750686672871314, "grad_norm": 33.52850494542813, "learning_rate": 4.933333333333334e-06, "loss": 1.5176, "step": 148 }, { "epoch": 0.10823326447688013, "grad_norm": 32.69335919755698, "learning_rate": 4.966666666666667e-06, "loss": 1.5137, "step": 149 }, { "epoch": 0.1089596622250471, "grad_norm": 32.83997712477471, "learning_rate": 5e-06, "loss": 1.4922, "step": 150 }, { "epoch": 0.10968605997321408, "grad_norm": 31.70298573457259, "learning_rate": 5.033333333333333e-06, "loss": 1.4865, "step": 151 }, { "epoch": 0.11041245772138106, "grad_norm": 31.601502772523528, "learning_rate": 5.0666666666666676e-06, "loss": 1.4676, "step": 152 }, { "epoch": 0.11113885546954805, "grad_norm": 31.921384648076824, "learning_rate": 5.1e-06, "loss": 1.4523, "step": 153 }, { "epoch": 0.11186525321771502, "grad_norm": 31.225511458265583, "learning_rate": 5.133333333333334e-06, "loss": 1.4455, "step": 154 }, { "epoch": 0.112591650965882, "grad_norm": 30.32788022406298, "learning_rate": 5.1666666666666675e-06, "loss": 1.4522, "step": 155 }, { "epoch": 0.11331804871404899, "grad_norm": 30.167607122123012, "learning_rate": 5.2e-06, "loss": 1.4346, "step": 156 }, { "epoch": 0.11404444646221597, "grad_norm": 29.780741495847135, "learning_rate": 5.233333333333334e-06, "loss": 1.4242, "step": 157 }, { "epoch": 0.11477084421038294, "grad_norm": 29.145633047690872, "learning_rate": 5.2666666666666665e-06, "loss": 1.4239, "step": 158 }, { "epoch": 0.11549724195854993, "grad_norm": 28.905395314413873, "learning_rate": 5.300000000000001e-06, "loss": 1.4028, "step": 159 }, { "epoch": 0.11622363970671691, "grad_norm": 27.95492252225459, "learning_rate": 5.333333333333334e-06, "loss": 1.4024, "step": 160 }, { "epoch": 0.11695003745488389, "grad_norm": 28.364454949347795, "learning_rate": 5.366666666666666e-06, "loss": 1.3966, "step": 161 }, { "epoch": 0.11767643520305086, "grad_norm": 27.595047874488912, "learning_rate": 5.400000000000001e-06, "loss": 1.3804, "step": 162 }, { "epoch": 0.11840283295121785, "grad_norm": 27.224371964119193, "learning_rate": 5.4333333333333335e-06, "loss": 1.374, "step": 163 }, { "epoch": 0.11912923069938483, "grad_norm": 26.950172876003883, "learning_rate": 5.466666666666667e-06, "loss": 1.3664, "step": 164 }, { "epoch": 0.1198556284475518, "grad_norm": 26.807043896561844, "learning_rate": 5.500000000000001e-06, "loss": 1.3463, "step": 165 }, { "epoch": 0.1205820261957188, "grad_norm": 25.770582681589264, "learning_rate": 5.533333333333334e-06, "loss": 1.3464, "step": 166 }, { "epoch": 0.12130842394388577, "grad_norm": 26.002994801093113, "learning_rate": 5.566666666666667e-06, "loss": 1.3223, "step": 167 }, { "epoch": 0.12203482169205275, "grad_norm": 25.487072733988494, "learning_rate": 5.600000000000001e-06, "loss": 1.3284, "step": 168 }, { "epoch": 0.12276121944021974, "grad_norm": 24.657995803131406, "learning_rate": 5.633333333333334e-06, "loss": 1.3275, "step": 169 }, { "epoch": 0.12348761718838672, "grad_norm": 24.472525282080856, "learning_rate": 5.666666666666667e-06, "loss": 1.3198, "step": 170 }, { "epoch": 0.1242140149365537, "grad_norm": 24.396214212939338, "learning_rate": 5.7e-06, "loss": 1.3064, "step": 171 }, { "epoch": 0.12494041268472068, "grad_norm": 23.954881409211772, "learning_rate": 5.733333333333334e-06, "loss": 1.302, "step": 172 }, { "epoch": 0.12566681043288766, "grad_norm": 23.260344993149822, "learning_rate": 5.766666666666667e-06, "loss": 1.2848, "step": 173 }, { "epoch": 0.12639320818105465, "grad_norm": 23.023966618398276, "learning_rate": 5.8e-06, "loss": 1.284, "step": 174 }, { "epoch": 0.1271196059292216, "grad_norm": 22.329009297749824, "learning_rate": 5.833333333333334e-06, "loss": 1.2843, "step": 175 }, { "epoch": 0.1278460036773886, "grad_norm": 22.5351171358773, "learning_rate": 5.8666666666666675e-06, "loss": 1.2682, "step": 176 }, { "epoch": 0.1285724014255556, "grad_norm": 22.009496715465836, "learning_rate": 5.9e-06, "loss": 1.2625, "step": 177 }, { "epoch": 0.12929879917372256, "grad_norm": 21.965579290780184, "learning_rate": 5.933333333333335e-06, "loss": 1.2579, "step": 178 }, { "epoch": 0.13002519692188955, "grad_norm": 21.682618987574696, "learning_rate": 5.966666666666667e-06, "loss": 1.2549, "step": 179 }, { "epoch": 0.1307515946700565, "grad_norm": 21.080469805135827, "learning_rate": 6e-06, "loss": 1.2388, "step": 180 }, { "epoch": 0.1314779924182235, "grad_norm": 20.967610245749125, "learning_rate": 6.033333333333335e-06, "loss": 1.2265, "step": 181 }, { "epoch": 0.1322043901663905, "grad_norm": 20.1492358534322, "learning_rate": 6.066666666666667e-06, "loss": 1.2454, "step": 182 }, { "epoch": 0.13293078791455745, "grad_norm": 19.607412502409314, "learning_rate": 6.1e-06, "loss": 1.2614, "step": 183 }, { "epoch": 0.13365718566272444, "grad_norm": 19.346758211452215, "learning_rate": 6.133333333333334e-06, "loss": 1.2399, "step": 184 }, { "epoch": 0.13438358341089143, "grad_norm": 19.257583489746565, "learning_rate": 6.166666666666667e-06, "loss": 1.232, "step": 185 }, { "epoch": 0.1351099811590584, "grad_norm": 18.86454673675082, "learning_rate": 6.200000000000001e-06, "loss": 1.219, "step": 186 }, { "epoch": 0.13583637890722539, "grad_norm": 18.46080931559179, "learning_rate": 6.2333333333333335e-06, "loss": 1.2205, "step": 187 }, { "epoch": 0.13656277665539238, "grad_norm": 18.28704328355964, "learning_rate": 6.266666666666668e-06, "loss": 1.2031, "step": 188 }, { "epoch": 0.13728917440355934, "grad_norm": 18.229753602356926, "learning_rate": 6.300000000000001e-06, "loss": 1.2197, "step": 189 }, { "epoch": 0.13801557215172633, "grad_norm": 17.836618035828575, "learning_rate": 6.333333333333333e-06, "loss": 1.1986, "step": 190 }, { "epoch": 0.13874196989989332, "grad_norm": 17.524100548365034, "learning_rate": 6.366666666666668e-06, "loss": 1.2009, "step": 191 }, { "epoch": 0.13946836764806028, "grad_norm": 17.365264025546907, "learning_rate": 6.4000000000000006e-06, "loss": 1.1699, "step": 192 }, { "epoch": 0.14019476539622727, "grad_norm": 17.146603082680905, "learning_rate": 6.433333333333333e-06, "loss": 1.1738, "step": 193 }, { "epoch": 0.14092116314439426, "grad_norm": 16.859962202084272, "learning_rate": 6.466666666666667e-06, "loss": 1.1783, "step": 194 }, { "epoch": 0.14164756089256123, "grad_norm": 16.420121457022596, "learning_rate": 6.5000000000000004e-06, "loss": 1.1685, "step": 195 }, { "epoch": 0.14237395864072822, "grad_norm": 16.275858682293666, "learning_rate": 6.533333333333334e-06, "loss": 1.1689, "step": 196 }, { "epoch": 0.1431003563888952, "grad_norm": 15.72759862599527, "learning_rate": 6.566666666666667e-06, "loss": 1.1618, "step": 197 }, { "epoch": 0.14382675413706217, "grad_norm": 15.213709034432256, "learning_rate": 6.600000000000001e-06, "loss": 1.1834, "step": 198 }, { "epoch": 0.14455315188522916, "grad_norm": 15.44383632151497, "learning_rate": 6.633333333333334e-06, "loss": 1.1606, "step": 199 }, { "epoch": 0.14527954963339615, "grad_norm": 15.0297402495118, "learning_rate": 6.666666666666667e-06, "loss": 1.1643, "step": 200 }, { "epoch": 0.1460059473815631, "grad_norm": 14.616753595774137, "learning_rate": 6.700000000000001e-06, "loss": 1.1572, "step": 201 }, { "epoch": 0.1467323451297301, "grad_norm": 14.988102097685823, "learning_rate": 6.733333333333334e-06, "loss": 1.1223, "step": 202 }, { "epoch": 0.1474587428778971, "grad_norm": 14.154002560143741, "learning_rate": 6.7666666666666665e-06, "loss": 1.1482, "step": 203 }, { "epoch": 0.14818514062606405, "grad_norm": 14.070198384045403, "learning_rate": 6.800000000000001e-06, "loss": 1.137, "step": 204 }, { "epoch": 0.14891153837423105, "grad_norm": 14.00683253820219, "learning_rate": 6.833333333333334e-06, "loss": 1.1349, "step": 205 }, { "epoch": 0.149637936122398, "grad_norm": 13.651960917625363, "learning_rate": 6.866666666666667e-06, "loss": 1.1313, "step": 206 }, { "epoch": 0.150364333870565, "grad_norm": 13.649166736813594, "learning_rate": 6.9e-06, "loss": 1.1372, "step": 207 }, { "epoch": 0.151090731618732, "grad_norm": 13.321459953919788, "learning_rate": 6.9333333333333344e-06, "loss": 1.121, "step": 208 }, { "epoch": 0.15181712936689895, "grad_norm": 13.18972666324515, "learning_rate": 6.966666666666667e-06, "loss": 1.1196, "step": 209 }, { "epoch": 0.15254352711506594, "grad_norm": 13.023718213326685, "learning_rate": 7e-06, "loss": 1.1161, "step": 210 }, { "epoch": 0.15326992486323293, "grad_norm": 12.778850791700851, "learning_rate": 7.033333333333334e-06, "loss": 1.1333, "step": 211 }, { "epoch": 0.1539963226113999, "grad_norm": 12.636516960484471, "learning_rate": 7.066666666666667e-06, "loss": 1.1124, "step": 212 }, { "epoch": 0.15472272035956688, "grad_norm": 12.497406739744827, "learning_rate": 7.100000000000001e-06, "loss": 1.1297, "step": 213 }, { "epoch": 0.15544911810773387, "grad_norm": 12.253086279848734, "learning_rate": 7.133333333333334e-06, "loss": 1.108, "step": 214 }, { "epoch": 0.15617551585590084, "grad_norm": 12.301343331028344, "learning_rate": 7.166666666666667e-06, "loss": 1.0704, "step": 215 }, { "epoch": 0.15690191360406783, "grad_norm": 11.856234190055382, "learning_rate": 7.2000000000000005e-06, "loss": 1.106, "step": 216 }, { "epoch": 0.15762831135223482, "grad_norm": 11.58489034219279, "learning_rate": 7.233333333333334e-06, "loss": 1.1039, "step": 217 }, { "epoch": 0.15835470910040178, "grad_norm": 11.40452942412662, "learning_rate": 7.266666666666668e-06, "loss": 1.107, "step": 218 }, { "epoch": 0.15908110684856877, "grad_norm": 11.492550763214666, "learning_rate": 7.3e-06, "loss": 1.0881, "step": 219 }, { "epoch": 0.15980750459673576, "grad_norm": 11.269901967107916, "learning_rate": 7.333333333333333e-06, "loss": 1.0904, "step": 220 }, { "epoch": 0.16053390234490272, "grad_norm": 10.96158775655295, "learning_rate": 7.3666666666666676e-06, "loss": 1.0952, "step": 221 }, { "epoch": 0.16126030009306971, "grad_norm": 11.13901163124956, "learning_rate": 7.4e-06, "loss": 1.074, "step": 222 }, { "epoch": 0.1619866978412367, "grad_norm": 10.84361490879539, "learning_rate": 7.433333333333334e-06, "loss": 1.0775, "step": 223 }, { "epoch": 0.16271309558940367, "grad_norm": 10.809242398633671, "learning_rate": 7.4666666666666675e-06, "loss": 1.0599, "step": 224 }, { "epoch": 0.16343949333757066, "grad_norm": 10.357828962160399, "learning_rate": 7.500000000000001e-06, "loss": 1.0719, "step": 225 }, { "epoch": 0.16416589108573765, "grad_norm": 10.16294538034498, "learning_rate": 7.533333333333334e-06, "loss": 1.0737, "step": 226 }, { "epoch": 0.1648922888339046, "grad_norm": 10.39061500039731, "learning_rate": 7.566666666666667e-06, "loss": 1.0556, "step": 227 }, { "epoch": 0.1656186865820716, "grad_norm": 10.221493241719058, "learning_rate": 7.600000000000001e-06, "loss": 1.0476, "step": 228 }, { "epoch": 0.16634508433023856, "grad_norm": 9.907815020465414, "learning_rate": 7.633333333333334e-06, "loss": 1.0553, "step": 229 }, { "epoch": 0.16707148207840555, "grad_norm": 9.581041133597106, "learning_rate": 7.666666666666667e-06, "loss": 1.0775, "step": 230 }, { "epoch": 0.16779787982657254, "grad_norm": 9.71168279973024, "learning_rate": 7.7e-06, "loss": 1.0639, "step": 231 }, { "epoch": 0.1685242775747395, "grad_norm": 9.551789851749682, "learning_rate": 7.733333333333334e-06, "loss": 1.0503, "step": 232 }, { "epoch": 0.1692506753229065, "grad_norm": 9.376979470265642, "learning_rate": 7.766666666666666e-06, "loss": 1.0605, "step": 233 }, { "epoch": 0.1699770730710735, "grad_norm": 9.215487743984946, "learning_rate": 7.800000000000002e-06, "loss": 1.0529, "step": 234 }, { "epoch": 0.17070347081924045, "grad_norm": 9.023321477961552, "learning_rate": 7.833333333333333e-06, "loss": 1.06, "step": 235 }, { "epoch": 0.17142986856740744, "grad_norm": 8.978074772661731, "learning_rate": 7.866666666666667e-06, "loss": 1.0296, "step": 236 }, { "epoch": 0.17215626631557443, "grad_norm": 8.837716165817618, "learning_rate": 7.9e-06, "loss": 1.0255, "step": 237 }, { "epoch": 0.1728826640637414, "grad_norm": 8.634089579695464, "learning_rate": 7.933333333333334e-06, "loss": 1.0436, "step": 238 }, { "epoch": 0.17360906181190838, "grad_norm": 8.239794943770667, "learning_rate": 7.966666666666668e-06, "loss": 1.0565, "step": 239 }, { "epoch": 0.17433545956007537, "grad_norm": 8.517474915317166, "learning_rate": 8.000000000000001e-06, "loss": 1.0147, "step": 240 }, { "epoch": 0.17506185730824234, "grad_norm": 8.270883045328144, "learning_rate": 8.033333333333335e-06, "loss": 1.0425, "step": 241 }, { "epoch": 0.17578825505640933, "grad_norm": 8.19007825276564, "learning_rate": 8.066666666666667e-06, "loss": 1.0255, "step": 242 }, { "epoch": 0.17651465280457632, "grad_norm": 8.020665101381212, "learning_rate": 8.1e-06, "loss": 1.0372, "step": 243 }, { "epoch": 0.17724105055274328, "grad_norm": 8.078219951557776, "learning_rate": 8.133333333333334e-06, "loss": 1.023, "step": 244 }, { "epoch": 0.17796744830091027, "grad_norm": 7.830702554013111, "learning_rate": 8.166666666666668e-06, "loss": 1.0366, "step": 245 }, { "epoch": 0.17869384604907726, "grad_norm": 7.820391288845364, "learning_rate": 8.2e-06, "loss": 1.0234, "step": 246 }, { "epoch": 0.17942024379724422, "grad_norm": 7.607710068511707, "learning_rate": 8.233333333333335e-06, "loss": 1.0226, "step": 247 }, { "epoch": 0.1801466415454112, "grad_norm": 7.541222252515193, "learning_rate": 8.266666666666667e-06, "loss": 1.0205, "step": 248 }, { "epoch": 0.1808730392935782, "grad_norm": 7.602914271454926, "learning_rate": 8.3e-06, "loss": 1.0057, "step": 249 }, { "epoch": 0.18159943704174517, "grad_norm": 7.41561445398044, "learning_rate": 8.333333333333334e-06, "loss": 1.0091, "step": 250 }, { "epoch": 0.18232583478991216, "grad_norm": 7.428385752483024, "learning_rate": 8.366666666666667e-06, "loss": 1.0091, "step": 251 }, { "epoch": 0.18305223253807912, "grad_norm": 7.215990147777718, "learning_rate": 8.400000000000001e-06, "loss": 1.0165, "step": 252 }, { "epoch": 0.1837786302862461, "grad_norm": 7.137875961690411, "learning_rate": 8.433333333333334e-06, "loss": 0.9942, "step": 253 }, { "epoch": 0.1845050280344131, "grad_norm": 7.178095525054059, "learning_rate": 8.466666666666668e-06, "loss": 0.9913, "step": 254 }, { "epoch": 0.18523142578258006, "grad_norm": 6.8632846366421285, "learning_rate": 8.5e-06, "loss": 1.0196, "step": 255 }, { "epoch": 0.18595782353074705, "grad_norm": 6.91059218104985, "learning_rate": 8.533333333333335e-06, "loss": 1.0035, "step": 256 }, { "epoch": 0.18668422127891404, "grad_norm": 6.756354737848418, "learning_rate": 8.566666666666667e-06, "loss": 1.0071, "step": 257 }, { "epoch": 0.187410619027081, "grad_norm": 6.672245371900697, "learning_rate": 8.6e-06, "loss": 1.0146, "step": 258 }, { "epoch": 0.188137016775248, "grad_norm": 6.664492580555999, "learning_rate": 8.633333333333334e-06, "loss": 1.0041, "step": 259 }, { "epoch": 0.18886341452341499, "grad_norm": 6.630116579252964, "learning_rate": 8.666666666666668e-06, "loss": 0.9909, "step": 260 }, { "epoch": 0.18958981227158195, "grad_norm": 6.540452718326813, "learning_rate": 8.700000000000001e-06, "loss": 0.9957, "step": 261 }, { "epoch": 0.19031621001974894, "grad_norm": 6.341367627655762, "learning_rate": 8.733333333333333e-06, "loss": 1.0009, "step": 262 }, { "epoch": 0.19104260776791593, "grad_norm": 6.234409930165991, "learning_rate": 8.766666666666669e-06, "loss": 1.001, "step": 263 }, { "epoch": 0.1917690055160829, "grad_norm": 6.305502441912889, "learning_rate": 8.8e-06, "loss": 0.9935, "step": 264 }, { "epoch": 0.19249540326424988, "grad_norm": 6.225158071475369, "learning_rate": 8.833333333333334e-06, "loss": 1.0081, "step": 265 }, { "epoch": 0.19322180101241687, "grad_norm": 6.058144207484882, "learning_rate": 8.866666666666668e-06, "loss": 0.9841, "step": 266 }, { "epoch": 0.19394819876058383, "grad_norm": 5.98628355833425, "learning_rate": 8.900000000000001e-06, "loss": 0.9876, "step": 267 }, { "epoch": 0.19467459650875082, "grad_norm": 5.943991083458489, "learning_rate": 8.933333333333333e-06, "loss": 0.997, "step": 268 }, { "epoch": 0.19540099425691781, "grad_norm": 5.873982099655574, "learning_rate": 8.966666666666667e-06, "loss": 0.978, "step": 269 }, { "epoch": 0.19612739200508478, "grad_norm": 5.895132355616301, "learning_rate": 9e-06, "loss": 0.9741, "step": 270 }, { "epoch": 0.19685378975325177, "grad_norm": 5.769369464159913, "learning_rate": 9.033333333333334e-06, "loss": 0.9732, "step": 271 }, { "epoch": 0.19758018750141876, "grad_norm": 5.576420099083153, "learning_rate": 9.066666666666667e-06, "loss": 0.9892, "step": 272 }, { "epoch": 0.19830658524958572, "grad_norm": 5.674605536835416, "learning_rate": 9.100000000000001e-06, "loss": 0.9647, "step": 273 }, { "epoch": 0.1990329829977527, "grad_norm": 5.689962851483039, "learning_rate": 9.133333333333335e-06, "loss": 0.9561, "step": 274 }, { "epoch": 0.1997593807459197, "grad_norm": 5.451787863746172, "learning_rate": 9.166666666666666e-06, "loss": 0.9693, "step": 275 }, { "epoch": 0.20048577849408666, "grad_norm": 5.508273579548314, "learning_rate": 9.200000000000002e-06, "loss": 0.9476, "step": 276 }, { "epoch": 0.20121217624225365, "grad_norm": 5.346002804833987, "learning_rate": 9.233333333333334e-06, "loss": 0.9492, "step": 277 }, { "epoch": 0.20193857399042062, "grad_norm": 5.315336149698185, "learning_rate": 9.266666666666667e-06, "loss": 0.9608, "step": 278 }, { "epoch": 0.2026649717385876, "grad_norm": 5.215861439520431, "learning_rate": 9.3e-06, "loss": 0.9854, "step": 279 }, { "epoch": 0.2033913694867546, "grad_norm": 5.193177826479856, "learning_rate": 9.333333333333334e-06, "loss": 0.9604, "step": 280 }, { "epoch": 0.20411776723492156, "grad_norm": 5.123020615984874, "learning_rate": 9.366666666666668e-06, "loss": 0.9585, "step": 281 }, { "epoch": 0.20484416498308855, "grad_norm": 5.029202178322807, "learning_rate": 9.4e-06, "loss": 0.9733, "step": 282 }, { "epoch": 0.20557056273125554, "grad_norm": 5.013483166254676, "learning_rate": 9.433333333333335e-06, "loss": 0.9684, "step": 283 }, { "epoch": 0.2062969604794225, "grad_norm": 4.887824924314841, "learning_rate": 9.466666666666667e-06, "loss": 0.9734, "step": 284 }, { "epoch": 0.2070233582275895, "grad_norm": 4.975153535316991, "learning_rate": 9.5e-06, "loss": 0.9635, "step": 285 }, { "epoch": 0.20774975597575648, "grad_norm": 4.856621018396769, "learning_rate": 9.533333333333334e-06, "loss": 0.9523, "step": 286 }, { "epoch": 0.20847615372392345, "grad_norm": 4.921703565848544, "learning_rate": 9.566666666666668e-06, "loss": 0.939, "step": 287 }, { "epoch": 0.20920255147209044, "grad_norm": 4.759014226910551, "learning_rate": 9.600000000000001e-06, "loss": 0.9514, "step": 288 }, { "epoch": 0.20992894922025743, "grad_norm": 4.850649945410091, "learning_rate": 9.633333333333335e-06, "loss": 0.9423, "step": 289 }, { "epoch": 0.2106553469684244, "grad_norm": 4.7510300916535835, "learning_rate": 9.666666666666667e-06, "loss": 0.9375, "step": 290 }, { "epoch": 0.21138174471659138, "grad_norm": 4.631844761995069, "learning_rate": 9.7e-06, "loss": 0.9498, "step": 291 }, { "epoch": 0.21210814246475837, "grad_norm": 4.6840553260562094, "learning_rate": 9.733333333333334e-06, "loss": 0.9318, "step": 292 }, { "epoch": 0.21283454021292533, "grad_norm": 4.64051963705808, "learning_rate": 9.766666666666667e-06, "loss": 0.935, "step": 293 }, { "epoch": 0.21356093796109232, "grad_norm": 4.510960059913492, "learning_rate": 9.800000000000001e-06, "loss": 0.9431, "step": 294 }, { "epoch": 0.2142873357092593, "grad_norm": 4.407943102592227, "learning_rate": 9.833333333333333e-06, "loss": 0.9538, "step": 295 }, { "epoch": 0.21501373345742628, "grad_norm": 4.477665809742677, "learning_rate": 9.866666666666668e-06, "loss": 0.9414, "step": 296 }, { "epoch": 0.21574013120559327, "grad_norm": 4.3658879124763885, "learning_rate": 9.9e-06, "loss": 0.9475, "step": 297 }, { "epoch": 0.21646652895376026, "grad_norm": 4.3068037062213875, "learning_rate": 9.933333333333334e-06, "loss": 0.9423, "step": 298 }, { "epoch": 0.21719292670192722, "grad_norm": 4.285180819974754, "learning_rate": 9.966666666666667e-06, "loss": 0.9399, "step": 299 }, { "epoch": 0.2179193244500942, "grad_norm": 4.229079706272251, "learning_rate": 1e-05, "loss": 0.9482, "step": 300 }, { "epoch": 0.21864572219826117, "grad_norm": 4.281554504080381, "learning_rate": 9.99999943011404e-06, "loss": 0.9175, "step": 301 }, { "epoch": 0.21937211994642816, "grad_norm": 4.169777969455729, "learning_rate": 9.999997720456285e-06, "loss": 0.9479, "step": 302 }, { "epoch": 0.22009851769459515, "grad_norm": 4.1699923475025855, "learning_rate": 9.999994871027128e-06, "loss": 0.925, "step": 303 }, { "epoch": 0.22082491544276212, "grad_norm": 4.125376173668559, "learning_rate": 9.999990881827218e-06, "loss": 0.9263, "step": 304 }, { "epoch": 0.2215513131909291, "grad_norm": 4.075622019823068, "learning_rate": 9.999985752857464e-06, "loss": 0.9415, "step": 305 }, { "epoch": 0.2222777109390961, "grad_norm": 4.087232981051823, "learning_rate": 9.999979484119035e-06, "loss": 0.9023, "step": 306 }, { "epoch": 0.22300410868726306, "grad_norm": 3.9626016761472567, "learning_rate": 9.99997207561336e-06, "loss": 0.9274, "step": 307 }, { "epoch": 0.22373050643543005, "grad_norm": 3.9837800051798036, "learning_rate": 9.999963527342128e-06, "loss": 0.9256, "step": 308 }, { "epoch": 0.22445690418359704, "grad_norm": 3.916777154550035, "learning_rate": 9.999953839307287e-06, "loss": 0.9217, "step": 309 }, { "epoch": 0.225183301931764, "grad_norm": 3.8728958068319006, "learning_rate": 9.999943011511045e-06, "loss": 0.9275, "step": 310 }, { "epoch": 0.225909699679931, "grad_norm": 3.8132974533118, "learning_rate": 9.999931043955876e-06, "loss": 0.9207, "step": 311 }, { "epoch": 0.22663609742809798, "grad_norm": 3.841173198735151, "learning_rate": 9.999917936644498e-06, "loss": 0.9086, "step": 312 }, { "epoch": 0.22736249517626494, "grad_norm": 3.8065529809421452, "learning_rate": 9.999903689579906e-06, "loss": 0.9273, "step": 313 }, { "epoch": 0.22808889292443194, "grad_norm": 3.7651904589221785, "learning_rate": 9.999888302765347e-06, "loss": 0.9189, "step": 314 }, { "epoch": 0.22881529067259893, "grad_norm": 3.7601408395621334, "learning_rate": 9.999871776204325e-06, "loss": 0.9134, "step": 315 }, { "epoch": 0.2295416884207659, "grad_norm": 3.670695667395263, "learning_rate": 9.99985410990061e-06, "loss": 0.916, "step": 316 }, { "epoch": 0.23026808616893288, "grad_norm": 3.6252352338521305, "learning_rate": 9.999835303858231e-06, "loss": 0.9134, "step": 317 }, { "epoch": 0.23099448391709987, "grad_norm": 3.6643475103199106, "learning_rate": 9.99981535808147e-06, "loss": 0.9056, "step": 318 }, { "epoch": 0.23172088166526683, "grad_norm": 3.6490134106546113, "learning_rate": 9.99979427257488e-06, "loss": 0.908, "step": 319 }, { "epoch": 0.23244727941343382, "grad_norm": 3.595270690577472, "learning_rate": 9.999772047343259e-06, "loss": 0.9089, "step": 320 }, { "epoch": 0.2331736771616008, "grad_norm": 3.5270211803260803, "learning_rate": 9.999748682391682e-06, "loss": 0.9042, "step": 321 }, { "epoch": 0.23390007490976777, "grad_norm": 3.526672305447795, "learning_rate": 9.99972417772547e-06, "loss": 0.9027, "step": 322 }, { "epoch": 0.23462647265793476, "grad_norm": 3.4884435048204785, "learning_rate": 9.99969853335021e-06, "loss": 0.9147, "step": 323 }, { "epoch": 0.23535287040610173, "grad_norm": 3.466461119074874, "learning_rate": 9.99967174927175e-06, "loss": 0.8831, "step": 324 }, { "epoch": 0.23607926815426872, "grad_norm": 3.383397947245138, "learning_rate": 9.99964382549619e-06, "loss": 0.9125, "step": 325 }, { "epoch": 0.2368056659024357, "grad_norm": 3.442604009677502, "learning_rate": 9.999614762029901e-06, "loss": 0.8934, "step": 326 }, { "epoch": 0.23753206365060267, "grad_norm": 3.355588421676241, "learning_rate": 9.999584558879507e-06, "loss": 0.9061, "step": 327 }, { "epoch": 0.23825846139876966, "grad_norm": 3.266053823556341, "learning_rate": 9.999553216051892e-06, "loss": 0.9182, "step": 328 }, { "epoch": 0.23898485914693665, "grad_norm": 3.3662912679017603, "learning_rate": 9.9995207335542e-06, "loss": 0.9014, "step": 329 }, { "epoch": 0.2397112568951036, "grad_norm": 3.286115029330833, "learning_rate": 9.999487111393836e-06, "loss": 0.9102, "step": 330 }, { "epoch": 0.2404376546432706, "grad_norm": 3.337503631773769, "learning_rate": 9.999452349578465e-06, "loss": 0.9128, "step": 331 }, { "epoch": 0.2411640523914376, "grad_norm": 3.1928364364917896, "learning_rate": 9.999416448116011e-06, "loss": 0.9107, "step": 332 }, { "epoch": 0.24189045013960456, "grad_norm": 3.1773858850837833, "learning_rate": 9.999379407014656e-06, "loss": 0.9102, "step": 333 }, { "epoch": 0.24261684788777155, "grad_norm": 3.1494952919561117, "learning_rate": 9.999341226282848e-06, "loss": 0.9053, "step": 334 }, { "epoch": 0.24334324563593854, "grad_norm": 3.1585806465130704, "learning_rate": 9.999301905929286e-06, "loss": 0.9027, "step": 335 }, { "epoch": 0.2440696433841055, "grad_norm": 3.153378709522304, "learning_rate": 9.999261445962936e-06, "loss": 0.8965, "step": 336 }, { "epoch": 0.2447960411322725, "grad_norm": 3.1271021116928215, "learning_rate": 9.999219846393018e-06, "loss": 0.9022, "step": 337 }, { "epoch": 0.24552243888043948, "grad_norm": 3.1009031288222775, "learning_rate": 9.999177107229019e-06, "loss": 0.9013, "step": 338 }, { "epoch": 0.24624883662860644, "grad_norm": 3.1087383815587244, "learning_rate": 9.999133228480679e-06, "loss": 0.9037, "step": 339 }, { "epoch": 0.24697523437677343, "grad_norm": 3.080267158750676, "learning_rate": 9.999088210158001e-06, "loss": 0.8849, "step": 340 }, { "epoch": 0.24770163212494042, "grad_norm": 2.999486292773668, "learning_rate": 9.999042052271247e-06, "loss": 0.8937, "step": 341 }, { "epoch": 0.2484280298731074, "grad_norm": 3.013895982835644, "learning_rate": 9.99899475483094e-06, "loss": 0.8888, "step": 342 }, { "epoch": 0.24915442762127438, "grad_norm": 3.073758208806406, "learning_rate": 9.998946317847857e-06, "loss": 0.8602, "step": 343 }, { "epoch": 0.24988082536944137, "grad_norm": 2.9286726313576112, "learning_rate": 9.998896741333047e-06, "loss": 0.9008, "step": 344 }, { "epoch": 0.25060722311760836, "grad_norm": 2.910650348601522, "learning_rate": 9.998846025297804e-06, "loss": 0.895, "step": 345 }, { "epoch": 0.2513336208657753, "grad_norm": 2.8993230794505838, "learning_rate": 9.998794169753694e-06, "loss": 0.8841, "step": 346 }, { "epoch": 0.2520600186139423, "grad_norm": 2.9168647806374617, "learning_rate": 9.998741174712534e-06, "loss": 0.8989, "step": 347 }, { "epoch": 0.2527864163621093, "grad_norm": 2.9351727444896283, "learning_rate": 9.998687040186407e-06, "loss": 0.8851, "step": 348 }, { "epoch": 0.25351281411027626, "grad_norm": 2.823829131544092, "learning_rate": 9.998631766187651e-06, "loss": 0.8873, "step": 349 }, { "epoch": 0.2542392118584432, "grad_norm": 2.8207151518990448, "learning_rate": 9.998575352728869e-06, "loss": 0.8808, "step": 350 }, { "epoch": 0.25496560960661024, "grad_norm": 2.881827280089076, "learning_rate": 9.99851779982292e-06, "loss": 0.8792, "step": 351 }, { "epoch": 0.2556920073547772, "grad_norm": 2.839308859851771, "learning_rate": 9.998459107482922e-06, "loss": 0.8716, "step": 352 }, { "epoch": 0.25641840510294417, "grad_norm": 2.7473965822142743, "learning_rate": 9.998399275722253e-06, "loss": 0.8842, "step": 353 }, { "epoch": 0.2571448028511112, "grad_norm": 2.762628380371553, "learning_rate": 9.998338304554553e-06, "loss": 0.8847, "step": 354 }, { "epoch": 0.25787120059927815, "grad_norm": 2.7322944562152616, "learning_rate": 9.998276193993723e-06, "loss": 0.8605, "step": 355 }, { "epoch": 0.2585975983474451, "grad_norm": 2.666398101224024, "learning_rate": 9.99821294405392e-06, "loss": 0.8867, "step": 356 }, { "epoch": 0.25932399609561213, "grad_norm": 2.710835958236394, "learning_rate": 9.998148554749561e-06, "loss": 0.8634, "step": 357 }, { "epoch": 0.2600503938437791, "grad_norm": 2.6670423652778346, "learning_rate": 9.998083026095323e-06, "loss": 0.8878, "step": 358 }, { "epoch": 0.26077679159194606, "grad_norm": 2.681536519147625, "learning_rate": 9.998016358106147e-06, "loss": 0.8686, "step": 359 }, { "epoch": 0.261503189340113, "grad_norm": 2.597410074827163, "learning_rate": 9.997948550797227e-06, "loss": 0.8868, "step": 360 }, { "epoch": 0.26222958708828004, "grad_norm": 2.621123678822491, "learning_rate": 9.997879604184023e-06, "loss": 0.887, "step": 361 }, { "epoch": 0.262955984836447, "grad_norm": 2.604762010879466, "learning_rate": 9.99780951828225e-06, "loss": 0.8587, "step": 362 }, { "epoch": 0.26368238258461396, "grad_norm": 2.653532685112411, "learning_rate": 9.997738293107882e-06, "loss": 0.8694, "step": 363 }, { "epoch": 0.264408780332781, "grad_norm": 2.5929563189339575, "learning_rate": 9.997665928677159e-06, "loss": 0.8663, "step": 364 }, { "epoch": 0.26513517808094794, "grad_norm": 2.511532491788452, "learning_rate": 9.997592425006574e-06, "loss": 0.8923, "step": 365 }, { "epoch": 0.2658615758291149, "grad_norm": 2.5809881467050313, "learning_rate": 9.997517782112885e-06, "loss": 0.8544, "step": 366 }, { "epoch": 0.2665879735772819, "grad_norm": 2.5451328703553964, "learning_rate": 9.997442000013105e-06, "loss": 0.869, "step": 367 }, { "epoch": 0.2673143713254489, "grad_norm": 2.5718644330923253, "learning_rate": 9.997365078724512e-06, "loss": 0.8576, "step": 368 }, { "epoch": 0.26804076907361585, "grad_norm": 2.5201758471763127, "learning_rate": 9.997287018264637e-06, "loss": 0.8471, "step": 369 }, { "epoch": 0.26876716682178287, "grad_norm": 2.469545330155226, "learning_rate": 9.997207818651273e-06, "loss": 0.8716, "step": 370 }, { "epoch": 0.26949356456994983, "grad_norm": 2.4963246591865556, "learning_rate": 9.99712747990248e-06, "loss": 0.8598, "step": 371 }, { "epoch": 0.2702199623181168, "grad_norm": 2.4316216175071825, "learning_rate": 9.997046002036566e-06, "loss": 0.8687, "step": 372 }, { "epoch": 0.2709463600662838, "grad_norm": 2.4339436736619677, "learning_rate": 9.996963385072108e-06, "loss": 0.8536, "step": 373 }, { "epoch": 0.27167275781445077, "grad_norm": 2.447464955174159, "learning_rate": 9.996879629027934e-06, "loss": 0.8666, "step": 374 }, { "epoch": 0.27239915556261773, "grad_norm": 2.408860632016767, "learning_rate": 9.996794733923141e-06, "loss": 0.858, "step": 375 }, { "epoch": 0.27312555331078475, "grad_norm": 2.381748758234186, "learning_rate": 9.996708699777081e-06, "loss": 0.8671, "step": 376 }, { "epoch": 0.2738519510589517, "grad_norm": 2.547484262667178, "learning_rate": 9.996621526609364e-06, "loss": 0.8385, "step": 377 }, { "epoch": 0.2745783488071187, "grad_norm": 2.4047390006326905, "learning_rate": 9.996533214439864e-06, "loss": 0.8594, "step": 378 }, { "epoch": 0.2753047465552857, "grad_norm": 2.392166393236436, "learning_rate": 9.996443763288708e-06, "loss": 0.855, "step": 379 }, { "epoch": 0.27603114430345266, "grad_norm": 2.314177983313085, "learning_rate": 9.99635317317629e-06, "loss": 0.8613, "step": 380 }, { "epoch": 0.2767575420516196, "grad_norm": 2.2953329879725706, "learning_rate": 9.99626144412326e-06, "loss": 0.8508, "step": 381 }, { "epoch": 0.27748393979978664, "grad_norm": 2.373682548895982, "learning_rate": 9.996168576150527e-06, "loss": 0.8529, "step": 382 }, { "epoch": 0.2782103375479536, "grad_norm": 2.272694418338483, "learning_rate": 9.996074569279261e-06, "loss": 0.8499, "step": 383 }, { "epoch": 0.27893673529612056, "grad_norm": 2.27252146099723, "learning_rate": 9.995979423530893e-06, "loss": 0.8597, "step": 384 }, { "epoch": 0.2796631330442876, "grad_norm": 2.2642442307360624, "learning_rate": 9.995883138927109e-06, "loss": 0.8642, "step": 385 }, { "epoch": 0.28038953079245454, "grad_norm": 2.2782516681714635, "learning_rate": 9.995785715489861e-06, "loss": 0.8433, "step": 386 }, { "epoch": 0.2811159285406215, "grad_norm": 2.2336277825210216, "learning_rate": 9.995687153241353e-06, "loss": 0.856, "step": 387 }, { "epoch": 0.2818423262887885, "grad_norm": 2.195604639250854, "learning_rate": 9.995587452204056e-06, "loss": 0.8668, "step": 388 }, { "epoch": 0.2825687240369555, "grad_norm": 2.1848982031790976, "learning_rate": 9.995486612400695e-06, "loss": 0.8638, "step": 389 }, { "epoch": 0.28329512178512245, "grad_norm": 2.2079370575686292, "learning_rate": 9.995384633854259e-06, "loss": 0.8459, "step": 390 }, { "epoch": 0.28402151953328947, "grad_norm": 2.4403396183778168, "learning_rate": 9.995281516587992e-06, "loss": 0.8442, "step": 391 }, { "epoch": 0.28474791728145643, "grad_norm": 2.190782039428668, "learning_rate": 9.995177260625401e-06, "loss": 0.8571, "step": 392 }, { "epoch": 0.2854743150296234, "grad_norm": 2.1783620914890447, "learning_rate": 9.995071865990255e-06, "loss": 0.8421, "step": 393 }, { "epoch": 0.2862007127777904, "grad_norm": 2.160114786487807, "learning_rate": 9.994965332706574e-06, "loss": 0.8464, "step": 394 }, { "epoch": 0.2869271105259574, "grad_norm": 2.1077114811406363, "learning_rate": 9.994857660798644e-06, "loss": 0.851, "step": 395 }, { "epoch": 0.28765350827412434, "grad_norm": 2.130514190474774, "learning_rate": 9.994748850291013e-06, "loss": 0.8391, "step": 396 }, { "epoch": 0.28837990602229135, "grad_norm": 2.305097773774775, "learning_rate": 9.99463890120848e-06, "loss": 0.8484, "step": 397 }, { "epoch": 0.2891063037704583, "grad_norm": 2.3190227202687557, "learning_rate": 9.994527813576111e-06, "loss": 0.8687, "step": 398 }, { "epoch": 0.2898327015186253, "grad_norm": 2.0697354319941184, "learning_rate": 9.994415587419229e-06, "loss": 0.871, "step": 399 }, { "epoch": 0.2905590992667923, "grad_norm": 2.1179466292046802, "learning_rate": 9.994302222763415e-06, "loss": 0.8427, "step": 400 }, { "epoch": 0.29128549701495926, "grad_norm": 2.0548380672609254, "learning_rate": 9.994187719634512e-06, "loss": 0.851, "step": 401 }, { "epoch": 0.2920118947631262, "grad_norm": 2.004703885482171, "learning_rate": 9.994072078058621e-06, "loss": 0.8572, "step": 402 }, { "epoch": 0.29273829251129324, "grad_norm": 2.040493240651109, "learning_rate": 9.993955298062103e-06, "loss": 0.8497, "step": 403 }, { "epoch": 0.2934646902594602, "grad_norm": 2.0653918004675615, "learning_rate": 9.99383737967158e-06, "loss": 0.8361, "step": 404 }, { "epoch": 0.29419108800762717, "grad_norm": 2.0162210298258088, "learning_rate": 9.99371832291393e-06, "loss": 0.8407, "step": 405 }, { "epoch": 0.2949174857557942, "grad_norm": 2.039149148468403, "learning_rate": 9.993598127816292e-06, "loss": 0.8252, "step": 406 }, { "epoch": 0.29564388350396115, "grad_norm": 1.9921985897639096, "learning_rate": 9.993476794406068e-06, "loss": 0.8467, "step": 407 }, { "epoch": 0.2963702812521281, "grad_norm": 1.9853296697955654, "learning_rate": 9.993354322710914e-06, "loss": 0.8447, "step": 408 }, { "epoch": 0.29709667900029507, "grad_norm": 1.933388097861427, "learning_rate": 9.993230712758748e-06, "loss": 0.8443, "step": 409 }, { "epoch": 0.2978230767484621, "grad_norm": 2.015700970579226, "learning_rate": 9.99310596457775e-06, "loss": 0.8273, "step": 410 }, { "epoch": 0.29854947449662905, "grad_norm": 1.9666969573202417, "learning_rate": 9.992980078196355e-06, "loss": 0.8348, "step": 411 }, { "epoch": 0.299275872244796, "grad_norm": 1.9594216972253362, "learning_rate": 9.992853053643257e-06, "loss": 0.832, "step": 412 }, { "epoch": 0.30000226999296303, "grad_norm": 1.9446630419498114, "learning_rate": 9.992724890947417e-06, "loss": 0.8267, "step": 413 }, { "epoch": 0.30072866774113, "grad_norm": 1.9688606092767582, "learning_rate": 9.992595590138045e-06, "loss": 0.8368, "step": 414 }, { "epoch": 0.30145506548929696, "grad_norm": 1.9423435194261474, "learning_rate": 9.99246515124462e-06, "loss": 0.8242, "step": 415 }, { "epoch": 0.302181463237464, "grad_norm": 1.9125718876356885, "learning_rate": 9.992333574296872e-06, "loss": 0.8276, "step": 416 }, { "epoch": 0.30290786098563094, "grad_norm": 1.883649957859309, "learning_rate": 9.992200859324799e-06, "loss": 0.8298, "step": 417 }, { "epoch": 0.3036342587337979, "grad_norm": 1.879036782487961, "learning_rate": 9.992067006358651e-06, "loss": 0.8353, "step": 418 }, { "epoch": 0.3043606564819649, "grad_norm": 1.8719975868291152, "learning_rate": 9.991932015428941e-06, "loss": 0.8443, "step": 419 }, { "epoch": 0.3050870542301319, "grad_norm": 1.874484854453761, "learning_rate": 9.991795886566443e-06, "loss": 0.8292, "step": 420 }, { "epoch": 0.30581345197829884, "grad_norm": 1.8775152039378569, "learning_rate": 9.991658619802183e-06, "loss": 0.8252, "step": 421 }, { "epoch": 0.30653984972646586, "grad_norm": 1.8441233018367729, "learning_rate": 9.991520215167455e-06, "loss": 0.8272, "step": 422 }, { "epoch": 0.3072662474746328, "grad_norm": 1.8281278187371393, "learning_rate": 9.99138067269381e-06, "loss": 0.8333, "step": 423 }, { "epoch": 0.3079926452227998, "grad_norm": 1.9152044763911662, "learning_rate": 9.991239992413056e-06, "loss": 0.8289, "step": 424 }, { "epoch": 0.3087190429709668, "grad_norm": 1.853675901429331, "learning_rate": 9.99109817435726e-06, "loss": 0.8206, "step": 425 }, { "epoch": 0.30944544071913377, "grad_norm": 1.828312370971533, "learning_rate": 9.990955218558751e-06, "loss": 0.8017, "step": 426 }, { "epoch": 0.31017183846730073, "grad_norm": 1.828150007798247, "learning_rate": 9.99081112505012e-06, "loss": 0.8268, "step": 427 }, { "epoch": 0.31089823621546775, "grad_norm": 2.086629705375154, "learning_rate": 9.990665893864208e-06, "loss": 0.8187, "step": 428 }, { "epoch": 0.3116246339636347, "grad_norm": 1.7980079821122612, "learning_rate": 9.990519525034125e-06, "loss": 0.8181, "step": 429 }, { "epoch": 0.3123510317118017, "grad_norm": 1.8022071581091539, "learning_rate": 9.990372018593236e-06, "loss": 0.8107, "step": 430 }, { "epoch": 0.3130774294599687, "grad_norm": 1.7817457263756484, "learning_rate": 9.990223374575165e-06, "loss": 0.8223, "step": 431 }, { "epoch": 0.31380382720813566, "grad_norm": 1.8186788986310414, "learning_rate": 9.990073593013794e-06, "loss": 0.8306, "step": 432 }, { "epoch": 0.3145302249563026, "grad_norm": 1.78999729473262, "learning_rate": 9.989922673943271e-06, "loss": 0.8218, "step": 433 }, { "epoch": 0.31525662270446964, "grad_norm": 1.7922498131927898, "learning_rate": 9.989770617397994e-06, "loss": 0.8177, "step": 434 }, { "epoch": 0.3159830204526366, "grad_norm": 1.7407032579649269, "learning_rate": 9.989617423412627e-06, "loss": 0.8172, "step": 435 }, { "epoch": 0.31670941820080356, "grad_norm": 1.7377083293660502, "learning_rate": 9.989463092022093e-06, "loss": 0.8097, "step": 436 }, { "epoch": 0.3174358159489706, "grad_norm": 1.8106149891609342, "learning_rate": 9.989307623261571e-06, "loss": 0.8324, "step": 437 }, { "epoch": 0.31816221369713754, "grad_norm": 2.0548792095096537, "learning_rate": 9.989151017166498e-06, "loss": 0.8281, "step": 438 }, { "epoch": 0.3188886114453045, "grad_norm": 1.7202909871253904, "learning_rate": 9.988993273772578e-06, "loss": 0.8152, "step": 439 }, { "epoch": 0.3196150091934715, "grad_norm": 1.9416483022054316, "learning_rate": 9.988834393115768e-06, "loss": 0.8116, "step": 440 }, { "epoch": 0.3203414069416385, "grad_norm": 1.700305712466284, "learning_rate": 9.98867437523228e-06, "loss": 0.8192, "step": 441 }, { "epoch": 0.32106780468980545, "grad_norm": 1.6986113649343069, "learning_rate": 9.9885132201586e-06, "loss": 0.8206, "step": 442 }, { "epoch": 0.32179420243797247, "grad_norm": 1.7568165040916024, "learning_rate": 9.988350927931455e-06, "loss": 0.8159, "step": 443 }, { "epoch": 0.32252060018613943, "grad_norm": 1.6638610925315405, "learning_rate": 9.98818749858785e-06, "loss": 0.8261, "step": 444 }, { "epoch": 0.3232469979343064, "grad_norm": 1.6400851065971984, "learning_rate": 9.988022932165029e-06, "loss": 0.822, "step": 445 }, { "epoch": 0.3239733956824734, "grad_norm": 1.6637785482086707, "learning_rate": 9.987857228700512e-06, "loss": 0.7977, "step": 446 }, { "epoch": 0.32469979343064037, "grad_norm": 1.6527505249510377, "learning_rate": 9.987690388232071e-06, "loss": 0.8109, "step": 447 }, { "epoch": 0.32542619117880733, "grad_norm": 1.6589931492071577, "learning_rate": 9.987522410797736e-06, "loss": 0.8122, "step": 448 }, { "epoch": 0.32615258892697435, "grad_norm": 1.6112609442519945, "learning_rate": 9.987353296435802e-06, "loss": 0.8223, "step": 449 }, { "epoch": 0.3268789866751413, "grad_norm": 1.6299975467243404, "learning_rate": 9.987183045184815e-06, "loss": 0.8148, "step": 450 }, { "epoch": 0.3276053844233083, "grad_norm": 1.6911871741513431, "learning_rate": 9.987011657083587e-06, "loss": 0.812, "step": 451 }, { "epoch": 0.3283317821714753, "grad_norm": 1.6301988035526893, "learning_rate": 9.986839132171186e-06, "loss": 0.7894, "step": 452 }, { "epoch": 0.32905817991964226, "grad_norm": 1.6050588368442675, "learning_rate": 9.98666547048694e-06, "loss": 0.7994, "step": 453 }, { "epoch": 0.3297845776678092, "grad_norm": 1.5935740513279069, "learning_rate": 9.986490672070438e-06, "loss": 0.8133, "step": 454 }, { "epoch": 0.33051097541597624, "grad_norm": 1.5800491187854209, "learning_rate": 9.986314736961522e-06, "loss": 0.8057, "step": 455 }, { "epoch": 0.3312373731641432, "grad_norm": 1.5998802898110331, "learning_rate": 9.986137665200298e-06, "loss": 0.7788, "step": 456 }, { "epoch": 0.33196377091231016, "grad_norm": 1.5993757442652543, "learning_rate": 9.985959456827132e-06, "loss": 0.7947, "step": 457 }, { "epoch": 0.3326901686604771, "grad_norm": 1.599986123833006, "learning_rate": 9.985780111882648e-06, "loss": 0.8124, "step": 458 }, { "epoch": 0.33341656640864414, "grad_norm": 1.567732652499849, "learning_rate": 9.985599630407727e-06, "loss": 0.7983, "step": 459 }, { "epoch": 0.3341429641568111, "grad_norm": 1.568753470069939, "learning_rate": 9.98541801244351e-06, "loss": 0.7993, "step": 460 }, { "epoch": 0.33486936190497807, "grad_norm": 1.53872532512622, "learning_rate": 9.9852352580314e-06, "loss": 0.7954, "step": 461 }, { "epoch": 0.3355957596531451, "grad_norm": 1.573735628732098, "learning_rate": 9.985051367213055e-06, "loss": 0.8028, "step": 462 }, { "epoch": 0.33632215740131205, "grad_norm": 1.5562781601601243, "learning_rate": 9.984866340030393e-06, "loss": 0.7991, "step": 463 }, { "epoch": 0.337048555149479, "grad_norm": 1.5286823486792622, "learning_rate": 9.984680176525591e-06, "loss": 0.8086, "step": 464 }, { "epoch": 0.33777495289764603, "grad_norm": 1.5206427472940696, "learning_rate": 9.98449287674109e-06, "loss": 0.7931, "step": 465 }, { "epoch": 0.338501350645813, "grad_norm": 1.529018033590309, "learning_rate": 9.984304440719582e-06, "loss": 0.7976, "step": 466 }, { "epoch": 0.33922774839397996, "grad_norm": 1.4941622511227373, "learning_rate": 9.984114868504023e-06, "loss": 0.8079, "step": 467 }, { "epoch": 0.339954146142147, "grad_norm": 1.4781959326957563, "learning_rate": 9.983924160137627e-06, "loss": 0.8022, "step": 468 }, { "epoch": 0.34068054389031394, "grad_norm": 1.4674192733847702, "learning_rate": 9.983732315663865e-06, "loss": 0.8091, "step": 469 }, { "epoch": 0.3414069416384809, "grad_norm": 1.4885758046839543, "learning_rate": 9.983539335126473e-06, "loss": 0.8014, "step": 470 }, { "epoch": 0.3421333393866479, "grad_norm": 1.4639151595858675, "learning_rate": 9.983345218569438e-06, "loss": 0.8076, "step": 471 }, { "epoch": 0.3428597371348149, "grad_norm": 1.4519543436282374, "learning_rate": 9.98314996603701e-06, "loss": 0.8147, "step": 472 }, { "epoch": 0.34358613488298184, "grad_norm": 1.4770141552096316, "learning_rate": 9.9829535775737e-06, "loss": 0.7926, "step": 473 }, { "epoch": 0.34431253263114886, "grad_norm": 1.461845780579588, "learning_rate": 9.982756053224274e-06, "loss": 0.8123, "step": 474 }, { "epoch": 0.3450389303793158, "grad_norm": 1.503728720014433, "learning_rate": 9.982557393033758e-06, "loss": 0.779, "step": 475 }, { "epoch": 0.3457653281274828, "grad_norm": 1.4849653618968792, "learning_rate": 9.982357597047438e-06, "loss": 0.7866, "step": 476 }, { "epoch": 0.3464917258756498, "grad_norm": 1.4519069940854028, "learning_rate": 9.98215666531086e-06, "loss": 0.7858, "step": 477 }, { "epoch": 0.34721812362381677, "grad_norm": 1.441735470802499, "learning_rate": 9.981954597869825e-06, "loss": 0.8038, "step": 478 }, { "epoch": 0.34794452137198373, "grad_norm": 1.4516397081642147, "learning_rate": 9.981751394770395e-06, "loss": 0.8039, "step": 479 }, { "epoch": 0.34867091912015075, "grad_norm": 1.4238296243544586, "learning_rate": 9.981547056058893e-06, "loss": 0.8057, "step": 480 }, { "epoch": 0.3493973168683177, "grad_norm": 1.4190132849441395, "learning_rate": 9.981341581781899e-06, "loss": 0.7899, "step": 481 }, { "epoch": 0.35012371461648467, "grad_norm": 1.4326395273254544, "learning_rate": 9.98113497198625e-06, "loss": 0.7895, "step": 482 }, { "epoch": 0.3508501123646517, "grad_norm": 1.399840454673465, "learning_rate": 9.980927226719044e-06, "loss": 0.7965, "step": 483 }, { "epoch": 0.35157651011281865, "grad_norm": 1.4298889459244744, "learning_rate": 9.980718346027636e-06, "loss": 0.7716, "step": 484 }, { "epoch": 0.3523029078609856, "grad_norm": 1.40248883066053, "learning_rate": 9.980508329959646e-06, "loss": 0.7886, "step": 485 }, { "epoch": 0.35302930560915263, "grad_norm": 1.387451411929723, "learning_rate": 9.980297178562943e-06, "loss": 0.7865, "step": 486 }, { "epoch": 0.3537557033573196, "grad_norm": 1.3729511131953516, "learning_rate": 9.980084891885662e-06, "loss": 0.7872, "step": 487 }, { "epoch": 0.35448210110548656, "grad_norm": 1.3898531180126903, "learning_rate": 9.979871469976197e-06, "loss": 0.7692, "step": 488 }, { "epoch": 0.3552084988536536, "grad_norm": 1.3687738845362856, "learning_rate": 9.979656912883193e-06, "loss": 0.7845, "step": 489 }, { "epoch": 0.35593489660182054, "grad_norm": 1.381027925131146, "learning_rate": 9.979441220655564e-06, "loss": 0.7895, "step": 490 }, { "epoch": 0.3566612943499875, "grad_norm": 1.346328819485624, "learning_rate": 9.979224393342477e-06, "loss": 0.8029, "step": 491 }, { "epoch": 0.3573876920981545, "grad_norm": 1.384237345196671, "learning_rate": 9.979006430993357e-06, "loss": 0.8098, "step": 492 }, { "epoch": 0.3581140898463215, "grad_norm": 1.3699677137536757, "learning_rate": 9.97878733365789e-06, "loss": 0.7804, "step": 493 }, { "epoch": 0.35884048759448844, "grad_norm": 1.336258195408756, "learning_rate": 9.978567101386021e-06, "loss": 0.7782, "step": 494 }, { "epoch": 0.35956688534265546, "grad_norm": 1.3486480096565705, "learning_rate": 9.978345734227953e-06, "loss": 0.791, "step": 495 }, { "epoch": 0.3602932830908224, "grad_norm": 1.3402139669890587, "learning_rate": 9.978123232234147e-06, "loss": 0.7805, "step": 496 }, { "epoch": 0.3610196808389894, "grad_norm": 1.3203806678784924, "learning_rate": 9.977899595455324e-06, "loss": 0.8059, "step": 497 }, { "epoch": 0.3617460785871564, "grad_norm": 1.3240137088291317, "learning_rate": 9.977674823942463e-06, "loss": 0.7769, "step": 498 }, { "epoch": 0.36247247633532337, "grad_norm": 1.3371647877624209, "learning_rate": 9.9774489177468e-06, "loss": 0.7828, "step": 499 }, { "epoch": 0.36319887408349033, "grad_norm": 1.3435475467562, "learning_rate": 9.977221876919833e-06, "loss": 0.7942, "step": 500 }, { "epoch": 0.36392527183165735, "grad_norm": 1.3153448965184218, "learning_rate": 9.976993701513317e-06, "loss": 0.7721, "step": 501 }, { "epoch": 0.3646516695798243, "grad_norm": 1.314625926636882, "learning_rate": 9.976764391579266e-06, "loss": 0.7625, "step": 502 }, { "epoch": 0.3653780673279913, "grad_norm": 1.2981750511740526, "learning_rate": 9.976533947169948e-06, "loss": 0.7834, "step": 503 }, { "epoch": 0.36610446507615824, "grad_norm": 1.3305161678384152, "learning_rate": 9.9763023683379e-06, "loss": 0.7874, "step": 504 }, { "epoch": 0.36683086282432525, "grad_norm": 1.3051284028811305, "learning_rate": 9.976069655135906e-06, "loss": 0.7657, "step": 505 }, { "epoch": 0.3675572605724922, "grad_norm": 1.279969631814808, "learning_rate": 9.975835807617019e-06, "loss": 0.7822, "step": 506 }, { "epoch": 0.3682836583206592, "grad_norm": 1.2960567138751116, "learning_rate": 9.975600825834542e-06, "loss": 0.7694, "step": 507 }, { "epoch": 0.3690100560688262, "grad_norm": 1.306849416485967, "learning_rate": 9.975364709842042e-06, "loss": 0.786, "step": 508 }, { "epoch": 0.36973645381699316, "grad_norm": 1.271523847952412, "learning_rate": 9.97512745969334e-06, "loss": 0.7761, "step": 509 }, { "epoch": 0.3704628515651601, "grad_norm": 1.2914497316352134, "learning_rate": 9.97488907544252e-06, "loss": 0.7623, "step": 510 }, { "epoch": 0.37118924931332714, "grad_norm": 1.309823954459763, "learning_rate": 9.974649557143926e-06, "loss": 0.777, "step": 511 }, { "epoch": 0.3719156470614941, "grad_norm": 1.2827788113935663, "learning_rate": 9.97440890485215e-06, "loss": 0.7787, "step": 512 }, { "epoch": 0.37264204480966107, "grad_norm": 1.3873355303110653, "learning_rate": 9.974167118622056e-06, "loss": 0.784, "step": 513 }, { "epoch": 0.3733684425578281, "grad_norm": 1.3254270203436433, "learning_rate": 9.973924198508758e-06, "loss": 0.7701, "step": 514 }, { "epoch": 0.37409484030599505, "grad_norm": 1.2944959469357342, "learning_rate": 9.97368014456763e-06, "loss": 0.76, "step": 515 }, { "epoch": 0.374821238054162, "grad_norm": 1.2474253013121739, "learning_rate": 9.973434956854306e-06, "loss": 0.7666, "step": 516 }, { "epoch": 0.375547635802329, "grad_norm": 1.2656514925156077, "learning_rate": 9.97318863542468e-06, "loss": 0.7684, "step": 517 }, { "epoch": 0.376274033550496, "grad_norm": 1.2260343270931553, "learning_rate": 9.972941180334896e-06, "loss": 0.7752, "step": 518 }, { "epoch": 0.37700043129866295, "grad_norm": 1.2365101027609489, "learning_rate": 9.972692591641367e-06, "loss": 0.7659, "step": 519 }, { "epoch": 0.37772682904682997, "grad_norm": 1.2107248676220899, "learning_rate": 9.97244286940076e-06, "loss": 0.7958, "step": 520 }, { "epoch": 0.37845322679499693, "grad_norm": 1.305074081461733, "learning_rate": 9.972192013669998e-06, "loss": 0.7608, "step": 521 }, { "epoch": 0.3791796245431639, "grad_norm": 1.2188322690177842, "learning_rate": 9.971940024506265e-06, "loss": 0.7864, "step": 522 }, { "epoch": 0.3799060222913309, "grad_norm": 1.2134567730823578, "learning_rate": 9.971686901967006e-06, "loss": 0.7777, "step": 523 }, { "epoch": 0.3806324200394979, "grad_norm": 1.214745326766063, "learning_rate": 9.971432646109919e-06, "loss": 0.7643, "step": 524 }, { "epoch": 0.38135881778766484, "grad_norm": 3.637168647480618, "learning_rate": 9.971177256992961e-06, "loss": 0.7689, "step": 525 }, { "epoch": 0.38208521553583186, "grad_norm": 1.195297556379348, "learning_rate": 9.970920734674355e-06, "loss": 0.7732, "step": 526 }, { "epoch": 0.3828116132839988, "grad_norm": 1.2164501623111703, "learning_rate": 9.970663079212568e-06, "loss": 0.7609, "step": 527 }, { "epoch": 0.3835380110321658, "grad_norm": 1.2178209396112816, "learning_rate": 9.97040429066634e-06, "loss": 0.7654, "step": 528 }, { "epoch": 0.3842644087803328, "grad_norm": 1.196664997899127, "learning_rate": 9.970144369094663e-06, "loss": 0.7604, "step": 529 }, { "epoch": 0.38499080652849976, "grad_norm": 1.205518971317643, "learning_rate": 9.969883314556784e-06, "loss": 0.7621, "step": 530 }, { "epoch": 0.3857172042766667, "grad_norm": 1.1951038596907404, "learning_rate": 9.969621127112211e-06, "loss": 0.7821, "step": 531 }, { "epoch": 0.38644360202483374, "grad_norm": 1.1948561987593798, "learning_rate": 9.969357806820714e-06, "loss": 0.7613, "step": 532 }, { "epoch": 0.3871699997730007, "grad_norm": 1.2101490112809141, "learning_rate": 9.969093353742318e-06, "loss": 0.7612, "step": 533 }, { "epoch": 0.38789639752116767, "grad_norm": 1.171209913607891, "learning_rate": 9.968827767937303e-06, "loss": 0.7801, "step": 534 }, { "epoch": 0.3886227952693347, "grad_norm": 1.2245907679426409, "learning_rate": 9.968561049466214e-06, "loss": 0.7712, "step": 535 }, { "epoch": 0.38934919301750165, "grad_norm": 1.2097923964427286, "learning_rate": 9.96829319838985e-06, "loss": 0.7504, "step": 536 }, { "epoch": 0.3900755907656686, "grad_norm": 1.2500277611138257, "learning_rate": 9.968024214769265e-06, "loss": 0.7662, "step": 537 }, { "epoch": 0.39080198851383563, "grad_norm": 1.1852000915918435, "learning_rate": 9.967754098665778e-06, "loss": 0.7614, "step": 538 }, { "epoch": 0.3915283862620026, "grad_norm": 1.194954947602848, "learning_rate": 9.967482850140965e-06, "loss": 0.7758, "step": 539 }, { "epoch": 0.39225478401016955, "grad_norm": 1.1815956688439189, "learning_rate": 9.967210469256657e-06, "loss": 0.7598, "step": 540 }, { "epoch": 0.3929811817583366, "grad_norm": 1.1881258495264189, "learning_rate": 9.96693695607494e-06, "loss": 0.7642, "step": 541 }, { "epoch": 0.39370757950650354, "grad_norm": 1.1408535421371642, "learning_rate": 9.96666231065817e-06, "loss": 0.7645, "step": 542 }, { "epoch": 0.3944339772546705, "grad_norm": 1.2935039433273636, "learning_rate": 9.966386533068949e-06, "loss": 0.7826, "step": 543 }, { "epoch": 0.3951603750028375, "grad_norm": 1.2067339559583887, "learning_rate": 9.96610962337014e-06, "loss": 0.7424, "step": 544 }, { "epoch": 0.3958867727510045, "grad_norm": 1.1225159513779392, "learning_rate": 9.965831581624872e-06, "loss": 0.7654, "step": 545 }, { "epoch": 0.39661317049917144, "grad_norm": 1.180080430089498, "learning_rate": 9.965552407896519e-06, "loss": 0.7655, "step": 546 }, { "epoch": 0.39733956824733846, "grad_norm": 1.1256118419038732, "learning_rate": 9.965272102248726e-06, "loss": 0.7581, "step": 547 }, { "epoch": 0.3980659659955054, "grad_norm": 1.1436250068062523, "learning_rate": 9.964990664745387e-06, "loss": 0.7493, "step": 548 }, { "epoch": 0.3987923637436724, "grad_norm": 1.125774487206082, "learning_rate": 9.964708095450655e-06, "loss": 0.7563, "step": 549 }, { "epoch": 0.3995187614918394, "grad_norm": 1.136420520925349, "learning_rate": 9.964424394428943e-06, "loss": 0.7648, "step": 550 }, { "epoch": 0.40024515924000637, "grad_norm": 1.1580527989641898, "learning_rate": 9.964139561744927e-06, "loss": 0.7468, "step": 551 }, { "epoch": 0.4009715569881733, "grad_norm": 1.1434802807692184, "learning_rate": 9.963853597463533e-06, "loss": 0.7502, "step": 552 }, { "epoch": 0.4016979547363403, "grad_norm": 1.1142997290828731, "learning_rate": 9.963566501649946e-06, "loss": 0.7355, "step": 553 }, { "epoch": 0.4024243524845073, "grad_norm": 1.0993449364698016, "learning_rate": 9.963278274369613e-06, "loss": 0.7438, "step": 554 }, { "epoch": 0.40315075023267427, "grad_norm": 1.0645134866074886, "learning_rate": 9.962988915688234e-06, "loss": 0.7581, "step": 555 }, { "epoch": 0.40387714798084123, "grad_norm": 1.0874163008360718, "learning_rate": 9.962698425671773e-06, "loss": 0.7559, "step": 556 }, { "epoch": 0.40460354572900825, "grad_norm": 1.0893436970495776, "learning_rate": 9.962406804386447e-06, "loss": 0.7392, "step": 557 }, { "epoch": 0.4053299434771752, "grad_norm": 1.1791025461727056, "learning_rate": 9.962114051898731e-06, "loss": 0.7468, "step": 558 }, { "epoch": 0.4060563412253422, "grad_norm": 1.0617478387032344, "learning_rate": 9.96182016827536e-06, "loss": 0.7549, "step": 559 }, { "epoch": 0.4067827389735092, "grad_norm": 1.0774497292109935, "learning_rate": 9.961525153583327e-06, "loss": 0.7415, "step": 560 }, { "epoch": 0.40750913672167616, "grad_norm": 1.0821338781986698, "learning_rate": 9.961229007889883e-06, "loss": 0.7307, "step": 561 }, { "epoch": 0.4082355344698431, "grad_norm": 1.0892656942041017, "learning_rate": 9.960931731262533e-06, "loss": 0.7427, "step": 562 }, { "epoch": 0.40896193221801014, "grad_norm": 1.062469625082668, "learning_rate": 9.960633323769043e-06, "loss": 0.7498, "step": 563 }, { "epoch": 0.4096883299661771, "grad_norm": 1.0668837175759642, "learning_rate": 9.960333785477437e-06, "loss": 0.7478, "step": 564 }, { "epoch": 0.41041472771434406, "grad_norm": 1.0441183230317423, "learning_rate": 9.960033116455997e-06, "loss": 0.7559, "step": 565 }, { "epoch": 0.4111411254625111, "grad_norm": 1.4159202842843437, "learning_rate": 9.95973131677326e-06, "loss": 0.7347, "step": 566 }, { "epoch": 0.41186752321067804, "grad_norm": 1.2970154589976308, "learning_rate": 9.959428386498023e-06, "loss": 0.7428, "step": 567 }, { "epoch": 0.412593920958845, "grad_norm": 1.0635260939557447, "learning_rate": 9.95912432569934e-06, "loss": 0.7684, "step": 568 }, { "epoch": 0.413320318707012, "grad_norm": 1.065670306952603, "learning_rate": 9.958819134446527e-06, "loss": 0.7305, "step": 569 }, { "epoch": 0.414046716455179, "grad_norm": 1.051820184891295, "learning_rate": 9.958512812809148e-06, "loss": 0.7417, "step": 570 }, { "epoch": 0.41477311420334595, "grad_norm": 1.0932314529080025, "learning_rate": 9.958205360857031e-06, "loss": 0.757, "step": 571 }, { "epoch": 0.41549951195151297, "grad_norm": 1.0904112546627889, "learning_rate": 9.957896778660266e-06, "loss": 0.7325, "step": 572 }, { "epoch": 0.41622590969967993, "grad_norm": 1.0710868590287677, "learning_rate": 9.957587066289189e-06, "loss": 0.7532, "step": 573 }, { "epoch": 0.4169523074478469, "grad_norm": 1.029792935403967, "learning_rate": 9.957276223814405e-06, "loss": 0.7487, "step": 574 }, { "epoch": 0.4176787051960139, "grad_norm": 1.039615004460751, "learning_rate": 9.95696425130677e-06, "loss": 0.7484, "step": 575 }, { "epoch": 0.4184051029441809, "grad_norm": 1.027265520457917, "learning_rate": 9.956651148837402e-06, "loss": 0.7431, "step": 576 }, { "epoch": 0.41913150069234784, "grad_norm": 1.0445331233584851, "learning_rate": 9.956336916477672e-06, "loss": 0.7518, "step": 577 }, { "epoch": 0.41985789844051485, "grad_norm": 1.0056629352503992, "learning_rate": 9.95602155429921e-06, "loss": 0.7442, "step": 578 }, { "epoch": 0.4205842961886818, "grad_norm": 1.0364566761082272, "learning_rate": 9.955705062373904e-06, "loss": 0.7563, "step": 579 }, { "epoch": 0.4213106939368488, "grad_norm": 1.0210209438240678, "learning_rate": 9.955387440773902e-06, "loss": 0.7484, "step": 580 }, { "epoch": 0.4220370916850158, "grad_norm": 1.0282324113074215, "learning_rate": 9.955068689571606e-06, "loss": 0.7282, "step": 581 }, { "epoch": 0.42276348943318276, "grad_norm": 1.1729764498651893, "learning_rate": 9.954748808839675e-06, "loss": 0.7542, "step": 582 }, { "epoch": 0.4234898871813497, "grad_norm": 1.040462706700545, "learning_rate": 9.95442779865103e-06, "loss": 0.7194, "step": 583 }, { "epoch": 0.42421628492951674, "grad_norm": 1.0062430347983087, "learning_rate": 9.954105659078846e-06, "loss": 0.7431, "step": 584 }, { "epoch": 0.4249426826776837, "grad_norm": 1.0639035558270775, "learning_rate": 9.953782390196554e-06, "loss": 0.7492, "step": 585 }, { "epoch": 0.42566908042585067, "grad_norm": 1.0425379187859118, "learning_rate": 9.953457992077847e-06, "loss": 0.7339, "step": 586 }, { "epoch": 0.4263954781740177, "grad_norm": 1.0031591436611542, "learning_rate": 9.953132464796674e-06, "loss": 0.7358, "step": 587 }, { "epoch": 0.42712187592218465, "grad_norm": 1.0015864996993507, "learning_rate": 9.952805808427235e-06, "loss": 0.7305, "step": 588 }, { "epoch": 0.4278482736703516, "grad_norm": 1.8101689727197796, "learning_rate": 9.952478023043999e-06, "loss": 0.7501, "step": 589 }, { "epoch": 0.4285746714185186, "grad_norm": 1.0253116313842865, "learning_rate": 9.952149108721683e-06, "loss": 0.7311, "step": 590 }, { "epoch": 0.4293010691666856, "grad_norm": 0.9992846826039635, "learning_rate": 9.951819065535265e-06, "loss": 0.7172, "step": 591 }, { "epoch": 0.43002746691485255, "grad_norm": 0.9817655480972577, "learning_rate": 9.95148789355998e-06, "loss": 0.7259, "step": 592 }, { "epoch": 0.43075386466301957, "grad_norm": 1.063898145073969, "learning_rate": 9.951155592871317e-06, "loss": 0.7283, "step": 593 }, { "epoch": 0.43148026241118653, "grad_norm": 0.9758454373620455, "learning_rate": 9.950822163545032e-06, "loss": 0.7472, "step": 594 }, { "epoch": 0.4322066601593535, "grad_norm": 1.000367186605782, "learning_rate": 9.950487605657126e-06, "loss": 0.7452, "step": 595 }, { "epoch": 0.4329330579075205, "grad_norm": 0.9540605112818284, "learning_rate": 9.950151919283866e-06, "loss": 0.742, "step": 596 }, { "epoch": 0.4336594556556875, "grad_norm": 1.0039954162122182, "learning_rate": 9.94981510450177e-06, "loss": 0.734, "step": 597 }, { "epoch": 0.43438585340385444, "grad_norm": 0.9744628011735277, "learning_rate": 9.949477161387619e-06, "loss": 0.7395, "step": 598 }, { "epoch": 0.43511225115202146, "grad_norm": 0.9773584355212287, "learning_rate": 9.94913809001845e-06, "loss": 0.7315, "step": 599 }, { "epoch": 0.4358386489001884, "grad_norm": 0.9760546875526702, "learning_rate": 9.948797890471552e-06, "loss": 0.731, "step": 600 }, { "epoch": 0.4365650466483554, "grad_norm": 0.9911326295240364, "learning_rate": 9.948456562824478e-06, "loss": 0.7209, "step": 601 }, { "epoch": 0.43729144439652234, "grad_norm": 0.9630389794461729, "learning_rate": 9.948114107155032e-06, "loss": 0.7185, "step": 602 }, { "epoch": 0.43801784214468936, "grad_norm": 0.9643561574474725, "learning_rate": 9.947770523541283e-06, "loss": 0.7312, "step": 603 }, { "epoch": 0.4387442398928563, "grad_norm": 0.9457862591453264, "learning_rate": 9.947425812061546e-06, "loss": 0.7334, "step": 604 }, { "epoch": 0.4394706376410233, "grad_norm": 0.9764292467518713, "learning_rate": 9.947079972794405e-06, "loss": 0.719, "step": 605 }, { "epoch": 0.4401970353891903, "grad_norm": 0.9652820848231466, "learning_rate": 9.946733005818694e-06, "loss": 0.7614, "step": 606 }, { "epoch": 0.44092343313735727, "grad_norm": 0.9789870034754022, "learning_rate": 9.946384911213504e-06, "loss": 0.7505, "step": 607 }, { "epoch": 0.44164983088552423, "grad_norm": 0.97261896897398, "learning_rate": 9.946035689058189e-06, "loss": 0.7269, "step": 608 }, { "epoch": 0.44237622863369125, "grad_norm": 0.9512662501729403, "learning_rate": 9.945685339432349e-06, "loss": 0.7275, "step": 609 }, { "epoch": 0.4431026263818582, "grad_norm": 0.9193936544152037, "learning_rate": 9.945333862415855e-06, "loss": 0.734, "step": 610 }, { "epoch": 0.4438290241300252, "grad_norm": 0.9437084533207984, "learning_rate": 9.944981258088822e-06, "loss": 0.7455, "step": 611 }, { "epoch": 0.4445554218781922, "grad_norm": 0.9366898434836213, "learning_rate": 9.94462752653163e-06, "loss": 0.7141, "step": 612 }, { "epoch": 0.44528181962635915, "grad_norm": 0.9649835362940479, "learning_rate": 9.944272667824915e-06, "loss": 0.7172, "step": 613 }, { "epoch": 0.4460082173745261, "grad_norm": 0.9480294246476275, "learning_rate": 9.943916682049564e-06, "loss": 0.7282, "step": 614 }, { "epoch": 0.44673461512269314, "grad_norm": 0.9575970196238945, "learning_rate": 9.943559569286731e-06, "loss": 0.731, "step": 615 }, { "epoch": 0.4474610128708601, "grad_norm": 1.0501616602935506, "learning_rate": 9.943201329617819e-06, "loss": 0.7344, "step": 616 }, { "epoch": 0.44818741061902706, "grad_norm": 0.9169524659520641, "learning_rate": 9.94284196312449e-06, "loss": 0.7241, "step": 617 }, { "epoch": 0.4489138083671941, "grad_norm": 0.8988178774810287, "learning_rate": 9.942481469888664e-06, "loss": 0.7263, "step": 618 }, { "epoch": 0.44964020611536104, "grad_norm": 0.9133573838776243, "learning_rate": 9.942119849992515e-06, "loss": 0.7176, "step": 619 }, { "epoch": 0.450366603863528, "grad_norm": 0.9312725209626799, "learning_rate": 9.94175710351848e-06, "loss": 0.7199, "step": 620 }, { "epoch": 0.451093001611695, "grad_norm": 0.9231425800341202, "learning_rate": 9.941393230549245e-06, "loss": 0.721, "step": 621 }, { "epoch": 0.451819399359862, "grad_norm": 0.8913757165093452, "learning_rate": 9.941028231167756e-06, "loss": 0.736, "step": 622 }, { "epoch": 0.45254579710802895, "grad_norm": 0.9192511627739857, "learning_rate": 9.940662105457219e-06, "loss": 0.7311, "step": 623 }, { "epoch": 0.45327219485619596, "grad_norm": 1.0126121327276585, "learning_rate": 9.940294853501093e-06, "loss": 0.7328, "step": 624 }, { "epoch": 0.4539985926043629, "grad_norm": 0.9086059945770585, "learning_rate": 9.939926475383093e-06, "loss": 0.7306, "step": 625 }, { "epoch": 0.4547249903525299, "grad_norm": 0.9006835415527965, "learning_rate": 9.939556971187195e-06, "loss": 0.7139, "step": 626 }, { "epoch": 0.4554513881006969, "grad_norm": 0.8776531408479233, "learning_rate": 9.939186340997629e-06, "loss": 0.7256, "step": 627 }, { "epoch": 0.45617778584886387, "grad_norm": 0.9131179128239798, "learning_rate": 9.938814584898878e-06, "loss": 0.7139, "step": 628 }, { "epoch": 0.45690418359703083, "grad_norm": 0.911827974566791, "learning_rate": 9.938441702975689e-06, "loss": 0.7057, "step": 629 }, { "epoch": 0.45763058134519785, "grad_norm": 0.9156346537960734, "learning_rate": 9.938067695313062e-06, "loss": 0.7087, "step": 630 }, { "epoch": 0.4583569790933648, "grad_norm": 0.8880023303410371, "learning_rate": 9.937692561996253e-06, "loss": 0.718, "step": 631 }, { "epoch": 0.4590833768415318, "grad_norm": 0.9401020700448064, "learning_rate": 9.937316303110773e-06, "loss": 0.7213, "step": 632 }, { "epoch": 0.4598097745896988, "grad_norm": 0.8760498121504141, "learning_rate": 9.936938918742396e-06, "loss": 0.7087, "step": 633 }, { "epoch": 0.46053617233786576, "grad_norm": 0.8471932856764717, "learning_rate": 9.936560408977144e-06, "loss": 0.7294, "step": 634 }, { "epoch": 0.4612625700860327, "grad_norm": 0.8875415965820969, "learning_rate": 9.936180773901305e-06, "loss": 0.7232, "step": 635 }, { "epoch": 0.46198896783419974, "grad_norm": 0.8747330864112066, "learning_rate": 9.935800013601415e-06, "loss": 0.7288, "step": 636 }, { "epoch": 0.4627153655823667, "grad_norm": 0.8939346610383381, "learning_rate": 9.935418128164271e-06, "loss": 0.7422, "step": 637 }, { "epoch": 0.46344176333053366, "grad_norm": 0.8639659321367306, "learning_rate": 9.935035117676925e-06, "loss": 0.7111, "step": 638 }, { "epoch": 0.4641681610787007, "grad_norm": 0.8809990893660246, "learning_rate": 9.934650982226687e-06, "loss": 0.706, "step": 639 }, { "epoch": 0.46489455882686764, "grad_norm": 0.8495844086453649, "learning_rate": 9.93426572190112e-06, "loss": 0.7274, "step": 640 }, { "epoch": 0.4656209565750346, "grad_norm": 0.8457492178190025, "learning_rate": 9.933879336788048e-06, "loss": 0.7159, "step": 641 }, { "epoch": 0.4663473543232016, "grad_norm": 0.872251965734634, "learning_rate": 9.93349182697555e-06, "loss": 0.7174, "step": 642 }, { "epoch": 0.4670737520713686, "grad_norm": 0.8564380863748268, "learning_rate": 9.933103192551958e-06, "loss": 0.6997, "step": 643 }, { "epoch": 0.46780014981953555, "grad_norm": 0.8539062885057994, "learning_rate": 9.932713433605862e-06, "loss": 0.7169, "step": 644 }, { "epoch": 0.46852654756770257, "grad_norm": 0.8602195511545719, "learning_rate": 9.932322550226114e-06, "loss": 0.7267, "step": 645 }, { "epoch": 0.46925294531586953, "grad_norm": 0.8660259689062596, "learning_rate": 9.931930542501813e-06, "loss": 0.7058, "step": 646 }, { "epoch": 0.4699793430640365, "grad_norm": 0.8412500932338813, "learning_rate": 9.931537410522323e-06, "loss": 0.7117, "step": 647 }, { "epoch": 0.47070574081220345, "grad_norm": 0.847365844467039, "learning_rate": 9.931143154377257e-06, "loss": 0.7038, "step": 648 }, { "epoch": 0.4714321385603705, "grad_norm": 0.863076023805393, "learning_rate": 9.930747774156485e-06, "loss": 0.7236, "step": 649 }, { "epoch": 0.47215853630853744, "grad_norm": 0.8578990699179848, "learning_rate": 9.930351269950144e-06, "loss": 0.7249, "step": 650 }, { "epoch": 0.4728849340567044, "grad_norm": 0.8468784620880256, "learning_rate": 9.92995364184861e-06, "loss": 0.7111, "step": 651 }, { "epoch": 0.4736113318048714, "grad_norm": 0.8548540373716411, "learning_rate": 9.92955488994253e-06, "loss": 0.7054, "step": 652 }, { "epoch": 0.4743377295530384, "grad_norm": 0.8263217534184496, "learning_rate": 9.9291550143228e-06, "loss": 0.7013, "step": 653 }, { "epoch": 0.47506412730120534, "grad_norm": 0.8558868202871573, "learning_rate": 9.928754015080571e-06, "loss": 0.6998, "step": 654 }, { "epoch": 0.47579052504937236, "grad_norm": 0.8527525987219964, "learning_rate": 9.928351892307254e-06, "loss": 0.706, "step": 655 }, { "epoch": 0.4765169227975393, "grad_norm": 0.8096658723028995, "learning_rate": 9.927948646094514e-06, "loss": 0.7104, "step": 656 }, { "epoch": 0.4772433205457063, "grad_norm": 0.825386383857699, "learning_rate": 9.927544276534275e-06, "loss": 0.7263, "step": 657 }, { "epoch": 0.4779697182938733, "grad_norm": 0.8110031620274161, "learning_rate": 9.927138783718713e-06, "loss": 0.7327, "step": 658 }, { "epoch": 0.47869611604204027, "grad_norm": 0.8725373219371803, "learning_rate": 9.926732167740262e-06, "loss": 0.7062, "step": 659 }, { "epoch": 0.4794225137902072, "grad_norm": 0.84156185662018, "learning_rate": 9.926324428691612e-06, "loss": 0.7184, "step": 660 }, { "epoch": 0.48014891153837425, "grad_norm": 0.8234352820652021, "learning_rate": 9.92591556666571e-06, "loss": 0.6966, "step": 661 }, { "epoch": 0.4808753092865412, "grad_norm": 0.8392923258120455, "learning_rate": 9.925505581755754e-06, "loss": 0.7033, "step": 662 }, { "epoch": 0.48160170703470817, "grad_norm": 0.8175945014983422, "learning_rate": 9.925094474055207e-06, "loss": 0.7089, "step": 663 }, { "epoch": 0.4823281047828752, "grad_norm": 0.8911331780707599, "learning_rate": 9.92468224365778e-06, "loss": 0.6947, "step": 664 }, { "epoch": 0.48305450253104215, "grad_norm": 0.8111671666726761, "learning_rate": 9.924268890657443e-06, "loss": 0.7013, "step": 665 }, { "epoch": 0.4837809002792091, "grad_norm": 0.820218156032821, "learning_rate": 9.923854415148423e-06, "loss": 0.7069, "step": 666 }, { "epoch": 0.48450729802737613, "grad_norm": 0.8368104072898442, "learning_rate": 9.923438817225198e-06, "loss": 0.7056, "step": 667 }, { "epoch": 0.4852336957755431, "grad_norm": 0.8172858374676331, "learning_rate": 9.92302209698251e-06, "loss": 0.7113, "step": 668 }, { "epoch": 0.48596009352371006, "grad_norm": 0.8111333167554251, "learning_rate": 9.922604254515348e-06, "loss": 0.6986, "step": 669 }, { "epoch": 0.4866864912718771, "grad_norm": 0.7939369425854435, "learning_rate": 9.922185289918965e-06, "loss": 0.7197, "step": 670 }, { "epoch": 0.48741288902004404, "grad_norm": 0.8073816864202008, "learning_rate": 9.921765203288862e-06, "loss": 0.6938, "step": 671 }, { "epoch": 0.488139286768211, "grad_norm": 0.8146062858167089, "learning_rate": 9.921343994720803e-06, "loss": 0.6901, "step": 672 }, { "epoch": 0.488865684516378, "grad_norm": 0.8073049273734886, "learning_rate": 9.920921664310801e-06, "loss": 0.689, "step": 673 }, { "epoch": 0.489592082264545, "grad_norm": 0.8000480150131922, "learning_rate": 9.920498212155132e-06, "loss": 0.6939, "step": 674 }, { "epoch": 0.49031848001271194, "grad_norm": 0.8068016192638968, "learning_rate": 9.920073638350319e-06, "loss": 0.7025, "step": 675 }, { "epoch": 0.49104487776087896, "grad_norm": 0.8260934820137759, "learning_rate": 9.91964794299315e-06, "loss": 0.6908, "step": 676 }, { "epoch": 0.4917712755090459, "grad_norm": 0.7997873997828585, "learning_rate": 9.91922112618066e-06, "loss": 0.7041, "step": 677 }, { "epoch": 0.4924976732572129, "grad_norm": 2.174322927976191, "learning_rate": 9.918793188010147e-06, "loss": 0.7143, "step": 678 }, { "epoch": 0.4932240710053799, "grad_norm": 0.8298841272183457, "learning_rate": 9.91836412857916e-06, "loss": 0.7002, "step": 679 }, { "epoch": 0.49395046875354687, "grad_norm": 0.815955296740638, "learning_rate": 9.917933947985508e-06, "loss": 0.6999, "step": 680 }, { "epoch": 0.49467686650171383, "grad_norm": 0.8141154640169115, "learning_rate": 9.917502646327247e-06, "loss": 0.7185, "step": 681 }, { "epoch": 0.49540326424988085, "grad_norm": 0.8441227422366394, "learning_rate": 9.917070223702697e-06, "loss": 0.7182, "step": 682 }, { "epoch": 0.4961296619980478, "grad_norm": 0.7881967510667903, "learning_rate": 9.916636680210431e-06, "loss": 0.7189, "step": 683 }, { "epoch": 0.4968560597462148, "grad_norm": 0.787513266332936, "learning_rate": 9.916202015949277e-06, "loss": 0.6961, "step": 684 }, { "epoch": 0.4975824574943818, "grad_norm": 0.8297981418322181, "learning_rate": 9.915766231018317e-06, "loss": 0.7032, "step": 685 }, { "epoch": 0.49830885524254875, "grad_norm": 0.7882950365342019, "learning_rate": 9.915329325516894e-06, "loss": 0.6936, "step": 686 }, { "epoch": 0.4990352529907157, "grad_norm": 0.8012742339373384, "learning_rate": 9.914891299544599e-06, "loss": 0.6999, "step": 687 }, { "epoch": 0.49976165073888273, "grad_norm": 0.7831837037525278, "learning_rate": 9.914452153201282e-06, "loss": 0.7051, "step": 688 }, { "epoch": 0.5004880484870496, "grad_norm": 0.7862014823443961, "learning_rate": 9.91401188658705e-06, "loss": 0.6818, "step": 689 }, { "epoch": 0.5012144462352167, "grad_norm": 0.7812613534102408, "learning_rate": 9.91357049980226e-06, "loss": 0.7019, "step": 690 }, { "epoch": 0.5019408439833837, "grad_norm": 0.7697961842553203, "learning_rate": 9.913127992947534e-06, "loss": 0.7026, "step": 691 }, { "epoch": 0.5026672417315506, "grad_norm": 0.7706971322719415, "learning_rate": 9.91268436612374e-06, "loss": 0.6966, "step": 692 }, { "epoch": 0.5033936394797176, "grad_norm": 0.8218604590916418, "learning_rate": 9.912239619432004e-06, "loss": 0.7078, "step": 693 }, { "epoch": 0.5041200372278846, "grad_norm": 0.8656142603168385, "learning_rate": 9.911793752973711e-06, "loss": 0.6914, "step": 694 }, { "epoch": 0.5048464349760515, "grad_norm": 0.7750652898480885, "learning_rate": 9.911346766850493e-06, "loss": 0.6906, "step": 695 }, { "epoch": 0.5055728327242186, "grad_norm": 0.7414098590179129, "learning_rate": 9.910898661164249e-06, "loss": 0.7109, "step": 696 }, { "epoch": 0.5062992304723856, "grad_norm": 0.7569073529543336, "learning_rate": 9.910449436017123e-06, "loss": 0.7044, "step": 697 }, { "epoch": 0.5070256282205525, "grad_norm": 0.7519787240625135, "learning_rate": 9.909999091511516e-06, "loss": 0.6964, "step": 698 }, { "epoch": 0.5077520259687195, "grad_norm": 0.7733018395208994, "learning_rate": 9.909547627750089e-06, "loss": 0.7088, "step": 699 }, { "epoch": 0.5084784237168865, "grad_norm": 0.7721039060567157, "learning_rate": 9.909095044835755e-06, "loss": 0.6913, "step": 700 }, { "epoch": 0.5092048214650534, "grad_norm": 0.7385882659818571, "learning_rate": 9.90864134287168e-06, "loss": 0.693, "step": 701 }, { "epoch": 0.5099312192132205, "grad_norm": 0.7421782318904324, "learning_rate": 9.90818652196129e-06, "loss": 0.7196, "step": 702 }, { "epoch": 0.5106576169613875, "grad_norm": 0.7481323889267485, "learning_rate": 9.907730582208261e-06, "loss": 0.6999, "step": 703 }, { "epoch": 0.5113840147095544, "grad_norm": 0.7601344672286628, "learning_rate": 9.90727352371653e-06, "loss": 0.698, "step": 704 }, { "epoch": 0.5121104124577214, "grad_norm": 0.7483342620276054, "learning_rate": 9.906815346590282e-06, "loss": 0.7051, "step": 705 }, { "epoch": 0.5128368102058883, "grad_norm": 0.818152958994249, "learning_rate": 9.906356050933962e-06, "loss": 0.6999, "step": 706 }, { "epoch": 0.5135632079540553, "grad_norm": 0.7478510985456449, "learning_rate": 9.905895636852268e-06, "loss": 0.7063, "step": 707 }, { "epoch": 0.5142896057022224, "grad_norm": 0.8360311926939497, "learning_rate": 9.905434104450153e-06, "loss": 0.7075, "step": 708 }, { "epoch": 0.5150160034503893, "grad_norm": 0.728081494414813, "learning_rate": 9.904971453832828e-06, "loss": 0.6928, "step": 709 }, { "epoch": 0.5157424011985563, "grad_norm": 0.7732823717838929, "learning_rate": 9.904507685105753e-06, "loss": 0.6804, "step": 710 }, { "epoch": 0.5164687989467233, "grad_norm": 0.732175100079485, "learning_rate": 9.904042798374647e-06, "loss": 0.7015, "step": 711 }, { "epoch": 0.5171951966948902, "grad_norm": 0.7521814228879359, "learning_rate": 9.903576793745482e-06, "loss": 0.6938, "step": 712 }, { "epoch": 0.5179215944430572, "grad_norm": 0.727963567904356, "learning_rate": 9.903109671324488e-06, "loss": 0.6927, "step": 713 }, { "epoch": 0.5186479921912243, "grad_norm": 0.8051400934341161, "learning_rate": 9.902641431218149e-06, "loss": 0.6845, "step": 714 }, { "epoch": 0.5193743899393912, "grad_norm": 0.7399599754417058, "learning_rate": 9.902172073533198e-06, "loss": 0.702, "step": 715 }, { "epoch": 0.5201007876875582, "grad_norm": 0.7177232225200838, "learning_rate": 9.90170159837663e-06, "loss": 0.6854, "step": 716 }, { "epoch": 0.5208271854357251, "grad_norm": 0.7250491270529293, "learning_rate": 9.90123000585569e-06, "loss": 0.6718, "step": 717 }, { "epoch": 0.5215535831838921, "grad_norm": 0.8226795700631545, "learning_rate": 9.90075729607788e-06, "loss": 0.6718, "step": 718 }, { "epoch": 0.5222799809320591, "grad_norm": 0.7404005782833571, "learning_rate": 9.900283469150959e-06, "loss": 0.6894, "step": 719 }, { "epoch": 0.523006378680226, "grad_norm": 0.7201992579567925, "learning_rate": 9.899808525182935e-06, "loss": 0.6878, "step": 720 }, { "epoch": 0.5237327764283931, "grad_norm": 0.7085334339463191, "learning_rate": 9.899332464282075e-06, "loss": 0.6953, "step": 721 }, { "epoch": 0.5244591741765601, "grad_norm": 0.7360701211727764, "learning_rate": 9.898855286556897e-06, "loss": 0.7087, "step": 722 }, { "epoch": 0.525185571924727, "grad_norm": 0.7083495495920359, "learning_rate": 9.898376992116179e-06, "loss": 0.6957, "step": 723 }, { "epoch": 0.525911969672894, "grad_norm": 0.7240208436144087, "learning_rate": 9.897897581068948e-06, "loss": 0.6986, "step": 724 }, { "epoch": 0.526638367421061, "grad_norm": 0.7409795690878006, "learning_rate": 9.897417053524487e-06, "loss": 0.6898, "step": 725 }, { "epoch": 0.5273647651692279, "grad_norm": 0.7292619216347412, "learning_rate": 9.896935409592339e-06, "loss": 0.6915, "step": 726 }, { "epoch": 0.528091162917395, "grad_norm": 0.825328679283231, "learning_rate": 9.896452649382291e-06, "loss": 0.6867, "step": 727 }, { "epoch": 0.528817560665562, "grad_norm": 0.731154122216054, "learning_rate": 9.895968773004394e-06, "loss": 0.6741, "step": 728 }, { "epoch": 0.5295439584137289, "grad_norm": 0.7268817197791653, "learning_rate": 9.895483780568949e-06, "loss": 0.6728, "step": 729 }, { "epoch": 0.5302703561618959, "grad_norm": 0.8850345181917018, "learning_rate": 9.894997672186513e-06, "loss": 0.6674, "step": 730 }, { "epoch": 0.5309967539100628, "grad_norm": 0.7129326314830353, "learning_rate": 9.894510447967893e-06, "loss": 0.6949, "step": 731 }, { "epoch": 0.5317231516582298, "grad_norm": 0.7445881485337487, "learning_rate": 9.894022108024157e-06, "loss": 0.6958, "step": 732 }, { "epoch": 0.5324495494063969, "grad_norm": 0.7087267238053241, "learning_rate": 9.893532652466625e-06, "loss": 0.6826, "step": 733 }, { "epoch": 0.5331759471545638, "grad_norm": 0.7029975399745358, "learning_rate": 9.893042081406868e-06, "loss": 0.691, "step": 734 }, { "epoch": 0.5339023449027308, "grad_norm": 0.7357703371007258, "learning_rate": 9.892550394956715e-06, "loss": 0.6657, "step": 735 }, { "epoch": 0.5346287426508978, "grad_norm": 0.7042415378243042, "learning_rate": 9.892057593228248e-06, "loss": 0.6669, "step": 736 }, { "epoch": 0.5353551403990647, "grad_norm": 0.7537027415403048, "learning_rate": 9.891563676333805e-06, "loss": 0.7107, "step": 737 }, { "epoch": 0.5360815381472317, "grad_norm": 0.716171888686636, "learning_rate": 9.891068644385973e-06, "loss": 0.679, "step": 738 }, { "epoch": 0.5368079358953988, "grad_norm": 0.7988047089210955, "learning_rate": 9.890572497497598e-06, "loss": 0.6931, "step": 739 }, { "epoch": 0.5375343336435657, "grad_norm": 0.7574454818586034, "learning_rate": 9.89007523578178e-06, "loss": 0.6924, "step": 740 }, { "epoch": 0.5382607313917327, "grad_norm": 0.6985625682424247, "learning_rate": 9.889576859351873e-06, "loss": 0.6986, "step": 741 }, { "epoch": 0.5389871291398997, "grad_norm": 0.9035477970405281, "learning_rate": 9.88907736832148e-06, "loss": 0.6931, "step": 742 }, { "epoch": 0.5397135268880666, "grad_norm": 0.6894579767672432, "learning_rate": 9.888576762804465e-06, "loss": 0.6933, "step": 743 }, { "epoch": 0.5404399246362336, "grad_norm": 0.6913186724338037, "learning_rate": 9.888075042914946e-06, "loss": 0.7082, "step": 744 }, { "epoch": 0.5411663223844007, "grad_norm": 0.6914903982190204, "learning_rate": 9.887572208767287e-06, "loss": 0.6932, "step": 745 }, { "epoch": 0.5418927201325676, "grad_norm": 5.796056874784305, "learning_rate": 9.887068260476112e-06, "loss": 0.6791, "step": 746 }, { "epoch": 0.5426191178807346, "grad_norm": 0.8081792780426444, "learning_rate": 9.886563198156302e-06, "loss": 0.69, "step": 747 }, { "epoch": 0.5433455156289015, "grad_norm": 0.9851928484816941, "learning_rate": 9.886057021922984e-06, "loss": 0.6944, "step": 748 }, { "epoch": 0.5440719133770685, "grad_norm": 1.0116190695537544, "learning_rate": 9.885549731891547e-06, "loss": 0.6921, "step": 749 }, { "epoch": 0.5447983111252355, "grad_norm": 1.1344844671703262, "learning_rate": 9.885041328177625e-06, "loss": 0.7013, "step": 750 }, { "epoch": 0.5455247088734025, "grad_norm": 0.8914720633431127, "learning_rate": 9.884531810897115e-06, "loss": 0.6885, "step": 751 }, { "epoch": 0.5462511066215695, "grad_norm": 0.8559206929302882, "learning_rate": 9.884021180166164e-06, "loss": 0.683, "step": 752 }, { "epoch": 0.5469775043697365, "grad_norm": 0.8223803501330558, "learning_rate": 9.883509436101168e-06, "loss": 0.6741, "step": 753 }, { "epoch": 0.5477039021179034, "grad_norm": 0.8233029896285243, "learning_rate": 9.882996578818786e-06, "loss": 0.6931, "step": 754 }, { "epoch": 0.5484302998660704, "grad_norm": 0.8167149294453134, "learning_rate": 9.882482608435924e-06, "loss": 0.6798, "step": 755 }, { "epoch": 0.5491566976142374, "grad_norm": 0.8157887270833556, "learning_rate": 9.881967525069745e-06, "loss": 0.6746, "step": 756 }, { "epoch": 0.5498830953624044, "grad_norm": 0.786640198971054, "learning_rate": 9.881451328837663e-06, "loss": 0.7005, "step": 757 }, { "epoch": 0.5506094931105714, "grad_norm": 0.7642969715869354, "learning_rate": 9.880934019857346e-06, "loss": 0.6806, "step": 758 }, { "epoch": 0.5513358908587384, "grad_norm": 0.7827436991540868, "learning_rate": 9.880415598246722e-06, "loss": 0.6789, "step": 759 }, { "epoch": 0.5520622886069053, "grad_norm": 0.7550650650499403, "learning_rate": 9.879896064123961e-06, "loss": 0.6859, "step": 760 }, { "epoch": 0.5527886863550723, "grad_norm": 0.7314415455404134, "learning_rate": 9.879375417607498e-06, "loss": 0.673, "step": 761 }, { "epoch": 0.5535150841032392, "grad_norm": 0.725667106303544, "learning_rate": 9.878853658816015e-06, "loss": 0.6722, "step": 762 }, { "epoch": 0.5542414818514063, "grad_norm": 0.7253088889076802, "learning_rate": 9.878330787868448e-06, "loss": 0.6831, "step": 763 }, { "epoch": 0.5549678795995733, "grad_norm": 0.7118746003515296, "learning_rate": 9.87780680488399e-06, "loss": 0.6854, "step": 764 }, { "epoch": 0.5556942773477402, "grad_norm": 0.7124344265859947, "learning_rate": 9.877281709982082e-06, "loss": 0.6668, "step": 765 }, { "epoch": 0.5564206750959072, "grad_norm": 0.6870086540392398, "learning_rate": 9.876755503282425e-06, "loss": 0.669, "step": 766 }, { "epoch": 0.5571470728440742, "grad_norm": 0.6921474230850522, "learning_rate": 9.87622818490497e-06, "loss": 0.6735, "step": 767 }, { "epoch": 0.5578734705922411, "grad_norm": 0.6691283320223589, "learning_rate": 9.875699754969919e-06, "loss": 0.6788, "step": 768 }, { "epoch": 0.5585998683404081, "grad_norm": 0.7111757456950618, "learning_rate": 9.875170213597731e-06, "loss": 0.68, "step": 769 }, { "epoch": 0.5593262660885752, "grad_norm": 0.6912163957683205, "learning_rate": 9.874639560909118e-06, "loss": 0.6835, "step": 770 }, { "epoch": 0.5600526638367421, "grad_norm": 0.7059079937216522, "learning_rate": 9.874107797025045e-06, "loss": 0.6836, "step": 771 }, { "epoch": 0.5607790615849091, "grad_norm": 0.6943502146356935, "learning_rate": 9.87357492206673e-06, "loss": 0.688, "step": 772 }, { "epoch": 0.561505459333076, "grad_norm": 0.663025793246858, "learning_rate": 9.87304093615564e-06, "loss": 0.6801, "step": 773 }, { "epoch": 0.562231857081243, "grad_norm": 0.680073642969289, "learning_rate": 9.872505839413504e-06, "loss": 0.6752, "step": 774 }, { "epoch": 0.56295825482941, "grad_norm": 0.8247131197237683, "learning_rate": 9.8719696319623e-06, "loss": 0.6897, "step": 775 }, { "epoch": 0.563684652577577, "grad_norm": 0.6800749444531121, "learning_rate": 9.871432313924255e-06, "loss": 0.6944, "step": 776 }, { "epoch": 0.564411050325744, "grad_norm": 0.6589468936996172, "learning_rate": 9.870893885421856e-06, "loss": 0.6891, "step": 777 }, { "epoch": 0.565137448073911, "grad_norm": 0.6699789187345543, "learning_rate": 9.870354346577839e-06, "loss": 0.6689, "step": 778 }, { "epoch": 0.5658638458220779, "grad_norm": 0.6407811046259845, "learning_rate": 9.869813697515197e-06, "loss": 0.6662, "step": 779 }, { "epoch": 0.5665902435702449, "grad_norm": 0.6621715745362748, "learning_rate": 9.869271938357168e-06, "loss": 0.6789, "step": 780 }, { "epoch": 0.5673166413184119, "grad_norm": 0.6559166860078044, "learning_rate": 9.868729069227253e-06, "loss": 0.6734, "step": 781 }, { "epoch": 0.5680430390665789, "grad_norm": 0.6934565492823604, "learning_rate": 9.868185090249198e-06, "loss": 0.6946, "step": 782 }, { "epoch": 0.5687694368147459, "grad_norm": 0.6727050469819293, "learning_rate": 9.867640001547007e-06, "loss": 0.6851, "step": 783 }, { "epoch": 0.5694958345629129, "grad_norm": 0.6829495739508391, "learning_rate": 9.867093803244935e-06, "loss": 0.664, "step": 784 }, { "epoch": 0.5702222323110798, "grad_norm": 0.6481246326312373, "learning_rate": 9.866546495467493e-06, "loss": 0.6779, "step": 785 }, { "epoch": 0.5709486300592468, "grad_norm": 0.6439766189909788, "learning_rate": 9.86599807833944e-06, "loss": 0.6581, "step": 786 }, { "epoch": 0.5716750278074137, "grad_norm": 0.687659375861452, "learning_rate": 9.865448551985788e-06, "loss": 0.6766, "step": 787 }, { "epoch": 0.5724014255555808, "grad_norm": 0.6283754813822696, "learning_rate": 9.864897916531807e-06, "loss": 0.6718, "step": 788 }, { "epoch": 0.5731278233037478, "grad_norm": 0.6560916613823546, "learning_rate": 9.864346172103016e-06, "loss": 0.6791, "step": 789 }, { "epoch": 0.5738542210519147, "grad_norm": 0.6579158607817118, "learning_rate": 9.863793318825186e-06, "loss": 0.6719, "step": 790 }, { "epoch": 0.5745806188000817, "grad_norm": 0.6209174204285736, "learning_rate": 9.863239356824346e-06, "loss": 0.6668, "step": 791 }, { "epoch": 0.5753070165482487, "grad_norm": 0.6674035902894433, "learning_rate": 9.862684286226769e-06, "loss": 0.663, "step": 792 }, { "epoch": 0.5760334142964156, "grad_norm": 0.6390144070498434, "learning_rate": 9.86212810715899e-06, "loss": 0.6734, "step": 793 }, { "epoch": 0.5767598120445827, "grad_norm": 0.6627367196299603, "learning_rate": 9.86157081974779e-06, "loss": 0.6527, "step": 794 }, { "epoch": 0.5774862097927497, "grad_norm": 0.6596657350941416, "learning_rate": 9.861012424120207e-06, "loss": 0.6535, "step": 795 }, { "epoch": 0.5782126075409166, "grad_norm": 0.6778250210754974, "learning_rate": 9.860452920403528e-06, "loss": 0.6806, "step": 796 }, { "epoch": 0.5789390052890836, "grad_norm": 0.7402223552090328, "learning_rate": 9.859892308725296e-06, "loss": 0.6659, "step": 797 }, { "epoch": 0.5796654030372506, "grad_norm": 0.6521000937099793, "learning_rate": 9.859330589213302e-06, "loss": 0.6747, "step": 798 }, { "epoch": 0.5803918007854175, "grad_norm": 0.6589709652600224, "learning_rate": 9.858767761995597e-06, "loss": 0.6516, "step": 799 }, { "epoch": 0.5811181985335846, "grad_norm": 0.6221340208951421, "learning_rate": 9.858203827200477e-06, "loss": 0.6575, "step": 800 }, { "epoch": 0.5818445962817516, "grad_norm": 1.4392475113310477, "learning_rate": 9.857638784956492e-06, "loss": 0.6735, "step": 801 }, { "epoch": 0.5825709940299185, "grad_norm": 0.6345408144241541, "learning_rate": 9.857072635392449e-06, "loss": 0.6704, "step": 802 }, { "epoch": 0.5832973917780855, "grad_norm": 0.7024596655943957, "learning_rate": 9.856505378637402e-06, "loss": 0.6829, "step": 803 }, { "epoch": 0.5840237895262524, "grad_norm": 0.6292491947390643, "learning_rate": 9.85593701482066e-06, "loss": 0.6877, "step": 804 }, { "epoch": 0.5847501872744194, "grad_norm": 0.6444928744841502, "learning_rate": 9.855367544071785e-06, "loss": 0.664, "step": 805 }, { "epoch": 0.5854765850225865, "grad_norm": 0.6334451683246128, "learning_rate": 9.85479696652059e-06, "loss": 0.6696, "step": 806 }, { "epoch": 0.5862029827707534, "grad_norm": 0.6809663655418011, "learning_rate": 9.854225282297141e-06, "loss": 0.6951, "step": 807 }, { "epoch": 0.5869293805189204, "grad_norm": 0.6633437669132748, "learning_rate": 9.853652491531756e-06, "loss": 0.6822, "step": 808 }, { "epoch": 0.5876557782670874, "grad_norm": 0.6756561406729703, "learning_rate": 9.853078594355003e-06, "loss": 0.6729, "step": 809 }, { "epoch": 0.5883821760152543, "grad_norm": 0.6175772675068528, "learning_rate": 9.852503590897708e-06, "loss": 0.6654, "step": 810 }, { "epoch": 0.5891085737634213, "grad_norm": 0.6342072664742784, "learning_rate": 9.851927481290943e-06, "loss": 0.6813, "step": 811 }, { "epoch": 0.5898349715115884, "grad_norm": 0.615481235538996, "learning_rate": 9.851350265666034e-06, "loss": 0.6569, "step": 812 }, { "epoch": 0.5905613692597553, "grad_norm": 0.623506210218369, "learning_rate": 9.850771944154563e-06, "loss": 0.664, "step": 813 }, { "epoch": 0.5912877670079223, "grad_norm": 0.6268532226267906, "learning_rate": 9.850192516888357e-06, "loss": 0.662, "step": 814 }, { "epoch": 0.5920141647560893, "grad_norm": 0.6137453262606134, "learning_rate": 9.849611983999503e-06, "loss": 0.6481, "step": 815 }, { "epoch": 0.5927405625042562, "grad_norm": 0.593844174928628, "learning_rate": 9.849030345620333e-06, "loss": 0.6664, "step": 816 }, { "epoch": 0.5934669602524232, "grad_norm": 0.6066439569327111, "learning_rate": 9.848447601883436e-06, "loss": 0.6625, "step": 817 }, { "epoch": 0.5941933580005901, "grad_norm": 0.6131220553241871, "learning_rate": 9.847863752921649e-06, "loss": 0.6769, "step": 818 }, { "epoch": 0.5949197557487572, "grad_norm": 0.6127604757106416, "learning_rate": 9.847278798868065e-06, "loss": 0.6475, "step": 819 }, { "epoch": 0.5956461534969242, "grad_norm": 0.6099319552963688, "learning_rate": 9.846692739856023e-06, "loss": 0.6714, "step": 820 }, { "epoch": 0.5963725512450911, "grad_norm": 0.6400652371963013, "learning_rate": 9.846105576019124e-06, "loss": 0.6549, "step": 821 }, { "epoch": 0.5970989489932581, "grad_norm": 0.6455148000021561, "learning_rate": 9.84551730749121e-06, "loss": 0.671, "step": 822 }, { "epoch": 0.5978253467414251, "grad_norm": 0.6703898500769329, "learning_rate": 9.844927934406381e-06, "loss": 0.6672, "step": 823 }, { "epoch": 0.598551744489592, "grad_norm": 0.6818434803159491, "learning_rate": 9.844337456898985e-06, "loss": 0.6559, "step": 824 }, { "epoch": 0.5992781422377591, "grad_norm": 0.6307691400318461, "learning_rate": 9.843745875103628e-06, "loss": 0.6497, "step": 825 }, { "epoch": 0.6000045399859261, "grad_norm": 0.605302882547688, "learning_rate": 9.84315318915516e-06, "loss": 0.6723, "step": 826 }, { "epoch": 0.600730937734093, "grad_norm": 0.6250992899123806, "learning_rate": 9.842559399188687e-06, "loss": 0.6481, "step": 827 }, { "epoch": 0.60145733548226, "grad_norm": 0.6090951991297758, "learning_rate": 9.841964505339568e-06, "loss": 0.6534, "step": 828 }, { "epoch": 0.602183733230427, "grad_norm": 0.6731271428571752, "learning_rate": 9.84136850774341e-06, "loss": 0.6479, "step": 829 }, { "epoch": 0.6029101309785939, "grad_norm": 0.6039650209903596, "learning_rate": 9.840771406536073e-06, "loss": 0.6731, "step": 830 }, { "epoch": 0.603636528726761, "grad_norm": 0.6011413219903619, "learning_rate": 9.84017320185367e-06, "loss": 0.6838, "step": 831 }, { "epoch": 0.604362926474928, "grad_norm": 0.6247705736995238, "learning_rate": 9.839573893832564e-06, "loss": 0.6723, "step": 832 }, { "epoch": 0.6050893242230949, "grad_norm": 0.6220282560246873, "learning_rate": 9.83897348260937e-06, "loss": 0.6511, "step": 833 }, { "epoch": 0.6058157219712619, "grad_norm": 0.6196889968424526, "learning_rate": 9.838371968320951e-06, "loss": 0.6669, "step": 834 }, { "epoch": 0.6065421197194288, "grad_norm": 0.6183641181183734, "learning_rate": 9.837769351104431e-06, "loss": 0.6492, "step": 835 }, { "epoch": 0.6072685174675958, "grad_norm": 0.7105892308887408, "learning_rate": 9.837165631097177e-06, "loss": 0.66, "step": 836 }, { "epoch": 0.6079949152157629, "grad_norm": 0.5994749592259667, "learning_rate": 9.836560808436806e-06, "loss": 0.6766, "step": 837 }, { "epoch": 0.6087213129639298, "grad_norm": 0.6002717353440816, "learning_rate": 9.835954883261195e-06, "loss": 0.6705, "step": 838 }, { "epoch": 0.6094477107120968, "grad_norm": 0.6091493878958004, "learning_rate": 9.835347855708464e-06, "loss": 0.6582, "step": 839 }, { "epoch": 0.6101741084602638, "grad_norm": 0.5968783813759632, "learning_rate": 9.834739725916988e-06, "loss": 0.6562, "step": 840 }, { "epoch": 0.6109005062084307, "grad_norm": 0.7226406343809145, "learning_rate": 9.834130494025395e-06, "loss": 0.6682, "step": 841 }, { "epoch": 0.6116269039565977, "grad_norm": 0.6236245151795735, "learning_rate": 9.833520160172561e-06, "loss": 0.6716, "step": 842 }, { "epoch": 0.6123533017047648, "grad_norm": 0.649954848171266, "learning_rate": 9.832908724497613e-06, "loss": 0.6793, "step": 843 }, { "epoch": 0.6130796994529317, "grad_norm": 0.8026775812212916, "learning_rate": 9.83229618713993e-06, "loss": 0.6482, "step": 844 }, { "epoch": 0.6138060972010987, "grad_norm": 0.6549633112318536, "learning_rate": 9.831682548239145e-06, "loss": 0.6554, "step": 845 }, { "epoch": 0.6145324949492657, "grad_norm": 0.6713749440634522, "learning_rate": 9.83106780793514e-06, "loss": 0.6501, "step": 846 }, { "epoch": 0.6152588926974326, "grad_norm": 0.5976092035267176, "learning_rate": 9.830451966368044e-06, "loss": 0.6531, "step": 847 }, { "epoch": 0.6159852904455996, "grad_norm": 0.6049813779727117, "learning_rate": 9.829835023678243e-06, "loss": 0.6511, "step": 848 }, { "epoch": 0.6167116881937666, "grad_norm": 0.6075110205247508, "learning_rate": 9.829216980006372e-06, "loss": 0.6521, "step": 849 }, { "epoch": 0.6174380859419336, "grad_norm": 0.6252951495072839, "learning_rate": 9.828597835493319e-06, "loss": 0.6458, "step": 850 }, { "epoch": 0.6181644836901006, "grad_norm": 0.6655929334398094, "learning_rate": 9.827977590280217e-06, "loss": 0.6442, "step": 851 }, { "epoch": 0.6188908814382675, "grad_norm": 0.5849309071329679, "learning_rate": 9.827356244508455e-06, "loss": 0.654, "step": 852 }, { "epoch": 0.6196172791864345, "grad_norm": 0.6329417045286834, "learning_rate": 9.82673379831967e-06, "loss": 0.6541, "step": 853 }, { "epoch": 0.6203436769346015, "grad_norm": 0.6746285553018466, "learning_rate": 9.826110251855755e-06, "loss": 0.6468, "step": 854 }, { "epoch": 0.6210700746827685, "grad_norm": 0.6625458089858239, "learning_rate": 9.825485605258846e-06, "loss": 0.6475, "step": 855 }, { "epoch": 0.6217964724309355, "grad_norm": 0.5924986665162189, "learning_rate": 9.824859858671337e-06, "loss": 0.6533, "step": 856 }, { "epoch": 0.6225228701791025, "grad_norm": 0.5925928031908367, "learning_rate": 9.824233012235869e-06, "loss": 0.6523, "step": 857 }, { "epoch": 0.6232492679272694, "grad_norm": 0.6605467068380393, "learning_rate": 9.823605066095333e-06, "loss": 0.6655, "step": 858 }, { "epoch": 0.6239756656754364, "grad_norm": 0.5758378226313285, "learning_rate": 9.822976020392874e-06, "loss": 0.6527, "step": 859 }, { "epoch": 0.6247020634236033, "grad_norm": 0.8187112632079937, "learning_rate": 9.822345875271884e-06, "loss": 0.6708, "step": 860 }, { "epoch": 0.6254284611717704, "grad_norm": 0.6091453080384412, "learning_rate": 9.821714630876009e-06, "loss": 0.6741, "step": 861 }, { "epoch": 0.6261548589199374, "grad_norm": 0.6029200767372679, "learning_rate": 9.821082287349142e-06, "loss": 0.652, "step": 862 }, { "epoch": 0.6268812566681043, "grad_norm": 0.6106188486403386, "learning_rate": 9.820448844835428e-06, "loss": 0.6535, "step": 863 }, { "epoch": 0.6276076544162713, "grad_norm": 0.5798197726465589, "learning_rate": 9.819814303479268e-06, "loss": 0.6448, "step": 864 }, { "epoch": 0.6283340521644383, "grad_norm": 0.5943359460871429, "learning_rate": 9.819178663425302e-06, "loss": 0.6593, "step": 865 }, { "epoch": 0.6290604499126052, "grad_norm": 0.5711044824370614, "learning_rate": 9.818541924818432e-06, "loss": 0.6672, "step": 866 }, { "epoch": 0.6297868476607722, "grad_norm": 0.6952982586972026, "learning_rate": 9.817904087803802e-06, "loss": 0.6609, "step": 867 }, { "epoch": 0.6305132454089393, "grad_norm": 0.5666054412389501, "learning_rate": 9.817265152526811e-06, "loss": 0.6608, "step": 868 }, { "epoch": 0.6312396431571062, "grad_norm": 0.607448748434137, "learning_rate": 9.816625119133109e-06, "loss": 0.6532, "step": 869 }, { "epoch": 0.6319660409052732, "grad_norm": 0.6135073373387316, "learning_rate": 9.81598398776859e-06, "loss": 0.657, "step": 870 }, { "epoch": 0.6326924386534402, "grad_norm": 0.5926396668336961, "learning_rate": 9.815341758579407e-06, "loss": 0.6576, "step": 871 }, { "epoch": 0.6334188364016071, "grad_norm": 0.5597381238227889, "learning_rate": 9.814698431711957e-06, "loss": 0.6547, "step": 872 }, { "epoch": 0.6341452341497741, "grad_norm": 0.5823788201141964, "learning_rate": 9.814054007312888e-06, "loss": 0.6552, "step": 873 }, { "epoch": 0.6348716318979412, "grad_norm": 0.5756772937219624, "learning_rate": 9.813408485529103e-06, "loss": 0.664, "step": 874 }, { "epoch": 0.6355980296461081, "grad_norm": 0.5809430277549485, "learning_rate": 9.812761866507748e-06, "loss": 0.665, "step": 875 }, { "epoch": 0.6363244273942751, "grad_norm": 0.5656279015120884, "learning_rate": 9.812114150396223e-06, "loss": 0.6685, "step": 876 }, { "epoch": 0.637050825142442, "grad_norm": 0.5645400936452617, "learning_rate": 9.81146533734218e-06, "loss": 0.6661, "step": 877 }, { "epoch": 0.637777222890609, "grad_norm": 0.6038920581138288, "learning_rate": 9.810815427493516e-06, "loss": 0.6513, "step": 878 }, { "epoch": 0.638503620638776, "grad_norm": 0.6287288278527075, "learning_rate": 9.810164420998385e-06, "loss": 0.6378, "step": 879 }, { "epoch": 0.639230018386943, "grad_norm": 0.5533432856351014, "learning_rate": 9.80951231800518e-06, "loss": 0.6503, "step": 880 }, { "epoch": 0.63995641613511, "grad_norm": 0.5470769024145264, "learning_rate": 9.808859118662558e-06, "loss": 0.6487, "step": 881 }, { "epoch": 0.640682813883277, "grad_norm": 0.5735833277625101, "learning_rate": 9.808204823119414e-06, "loss": 0.6383, "step": 882 }, { "epoch": 0.6414092116314439, "grad_norm": 0.556322686472196, "learning_rate": 9.8075494315249e-06, "loss": 0.6465, "step": 883 }, { "epoch": 0.6421356093796109, "grad_norm": 0.5843117762599995, "learning_rate": 9.806892944028414e-06, "loss": 0.662, "step": 884 }, { "epoch": 0.6428620071277779, "grad_norm": 0.5499746337779424, "learning_rate": 9.806235360779605e-06, "loss": 0.6469, "step": 885 }, { "epoch": 0.6435884048759449, "grad_norm": 0.5773912530605604, "learning_rate": 9.805576681928373e-06, "loss": 0.6573, "step": 886 }, { "epoch": 0.6443148026241119, "grad_norm": 0.5699879773421873, "learning_rate": 9.804916907624865e-06, "loss": 0.6674, "step": 887 }, { "epoch": 0.6450412003722789, "grad_norm": 0.5789871539035835, "learning_rate": 9.804256038019482e-06, "loss": 0.6588, "step": 888 }, { "epoch": 0.6457675981204458, "grad_norm": 0.5261604408640989, "learning_rate": 9.80359407326287e-06, "loss": 0.6562, "step": 889 }, { "epoch": 0.6464939958686128, "grad_norm": 0.5557539019590454, "learning_rate": 9.802931013505929e-06, "loss": 0.6409, "step": 890 }, { "epoch": 0.6472203936167797, "grad_norm": 0.5481978932889137, "learning_rate": 9.802266858899802e-06, "loss": 0.645, "step": 891 }, { "epoch": 0.6479467913649468, "grad_norm": 0.5563785530002918, "learning_rate": 9.801601609595892e-06, "loss": 0.6453, "step": 892 }, { "epoch": 0.6486731891131138, "grad_norm": 0.5694891433544842, "learning_rate": 9.80093526574584e-06, "loss": 0.6409, "step": 893 }, { "epoch": 0.6493995868612807, "grad_norm": 0.6636903196693987, "learning_rate": 9.800267827501548e-06, "loss": 0.6245, "step": 894 }, { "epoch": 0.6501259846094477, "grad_norm": 0.6284598297309115, "learning_rate": 9.799599295015154e-06, "loss": 0.6366, "step": 895 }, { "epoch": 0.6508523823576147, "grad_norm": 0.5437492801963629, "learning_rate": 9.798929668439059e-06, "loss": 0.6618, "step": 896 }, { "epoch": 0.6515787801057816, "grad_norm": 0.5833253680539345, "learning_rate": 9.798258947925905e-06, "loss": 0.6482, "step": 897 }, { "epoch": 0.6523051778539487, "grad_norm": 0.639449411834674, "learning_rate": 9.797587133628586e-06, "loss": 0.6472, "step": 898 }, { "epoch": 0.6530315756021157, "grad_norm": 0.535584341412608, "learning_rate": 9.796914225700243e-06, "loss": 0.6593, "step": 899 }, { "epoch": 0.6537579733502826, "grad_norm": 0.547567869110903, "learning_rate": 9.79624022429427e-06, "loss": 0.6393, "step": 900 }, { "epoch": 0.6544843710984496, "grad_norm": 0.5522922741935506, "learning_rate": 9.79556512956431e-06, "loss": 0.6572, "step": 901 }, { "epoch": 0.6552107688466166, "grad_norm": 0.5893600739438992, "learning_rate": 9.794888941664253e-06, "loss": 0.6416, "step": 902 }, { "epoch": 0.6559371665947835, "grad_norm": 0.5351461232055588, "learning_rate": 9.794211660748239e-06, "loss": 0.6421, "step": 903 }, { "epoch": 0.6566635643429506, "grad_norm": 0.5945928584609042, "learning_rate": 9.793533286970655e-06, "loss": 0.6706, "step": 904 }, { "epoch": 0.6573899620911176, "grad_norm": 0.5570387654513104, "learning_rate": 9.79285382048614e-06, "loss": 0.6413, "step": 905 }, { "epoch": 0.6581163598392845, "grad_norm": 0.5846520230724995, "learning_rate": 9.792173261449586e-06, "loss": 0.653, "step": 906 }, { "epoch": 0.6588427575874515, "grad_norm": 0.5526390975042158, "learning_rate": 9.791491610016123e-06, "loss": 0.65, "step": 907 }, { "epoch": 0.6595691553356184, "grad_norm": 0.5574061987078701, "learning_rate": 9.79080886634114e-06, "loss": 0.6425, "step": 908 }, { "epoch": 0.6602955530837854, "grad_norm": 0.5341499920110605, "learning_rate": 9.79012503058027e-06, "loss": 0.6521, "step": 909 }, { "epoch": 0.6610219508319525, "grad_norm": 0.5791055056403942, "learning_rate": 9.789440102889396e-06, "loss": 0.658, "step": 910 }, { "epoch": 0.6617483485801194, "grad_norm": 0.5260761897292887, "learning_rate": 9.788754083424654e-06, "loss": 0.6401, "step": 911 }, { "epoch": 0.6624747463282864, "grad_norm": 0.5109150407815444, "learning_rate": 9.788066972342421e-06, "loss": 0.6506, "step": 912 }, { "epoch": 0.6632011440764534, "grad_norm": 0.5319111825792613, "learning_rate": 9.787378769799326e-06, "loss": 0.652, "step": 913 }, { "epoch": 0.6639275418246203, "grad_norm": 0.5240092882015561, "learning_rate": 9.786689475952254e-06, "loss": 0.6445, "step": 914 }, { "epoch": 0.6646539395727873, "grad_norm": 0.5440903216518904, "learning_rate": 9.785999090958326e-06, "loss": 0.6308, "step": 915 }, { "epoch": 0.6653803373209543, "grad_norm": 0.5182863646250728, "learning_rate": 9.785307614974922e-06, "loss": 0.6353, "step": 916 }, { "epoch": 0.6661067350691213, "grad_norm": 0.5407517444632997, "learning_rate": 9.784615048159665e-06, "loss": 0.6567, "step": 917 }, { "epoch": 0.6668331328172883, "grad_norm": 0.5595401196963885, "learning_rate": 9.78392139067043e-06, "loss": 0.6618, "step": 918 }, { "epoch": 0.6675595305654553, "grad_norm": 0.5281983812603945, "learning_rate": 9.783226642665338e-06, "loss": 0.637, "step": 919 }, { "epoch": 0.6682859283136222, "grad_norm": 0.588468629477807, "learning_rate": 9.782530804302763e-06, "loss": 0.6406, "step": 920 }, { "epoch": 0.6690123260617892, "grad_norm": 0.5387289554693768, "learning_rate": 9.78183387574132e-06, "loss": 0.6327, "step": 921 }, { "epoch": 0.6697387238099561, "grad_norm": 0.5521325969515342, "learning_rate": 9.781135857139881e-06, "loss": 0.6374, "step": 922 }, { "epoch": 0.6704651215581232, "grad_norm": 0.5453121610390395, "learning_rate": 9.780436748657559e-06, "loss": 0.6742, "step": 923 }, { "epoch": 0.6711915193062902, "grad_norm": 0.527286669061359, "learning_rate": 9.77973655045372e-06, "loss": 0.648, "step": 924 }, { "epoch": 0.6719179170544571, "grad_norm": 0.5625960128189396, "learning_rate": 9.779035262687976e-06, "loss": 0.6288, "step": 925 }, { "epoch": 0.6726443148026241, "grad_norm": 0.5362827537791287, "learning_rate": 9.778332885520195e-06, "loss": 0.6277, "step": 926 }, { "epoch": 0.6733707125507911, "grad_norm": 0.745633309649563, "learning_rate": 9.777629419110478e-06, "loss": 0.636, "step": 927 }, { "epoch": 0.674097110298958, "grad_norm": 0.5743650133346059, "learning_rate": 9.776924863619187e-06, "loss": 0.6313, "step": 928 }, { "epoch": 0.6748235080471251, "grad_norm": 0.5339811952680448, "learning_rate": 9.776219219206933e-06, "loss": 0.6534, "step": 929 }, { "epoch": 0.6755499057952921, "grad_norm": 0.5412542702478056, "learning_rate": 9.775512486034564e-06, "loss": 0.6439, "step": 930 }, { "epoch": 0.676276303543459, "grad_norm": 0.5390435892309067, "learning_rate": 9.774804664263184e-06, "loss": 0.658, "step": 931 }, { "epoch": 0.677002701291626, "grad_norm": 0.6095626038690631, "learning_rate": 9.774095754054147e-06, "loss": 0.6488, "step": 932 }, { "epoch": 0.677729099039793, "grad_norm": 0.5497445658882337, "learning_rate": 9.773385755569052e-06, "loss": 0.6394, "step": 933 }, { "epoch": 0.6784554967879599, "grad_norm": 0.5966473206171481, "learning_rate": 9.772674668969743e-06, "loss": 0.6429, "step": 934 }, { "epoch": 0.679181894536127, "grad_norm": 0.5590698378286874, "learning_rate": 9.771962494418319e-06, "loss": 0.6359, "step": 935 }, { "epoch": 0.679908292284294, "grad_norm": 0.5339631288047351, "learning_rate": 9.77124923207712e-06, "loss": 0.6255, "step": 936 }, { "epoch": 0.6806346900324609, "grad_norm": 0.5693835323242853, "learning_rate": 9.77053488210874e-06, "loss": 0.6454, "step": 937 }, { "epoch": 0.6813610877806279, "grad_norm": 0.5136483877400199, "learning_rate": 9.769819444676017e-06, "loss": 0.6423, "step": 938 }, { "epoch": 0.6820874855287948, "grad_norm": 0.5644953061878822, "learning_rate": 9.769102919942038e-06, "loss": 0.6327, "step": 939 }, { "epoch": 0.6828138832769618, "grad_norm": 0.5243587189424204, "learning_rate": 9.768385308070139e-06, "loss": 0.6523, "step": 940 }, { "epoch": 0.6835402810251289, "grad_norm": 0.53271625284346, "learning_rate": 9.767666609223902e-06, "loss": 0.6441, "step": 941 }, { "epoch": 0.6842666787732958, "grad_norm": 0.5814209737223036, "learning_rate": 9.766946823567157e-06, "loss": 0.619, "step": 942 }, { "epoch": 0.6849930765214628, "grad_norm": 0.5309212150196624, "learning_rate": 9.766225951263982e-06, "loss": 0.6108, "step": 943 }, { "epoch": 0.6857194742696298, "grad_norm": 0.5374951063410888, "learning_rate": 9.765503992478704e-06, "loss": 0.627, "step": 944 }, { "epoch": 0.6864458720177967, "grad_norm": 0.5142193404257767, "learning_rate": 9.764780947375898e-06, "loss": 0.65, "step": 945 }, { "epoch": 0.6871722697659637, "grad_norm": 0.5385889304668647, "learning_rate": 9.764056816120383e-06, "loss": 0.6447, "step": 946 }, { "epoch": 0.6878986675141308, "grad_norm": 0.5079349675047481, "learning_rate": 9.763331598877229e-06, "loss": 0.6252, "step": 947 }, { "epoch": 0.6886250652622977, "grad_norm": 0.5252424116585818, "learning_rate": 9.762605295811751e-06, "loss": 0.6378, "step": 948 }, { "epoch": 0.6893514630104647, "grad_norm": 0.5351541129299291, "learning_rate": 9.761877907089515e-06, "loss": 0.6316, "step": 949 }, { "epoch": 0.6900778607586316, "grad_norm": 0.5213029147033851, "learning_rate": 9.761149432876331e-06, "loss": 0.6434, "step": 950 }, { "epoch": 0.6908042585067986, "grad_norm": 0.5287068047571596, "learning_rate": 9.760419873338261e-06, "loss": 0.6434, "step": 951 }, { "epoch": 0.6915306562549656, "grad_norm": 0.5534499293711467, "learning_rate": 9.759689228641605e-06, "loss": 0.6395, "step": 952 }, { "epoch": 0.6922570540031326, "grad_norm": 0.586784866270694, "learning_rate": 9.758957498952923e-06, "loss": 0.653, "step": 953 }, { "epoch": 0.6929834517512996, "grad_norm": 0.5266464417997495, "learning_rate": 9.758224684439013e-06, "loss": 0.6266, "step": 954 }, { "epoch": 0.6937098494994666, "grad_norm": 0.5122612880356957, "learning_rate": 9.757490785266924e-06, "loss": 0.6347, "step": 955 }, { "epoch": 0.6944362472476335, "grad_norm": 0.6375844075472203, "learning_rate": 9.75675580160395e-06, "loss": 0.625, "step": 956 }, { "epoch": 0.6951626449958005, "grad_norm": 0.5212655025584345, "learning_rate": 9.756019733617634e-06, "loss": 0.6396, "step": 957 }, { "epoch": 0.6958890427439675, "grad_norm": 0.5086286936385832, "learning_rate": 9.755282581475769e-06, "loss": 0.6342, "step": 958 }, { "epoch": 0.6966154404921344, "grad_norm": 0.5128731180861688, "learning_rate": 9.754544345346388e-06, "loss": 0.6292, "step": 959 }, { "epoch": 0.6973418382403015, "grad_norm": 0.4999617398047923, "learning_rate": 9.75380502539778e-06, "loss": 0.6339, "step": 960 }, { "epoch": 0.6980682359884685, "grad_norm": 0.5282274063606959, "learning_rate": 9.753064621798472e-06, "loss": 0.6432, "step": 961 }, { "epoch": 0.6987946337366354, "grad_norm": 0.5252127904810339, "learning_rate": 9.752323134717244e-06, "loss": 0.6281, "step": 962 }, { "epoch": 0.6995210314848024, "grad_norm": 0.5082613730934245, "learning_rate": 9.75158056432312e-06, "loss": 0.6338, "step": 963 }, { "epoch": 0.7002474292329693, "grad_norm": 0.4882829401463821, "learning_rate": 9.750836910785372e-06, "loss": 0.6304, "step": 964 }, { "epoch": 0.7009738269811363, "grad_norm": 0.5538873366273053, "learning_rate": 9.75009217427352e-06, "loss": 0.6355, "step": 965 }, { "epoch": 0.7017002247293034, "grad_norm": 0.7956754652954774, "learning_rate": 9.749346354957331e-06, "loss": 0.6462, "step": 966 }, { "epoch": 0.7024266224774703, "grad_norm": 0.5219908542511834, "learning_rate": 9.748599453006818e-06, "loss": 0.6265, "step": 967 }, { "epoch": 0.7031530202256373, "grad_norm": 0.5210128918221577, "learning_rate": 9.74785146859224e-06, "loss": 0.6537, "step": 968 }, { "epoch": 0.7038794179738043, "grad_norm": 0.5040833546403364, "learning_rate": 9.7471024018841e-06, "loss": 0.6385, "step": 969 }, { "epoch": 0.7046058157219712, "grad_norm": 0.5001664206030246, "learning_rate": 9.746352253053155e-06, "loss": 0.6313, "step": 970 }, { "epoch": 0.7053322134701382, "grad_norm": 0.5098747327571058, "learning_rate": 9.745601022270403e-06, "loss": 0.633, "step": 971 }, { "epoch": 0.7060586112183053, "grad_norm": 0.8219230135157451, "learning_rate": 9.74484870970709e-06, "loss": 0.6325, "step": 972 }, { "epoch": 0.7067850089664722, "grad_norm": 0.5041778633169351, "learning_rate": 9.744095315534711e-06, "loss": 0.6515, "step": 973 }, { "epoch": 0.7075114067146392, "grad_norm": 0.48370176730119585, "learning_rate": 9.743340839925006e-06, "loss": 0.6188, "step": 974 }, { "epoch": 0.7082378044628062, "grad_norm": 0.5875403728810346, "learning_rate": 9.742585283049957e-06, "loss": 0.6616, "step": 975 }, { "epoch": 0.7089642022109731, "grad_norm": 0.6310835977211383, "learning_rate": 9.741828645081799e-06, "loss": 0.6453, "step": 976 }, { "epoch": 0.7096905999591401, "grad_norm": 0.545599500733757, "learning_rate": 9.741070926193013e-06, "loss": 0.6322, "step": 977 }, { "epoch": 0.7104169977073072, "grad_norm": 0.5017837062704927, "learning_rate": 9.74031212655632e-06, "loss": 0.6512, "step": 978 }, { "epoch": 0.7111433954554741, "grad_norm": 0.503250513244053, "learning_rate": 9.739552246344692e-06, "loss": 0.6547, "step": 979 }, { "epoch": 0.7118697932036411, "grad_norm": 0.5163358870991519, "learning_rate": 9.738791285731353e-06, "loss": 0.6483, "step": 980 }, { "epoch": 0.712596190951808, "grad_norm": 0.49564125514372964, "learning_rate": 9.73802924488976e-06, "loss": 0.6462, "step": 981 }, { "epoch": 0.713322588699975, "grad_norm": 0.4913113083401416, "learning_rate": 9.737266123993627e-06, "loss": 0.6428, "step": 982 }, { "epoch": 0.714048986448142, "grad_norm": 0.5095254384295473, "learning_rate": 9.736501923216912e-06, "loss": 0.6188, "step": 983 }, { "epoch": 0.714775384196309, "grad_norm": 0.5189549653207551, "learning_rate": 9.735736642733816e-06, "loss": 0.6122, "step": 984 }, { "epoch": 0.715501781944476, "grad_norm": 0.5112989759132592, "learning_rate": 9.734970282718788e-06, "loss": 0.6334, "step": 985 }, { "epoch": 0.716228179692643, "grad_norm": 0.5107580279579825, "learning_rate": 9.734202843346522e-06, "loss": 0.6252, "step": 986 }, { "epoch": 0.7169545774408099, "grad_norm": 0.5167080885659902, "learning_rate": 9.733434324791962e-06, "loss": 0.6401, "step": 987 }, { "epoch": 0.7176809751889769, "grad_norm": 0.5023084975379272, "learning_rate": 9.732664727230293e-06, "loss": 0.6281, "step": 988 }, { "epoch": 0.7184073729371439, "grad_norm": 0.537562102338342, "learning_rate": 9.731894050836952e-06, "loss": 0.6246, "step": 989 }, { "epoch": 0.7191337706853109, "grad_norm": 0.48088299234652243, "learning_rate": 9.731122295787611e-06, "loss": 0.6354, "step": 990 }, { "epoch": 0.7198601684334779, "grad_norm": 0.563544077811699, "learning_rate": 9.730349462258202e-06, "loss": 0.6261, "step": 991 }, { "epoch": 0.7205865661816448, "grad_norm": 0.49876787412524776, "learning_rate": 9.729575550424892e-06, "loss": 0.6181, "step": 992 }, { "epoch": 0.7213129639298118, "grad_norm": 0.527873118148596, "learning_rate": 9.728800560464097e-06, "loss": 0.6362, "step": 993 }, { "epoch": 0.7220393616779788, "grad_norm": 0.6547177893128939, "learning_rate": 9.728024492552484e-06, "loss": 0.6312, "step": 994 }, { "epoch": 0.7227657594261457, "grad_norm": 0.4938014707944413, "learning_rate": 9.727247346866955e-06, "loss": 0.6386, "step": 995 }, { "epoch": 0.7234921571743128, "grad_norm": 0.4969962276566467, "learning_rate": 9.726469123584668e-06, "loss": 0.6301, "step": 996 }, { "epoch": 0.7242185549224798, "grad_norm": 0.5610061497738755, "learning_rate": 9.72568982288302e-06, "loss": 0.6261, "step": 997 }, { "epoch": 0.7249449526706467, "grad_norm": 0.48231769992974666, "learning_rate": 9.724909444939657e-06, "loss": 0.6321, "step": 998 }, { "epoch": 0.7256713504188137, "grad_norm": 0.5566561152996569, "learning_rate": 9.72412798993247e-06, "loss": 0.6301, "step": 999 }, { "epoch": 0.7263977481669807, "grad_norm": 0.4740045617404099, "learning_rate": 9.723345458039595e-06, "loss": 0.6333, "step": 1000 }, { "epoch": 0.7271241459151476, "grad_norm": 0.4673201163985113, "learning_rate": 9.722561849439414e-06, "loss": 0.6273, "step": 1001 }, { "epoch": 0.7278505436633147, "grad_norm": 0.5108892842569576, "learning_rate": 9.72177716431055e-06, "loss": 0.6239, "step": 1002 }, { "epoch": 0.7285769414114817, "grad_norm": 0.5098263048545012, "learning_rate": 9.720991402831883e-06, "loss": 0.6012, "step": 1003 }, { "epoch": 0.7293033391596486, "grad_norm": 0.5815800988061166, "learning_rate": 9.720204565182522e-06, "loss": 0.6204, "step": 1004 }, { "epoch": 0.7300297369078156, "grad_norm": 0.47676217623350187, "learning_rate": 9.719416651541839e-06, "loss": 0.6105, "step": 1005 }, { "epoch": 0.7307561346559825, "grad_norm": 0.47753745865698777, "learning_rate": 9.718627662089435e-06, "loss": 0.6395, "step": 1006 }, { "epoch": 0.7314825324041495, "grad_norm": 0.4879838290382639, "learning_rate": 9.717837597005169e-06, "loss": 0.639, "step": 1007 }, { "epoch": 0.7322089301523165, "grad_norm": 0.5013716494090865, "learning_rate": 9.717046456469135e-06, "loss": 0.6325, "step": 1008 }, { "epoch": 0.7329353279004835, "grad_norm": 0.4751272371204352, "learning_rate": 9.71625424066168e-06, "loss": 0.6241, "step": 1009 }, { "epoch": 0.7336617256486505, "grad_norm": 0.4699052765127366, "learning_rate": 9.715460949763393e-06, "loss": 0.6285, "step": 1010 }, { "epoch": 0.7343881233968175, "grad_norm": 0.5191334092088646, "learning_rate": 9.714666583955108e-06, "loss": 0.6228, "step": 1011 }, { "epoch": 0.7351145211449844, "grad_norm": 0.49374498413992257, "learning_rate": 9.713871143417902e-06, "loss": 0.6441, "step": 1012 }, { "epoch": 0.7358409188931514, "grad_norm": 0.4863582873661774, "learning_rate": 9.713074628333102e-06, "loss": 0.6254, "step": 1013 }, { "epoch": 0.7365673166413184, "grad_norm": 0.4498108134181778, "learning_rate": 9.712277038882274e-06, "loss": 0.6301, "step": 1014 }, { "epoch": 0.7372937143894854, "grad_norm": 0.5126199564731452, "learning_rate": 9.711478375247237e-06, "loss": 0.6335, "step": 1015 }, { "epoch": 0.7380201121376524, "grad_norm": 0.49607691586084646, "learning_rate": 9.710678637610045e-06, "loss": 0.6268, "step": 1016 }, { "epoch": 0.7387465098858194, "grad_norm": 0.458163972923393, "learning_rate": 9.709877826153003e-06, "loss": 0.6478, "step": 1017 }, { "epoch": 0.7394729076339863, "grad_norm": 0.4874017595835269, "learning_rate": 9.709075941058661e-06, "loss": 0.6251, "step": 1018 }, { "epoch": 0.7401993053821533, "grad_norm": 0.47017693490570733, "learning_rate": 9.70827298250981e-06, "loss": 0.6363, "step": 1019 }, { "epoch": 0.7409257031303202, "grad_norm": 0.6968313114729715, "learning_rate": 9.70746895068949e-06, "loss": 0.6149, "step": 1020 }, { "epoch": 0.7416521008784873, "grad_norm": 0.4965074758910424, "learning_rate": 9.706663845780984e-06, "loss": 0.6154, "step": 1021 }, { "epoch": 0.7423784986266543, "grad_norm": 0.48551084123096894, "learning_rate": 9.705857667967818e-06, "loss": 0.6312, "step": 1022 }, { "epoch": 0.7431048963748212, "grad_norm": 0.4866557666908839, "learning_rate": 9.705050417433762e-06, "loss": 0.6192, "step": 1023 }, { "epoch": 0.7438312941229882, "grad_norm": 0.5025491441411499, "learning_rate": 9.704242094362834e-06, "loss": 0.6086, "step": 1024 }, { "epoch": 0.7445576918711552, "grad_norm": 0.4786396836500014, "learning_rate": 9.703432698939294e-06, "loss": 0.6355, "step": 1025 }, { "epoch": 0.7452840896193221, "grad_norm": 0.5195325928953927, "learning_rate": 9.70262223134765e-06, "loss": 0.6176, "step": 1026 }, { "epoch": 0.7460104873674892, "grad_norm": 0.47770348548058295, "learning_rate": 9.70181069177265e-06, "loss": 0.6208, "step": 1027 }, { "epoch": 0.7467368851156562, "grad_norm": 0.5669878887712634, "learning_rate": 9.700998080399287e-06, "loss": 0.6118, "step": 1028 }, { "epoch": 0.7474632828638231, "grad_norm": 0.4697665700113132, "learning_rate": 9.700184397412799e-06, "loss": 0.6287, "step": 1029 }, { "epoch": 0.7481896806119901, "grad_norm": 0.4642121247185043, "learning_rate": 9.699369642998671e-06, "loss": 0.627, "step": 1030 }, { "epoch": 0.7489160783601571, "grad_norm": 0.48927006449177757, "learning_rate": 9.698553817342627e-06, "loss": 0.6185, "step": 1031 }, { "epoch": 0.749642476108324, "grad_norm": 0.5077132474206864, "learning_rate": 9.697736920630641e-06, "loss": 0.6382, "step": 1032 }, { "epoch": 0.7503688738564911, "grad_norm": 0.4849060148767091, "learning_rate": 9.696918953048925e-06, "loss": 0.6371, "step": 1033 }, { "epoch": 0.751095271604658, "grad_norm": 0.4952415892198469, "learning_rate": 9.69609991478394e-06, "loss": 0.6219, "step": 1034 }, { "epoch": 0.751821669352825, "grad_norm": 0.46870581026203334, "learning_rate": 9.695279806022391e-06, "loss": 0.6062, "step": 1035 }, { "epoch": 0.752548067100992, "grad_norm": 0.44552062529184516, "learning_rate": 9.694458626951223e-06, "loss": 0.6336, "step": 1036 }, { "epoch": 0.7532744648491589, "grad_norm": 0.4701788285387683, "learning_rate": 9.693636377757628e-06, "loss": 0.6348, "step": 1037 }, { "epoch": 0.7540008625973259, "grad_norm": 0.4930902095299255, "learning_rate": 9.692813058629041e-06, "loss": 0.6309, "step": 1038 }, { "epoch": 0.754727260345493, "grad_norm": 0.513865274475057, "learning_rate": 9.691988669753141e-06, "loss": 0.6313, "step": 1039 }, { "epoch": 0.7554536580936599, "grad_norm": 0.5004488299102964, "learning_rate": 9.691163211317853e-06, "loss": 0.6186, "step": 1040 }, { "epoch": 0.7561800558418269, "grad_norm": 0.4632958610592306, "learning_rate": 9.69033668351134e-06, "loss": 0.6335, "step": 1041 }, { "epoch": 0.7569064535899939, "grad_norm": 0.5029063747150795, "learning_rate": 9.689509086522019e-06, "loss": 0.6247, "step": 1042 }, { "epoch": 0.7576328513381608, "grad_norm": 0.4634748963399614, "learning_rate": 9.688680420538539e-06, "loss": 0.6309, "step": 1043 }, { "epoch": 0.7583592490863278, "grad_norm": 0.4718510576080903, "learning_rate": 9.6878506857498e-06, "loss": 0.6371, "step": 1044 }, { "epoch": 0.7590856468344949, "grad_norm": 0.5131883999829031, "learning_rate": 9.687019882344942e-06, "loss": 0.6227, "step": 1045 }, { "epoch": 0.7598120445826618, "grad_norm": 0.5286803442259869, "learning_rate": 9.68618801051335e-06, "loss": 0.6347, "step": 1046 }, { "epoch": 0.7605384423308288, "grad_norm": 0.4592287328761153, "learning_rate": 9.685355070444658e-06, "loss": 0.6289, "step": 1047 }, { "epoch": 0.7612648400789958, "grad_norm": 0.46453734575529443, "learning_rate": 9.684521062328734e-06, "loss": 0.6167, "step": 1048 }, { "epoch": 0.7619912378271627, "grad_norm": 0.554800979203622, "learning_rate": 9.683685986355692e-06, "loss": 0.6174, "step": 1049 }, { "epoch": 0.7627176355753297, "grad_norm": 0.47868566678845503, "learning_rate": 9.682849842715895e-06, "loss": 0.6169, "step": 1050 }, { "epoch": 0.7634440333234968, "grad_norm": 0.4752654997453638, "learning_rate": 9.682012631599945e-06, "loss": 0.6115, "step": 1051 }, { "epoch": 0.7641704310716637, "grad_norm": 0.4779615895205545, "learning_rate": 9.681174353198687e-06, "loss": 0.6145, "step": 1052 }, { "epoch": 0.7648968288198307, "grad_norm": 0.48803918870499163, "learning_rate": 9.68033500770321e-06, "loss": 0.6149, "step": 1053 }, { "epoch": 0.7656232265679976, "grad_norm": 0.4696941476074349, "learning_rate": 9.679494595304848e-06, "loss": 0.6327, "step": 1054 }, { "epoch": 0.7663496243161646, "grad_norm": 0.4844539384776647, "learning_rate": 9.678653116195174e-06, "loss": 0.6239, "step": 1055 }, { "epoch": 0.7670760220643316, "grad_norm": 0.6568435300988817, "learning_rate": 9.677810570566011e-06, "loss": 0.6181, "step": 1056 }, { "epoch": 0.7678024198124985, "grad_norm": 0.5597089201336259, "learning_rate": 9.676966958609417e-06, "loss": 0.6265, "step": 1057 }, { "epoch": 0.7685288175606656, "grad_norm": 0.5421661140844151, "learning_rate": 9.676122280517699e-06, "loss": 0.6243, "step": 1058 }, { "epoch": 0.7692552153088326, "grad_norm": 0.45296333286927, "learning_rate": 9.675276536483404e-06, "loss": 0.6265, "step": 1059 }, { "epoch": 0.7699816130569995, "grad_norm": 0.4950287756900467, "learning_rate": 9.674429726699324e-06, "loss": 0.6139, "step": 1060 }, { "epoch": 0.7707080108051665, "grad_norm": 0.4530383471143353, "learning_rate": 9.673581851358493e-06, "loss": 0.613, "step": 1061 }, { "epoch": 0.7714344085533335, "grad_norm": 0.46904027613626054, "learning_rate": 9.672732910654187e-06, "loss": 0.61, "step": 1062 }, { "epoch": 0.7721608063015004, "grad_norm": 0.4562341083900913, "learning_rate": 9.671882904779927e-06, "loss": 0.6019, "step": 1063 }, { "epoch": 0.7728872040496675, "grad_norm": 0.47972675828391576, "learning_rate": 9.671031833929474e-06, "loss": 0.6012, "step": 1064 }, { "epoch": 0.7736136017978344, "grad_norm": 0.4889221372366856, "learning_rate": 9.670179698296835e-06, "loss": 0.6057, "step": 1065 }, { "epoch": 0.7743399995460014, "grad_norm": 0.4998365163823747, "learning_rate": 9.669326498076258e-06, "loss": 0.6244, "step": 1066 }, { "epoch": 0.7750663972941684, "grad_norm": 0.4461796783543215, "learning_rate": 9.668472233462232e-06, "loss": 0.5957, "step": 1067 }, { "epoch": 0.7757927950423353, "grad_norm": 0.4653827741141469, "learning_rate": 9.667616904649491e-06, "loss": 0.6223, "step": 1068 }, { "epoch": 0.7765191927905023, "grad_norm": 0.5101486161304896, "learning_rate": 9.666760511833014e-06, "loss": 0.6206, "step": 1069 }, { "epoch": 0.7772455905386694, "grad_norm": 0.44938945973356537, "learning_rate": 9.665903055208013e-06, "loss": 0.614, "step": 1070 }, { "epoch": 0.7779719882868363, "grad_norm": 0.48210108522020845, "learning_rate": 9.665044534969957e-06, "loss": 0.6294, "step": 1071 }, { "epoch": 0.7786983860350033, "grad_norm": 0.536826645391065, "learning_rate": 9.664184951314542e-06, "loss": 0.6216, "step": 1072 }, { "epoch": 0.7794247837831703, "grad_norm": 0.46290611415210603, "learning_rate": 9.663324304437717e-06, "loss": 0.6162, "step": 1073 }, { "epoch": 0.7801511815313372, "grad_norm": 0.4389114689546384, "learning_rate": 9.662462594535674e-06, "loss": 0.6107, "step": 1074 }, { "epoch": 0.7808775792795042, "grad_norm": 0.4572319500457869, "learning_rate": 9.661599821804836e-06, "loss": 0.6241, "step": 1075 }, { "epoch": 0.7816039770276713, "grad_norm": 0.4827522638295839, "learning_rate": 9.660735986441881e-06, "loss": 0.6122, "step": 1076 }, { "epoch": 0.7823303747758382, "grad_norm": 0.4740115259109735, "learning_rate": 9.659871088643724e-06, "loss": 0.6115, "step": 1077 }, { "epoch": 0.7830567725240052, "grad_norm": 0.4590734004873104, "learning_rate": 9.659005128607518e-06, "loss": 0.6185, "step": 1078 }, { "epoch": 0.7837831702721721, "grad_norm": 0.8749921694047537, "learning_rate": 9.65813810653067e-06, "loss": 0.5945, "step": 1079 }, { "epoch": 0.7845095680203391, "grad_norm": 0.7035666241156626, "learning_rate": 9.657270022610814e-06, "loss": 0.6091, "step": 1080 }, { "epoch": 0.7852359657685061, "grad_norm": 0.5346422298106133, "learning_rate": 9.656400877045836e-06, "loss": 0.6152, "step": 1081 }, { "epoch": 0.7859623635166731, "grad_norm": 0.4782527712539082, "learning_rate": 9.655530670033866e-06, "loss": 0.6225, "step": 1082 }, { "epoch": 0.7866887612648401, "grad_norm": 0.47908779964086995, "learning_rate": 9.654659401773264e-06, "loss": 0.6161, "step": 1083 }, { "epoch": 0.7874151590130071, "grad_norm": 0.4391207715095105, "learning_rate": 9.653787072462644e-06, "loss": 0.6166, "step": 1084 }, { "epoch": 0.788141556761174, "grad_norm": 0.4423155828257749, "learning_rate": 9.652913682300856e-06, "loss": 0.5925, "step": 1085 }, { "epoch": 0.788867954509341, "grad_norm": 0.4425248921573, "learning_rate": 9.652039231486993e-06, "loss": 0.6095, "step": 1086 }, { "epoch": 0.789594352257508, "grad_norm": 0.4700153894358856, "learning_rate": 9.651163720220391e-06, "loss": 0.6084, "step": 1087 }, { "epoch": 0.790320750005675, "grad_norm": 0.5178001098713066, "learning_rate": 9.650287148700626e-06, "loss": 0.6016, "step": 1088 }, { "epoch": 0.791047147753842, "grad_norm": 0.47127932138489764, "learning_rate": 9.649409517127515e-06, "loss": 0.632, "step": 1089 }, { "epoch": 0.791773545502009, "grad_norm": 0.5332912769127331, "learning_rate": 9.648530825701118e-06, "loss": 0.6172, "step": 1090 }, { "epoch": 0.7924999432501759, "grad_norm": 0.5257897079993382, "learning_rate": 9.647651074621741e-06, "loss": 0.6212, "step": 1091 }, { "epoch": 0.7932263409983429, "grad_norm": 0.5086619699814738, "learning_rate": 9.646770264089921e-06, "loss": 0.6206, "step": 1092 }, { "epoch": 0.7939527387465098, "grad_norm": 0.47689364802107714, "learning_rate": 9.645888394306448e-06, "loss": 0.6112, "step": 1093 }, { "epoch": 0.7946791364946769, "grad_norm": 0.5028782234896939, "learning_rate": 9.645005465472345e-06, "loss": 0.6258, "step": 1094 }, { "epoch": 0.7954055342428439, "grad_norm": 0.48796076270457783, "learning_rate": 9.644121477788879e-06, "loss": 0.6159, "step": 1095 }, { "epoch": 0.7961319319910108, "grad_norm": 0.4538044670324512, "learning_rate": 9.643236431457561e-06, "loss": 0.608, "step": 1096 }, { "epoch": 0.7968583297391778, "grad_norm": 0.4616355979057923, "learning_rate": 9.64235032668014e-06, "loss": 0.6164, "step": 1097 }, { "epoch": 0.7975847274873448, "grad_norm": 0.5297894983069743, "learning_rate": 9.641463163658606e-06, "loss": 0.6352, "step": 1098 }, { "epoch": 0.7983111252355117, "grad_norm": 0.4591303780851485, "learning_rate": 9.640574942595195e-06, "loss": 0.5976, "step": 1099 }, { "epoch": 0.7990375229836788, "grad_norm": 0.465064202103376, "learning_rate": 9.63968566369238e-06, "loss": 0.614, "step": 1100 }, { "epoch": 0.7997639207318458, "grad_norm": 0.4665514161467082, "learning_rate": 9.638795327152872e-06, "loss": 0.5919, "step": 1101 }, { "epoch": 0.8004903184800127, "grad_norm": 0.44878678129345034, "learning_rate": 9.637903933179633e-06, "loss": 0.6059, "step": 1102 }, { "epoch": 0.8012167162281797, "grad_norm": 0.4741833500595366, "learning_rate": 9.637011481975858e-06, "loss": 0.6378, "step": 1103 }, { "epoch": 0.8019431139763467, "grad_norm": 0.45791949887831834, "learning_rate": 9.636117973744982e-06, "loss": 0.6179, "step": 1104 }, { "epoch": 0.8026695117245136, "grad_norm": 0.5404917727674573, "learning_rate": 9.635223408690688e-06, "loss": 0.6081, "step": 1105 }, { "epoch": 0.8033959094726806, "grad_norm": 0.4476699095118304, "learning_rate": 9.634327787016894e-06, "loss": 0.6212, "step": 1106 }, { "epoch": 0.8041223072208477, "grad_norm": 0.4645084972315594, "learning_rate": 9.633431108927764e-06, "loss": 0.6103, "step": 1107 }, { "epoch": 0.8048487049690146, "grad_norm": 0.4591418402004859, "learning_rate": 9.632533374627695e-06, "loss": 0.624, "step": 1108 }, { "epoch": 0.8055751027171816, "grad_norm": 0.4264466790448802, "learning_rate": 9.631634584321334e-06, "loss": 0.604, "step": 1109 }, { "epoch": 0.8063015004653485, "grad_norm": 0.462102047288957, "learning_rate": 9.630734738213561e-06, "loss": 0.6193, "step": 1110 }, { "epoch": 0.8070278982135155, "grad_norm": 0.44591136659649727, "learning_rate": 9.6298338365095e-06, "loss": 0.5942, "step": 1111 }, { "epoch": 0.8077542959616825, "grad_norm": 0.5135813110167959, "learning_rate": 9.628931879414519e-06, "loss": 0.6073, "step": 1112 }, { "epoch": 0.8084806937098495, "grad_norm": 0.4301105343160582, "learning_rate": 9.628028867134216e-06, "loss": 0.5967, "step": 1113 }, { "epoch": 0.8092070914580165, "grad_norm": 0.6578782656668021, "learning_rate": 9.627124799874446e-06, "loss": 0.6026, "step": 1114 }, { "epoch": 0.8099334892061835, "grad_norm": 0.4958492633099008, "learning_rate": 9.626219677841287e-06, "loss": 0.6152, "step": 1115 }, { "epoch": 0.8106598869543504, "grad_norm": 0.4756410241779951, "learning_rate": 9.625313501241069e-06, "loss": 0.5805, "step": 1116 }, { "epoch": 0.8113862847025174, "grad_norm": 0.5433562306080713, "learning_rate": 9.62440627028036e-06, "loss": 0.626, "step": 1117 }, { "epoch": 0.8121126824506844, "grad_norm": 0.48943135901184903, "learning_rate": 9.623497985165964e-06, "loss": 0.6052, "step": 1118 }, { "epoch": 0.8128390801988514, "grad_norm": 0.5114674351446247, "learning_rate": 9.622588646104934e-06, "loss": 0.6023, "step": 1119 }, { "epoch": 0.8135654779470184, "grad_norm": 0.5220309988554744, "learning_rate": 9.62167825330455e-06, "loss": 0.6, "step": 1120 }, { "epoch": 0.8142918756951854, "grad_norm": 0.4994686649781941, "learning_rate": 9.620766806972348e-06, "loss": 0.5928, "step": 1121 }, { "epoch": 0.8150182734433523, "grad_norm": 0.4514390687848239, "learning_rate": 9.61985430731609e-06, "loss": 0.6155, "step": 1122 }, { "epoch": 0.8157446711915193, "grad_norm": 0.4681036653080852, "learning_rate": 9.618940754543788e-06, "loss": 0.5874, "step": 1123 }, { "epoch": 0.8164710689396862, "grad_norm": 0.4395085861388702, "learning_rate": 9.618026148863689e-06, "loss": 0.5947, "step": 1124 }, { "epoch": 0.8171974666878533, "grad_norm": 0.5053784535742852, "learning_rate": 9.617110490484281e-06, "loss": 0.6001, "step": 1125 }, { "epoch": 0.8179238644360203, "grad_norm": 0.4960217010563304, "learning_rate": 9.616193779614294e-06, "loss": 0.6125, "step": 1126 }, { "epoch": 0.8186502621841872, "grad_norm": 0.4852233185765258, "learning_rate": 9.615276016462694e-06, "loss": 0.631, "step": 1127 }, { "epoch": 0.8193766599323542, "grad_norm": 0.4550745865680181, "learning_rate": 9.61435720123869e-06, "loss": 0.6036, "step": 1128 }, { "epoch": 0.8201030576805212, "grad_norm": 0.46274283671479666, "learning_rate": 9.613437334151731e-06, "loss": 0.6051, "step": 1129 }, { "epoch": 0.8208294554286881, "grad_norm": 0.4768928920065255, "learning_rate": 9.612516415411505e-06, "loss": 0.6172, "step": 1130 }, { "epoch": 0.8215558531768552, "grad_norm": 0.4382177071505045, "learning_rate": 9.611594445227939e-06, "loss": 0.6208, "step": 1131 }, { "epoch": 0.8222822509250222, "grad_norm": 0.4637384188259406, "learning_rate": 9.610671423811197e-06, "loss": 0.6223, "step": 1132 }, { "epoch": 0.8230086486731891, "grad_norm": 0.4480710315760765, "learning_rate": 9.60974735137169e-06, "loss": 0.6044, "step": 1133 }, { "epoch": 0.8237350464213561, "grad_norm": 0.7018777620026823, "learning_rate": 9.608822228120063e-06, "loss": 0.6102, "step": 1134 }, { "epoch": 0.824461444169523, "grad_norm": 0.4466079797386302, "learning_rate": 9.607896054267201e-06, "loss": 0.6009, "step": 1135 }, { "epoch": 0.82518784191769, "grad_norm": 0.5051574955921055, "learning_rate": 9.60696883002423e-06, "loss": 0.6031, "step": 1136 }, { "epoch": 0.8259142396658571, "grad_norm": 0.4809646087862845, "learning_rate": 9.606040555602516e-06, "loss": 0.5941, "step": 1137 }, { "epoch": 0.826640637414024, "grad_norm": 0.44871620180448013, "learning_rate": 9.60511123121366e-06, "loss": 0.6103, "step": 1138 }, { "epoch": 0.827367035162191, "grad_norm": 0.49068878161291896, "learning_rate": 9.604180857069509e-06, "loss": 0.6228, "step": 1139 }, { "epoch": 0.828093432910358, "grad_norm": 0.4768595004531384, "learning_rate": 9.603249433382145e-06, "loss": 0.6194, "step": 1140 }, { "epoch": 0.8288198306585249, "grad_norm": 0.48342908704288834, "learning_rate": 9.602316960363889e-06, "loss": 0.6033, "step": 1141 }, { "epoch": 0.8295462284066919, "grad_norm": 0.6161075106997672, "learning_rate": 9.601383438227303e-06, "loss": 0.6043, "step": 1142 }, { "epoch": 0.830272626154859, "grad_norm": 0.4365995879925137, "learning_rate": 9.600448867185188e-06, "loss": 0.5937, "step": 1143 }, { "epoch": 0.8309990239030259, "grad_norm": 0.43924899950044805, "learning_rate": 9.599513247450581e-06, "loss": 0.6126, "step": 1144 }, { "epoch": 0.8317254216511929, "grad_norm": 0.4606375387545376, "learning_rate": 9.598576579236765e-06, "loss": 0.6205, "step": 1145 }, { "epoch": 0.8324518193993599, "grad_norm": 0.4370080723956695, "learning_rate": 9.597638862757255e-06, "loss": 0.6222, "step": 1146 }, { "epoch": 0.8331782171475268, "grad_norm": 0.45381509810289633, "learning_rate": 9.596700098225806e-06, "loss": 0.5969, "step": 1147 }, { "epoch": 0.8339046148956938, "grad_norm": 0.45557634817071224, "learning_rate": 9.595760285856418e-06, "loss": 0.5993, "step": 1148 }, { "epoch": 0.8346310126438609, "grad_norm": 0.4413227228028858, "learning_rate": 9.59481942586332e-06, "loss": 0.6113, "step": 1149 }, { "epoch": 0.8353574103920278, "grad_norm": 0.45948096434027247, "learning_rate": 9.593877518460988e-06, "loss": 0.6003, "step": 1150 }, { "epoch": 0.8360838081401948, "grad_norm": 0.4077180909410102, "learning_rate": 9.592934563864136e-06, "loss": 0.6172, "step": 1151 }, { "epoch": 0.8368102058883617, "grad_norm": 0.43642720072650515, "learning_rate": 9.59199056228771e-06, "loss": 0.5942, "step": 1152 }, { "epoch": 0.8375366036365287, "grad_norm": 0.4566716571217164, "learning_rate": 9.591045513946904e-06, "loss": 0.6288, "step": 1153 }, { "epoch": 0.8382630013846957, "grad_norm": 0.4256375577799388, "learning_rate": 9.590099419057142e-06, "loss": 0.5925, "step": 1154 }, { "epoch": 0.8389893991328626, "grad_norm": 0.43304644846968315, "learning_rate": 9.589152277834093e-06, "loss": 0.6201, "step": 1155 }, { "epoch": 0.8397157968810297, "grad_norm": 0.4100228725712963, "learning_rate": 9.58820409049366e-06, "loss": 0.5943, "step": 1156 }, { "epoch": 0.8404421946291967, "grad_norm": 0.4099528807583989, "learning_rate": 9.587254857251988e-06, "loss": 0.6109, "step": 1157 }, { "epoch": 0.8411685923773636, "grad_norm": 0.491774985036076, "learning_rate": 9.586304578325457e-06, "loss": 0.6096, "step": 1158 }, { "epoch": 0.8418949901255306, "grad_norm": 0.559738280848998, "learning_rate": 9.585353253930692e-06, "loss": 0.6069, "step": 1159 }, { "epoch": 0.8426213878736976, "grad_norm": 0.40139731209421825, "learning_rate": 9.584400884284546e-06, "loss": 0.6131, "step": 1160 }, { "epoch": 0.8433477856218645, "grad_norm": 0.4400340950055217, "learning_rate": 9.58344746960412e-06, "loss": 0.6129, "step": 1161 }, { "epoch": 0.8440741833700316, "grad_norm": 0.4166850878820713, "learning_rate": 9.582493010106744e-06, "loss": 0.6041, "step": 1162 }, { "epoch": 0.8448005811181986, "grad_norm": 0.5165919941194519, "learning_rate": 9.581537506009996e-06, "loss": 0.6115, "step": 1163 }, { "epoch": 0.8455269788663655, "grad_norm": 0.4151737437790665, "learning_rate": 9.580580957531685e-06, "loss": 0.5974, "step": 1164 }, { "epoch": 0.8462533766145325, "grad_norm": 0.4550231106805591, "learning_rate": 9.579623364889863e-06, "loss": 0.6238, "step": 1165 }, { "epoch": 0.8469797743626994, "grad_norm": 0.4506499145111714, "learning_rate": 9.578664728302813e-06, "loss": 0.608, "step": 1166 }, { "epoch": 0.8477061721108664, "grad_norm": 0.5499080210212972, "learning_rate": 9.577705047989065e-06, "loss": 0.6023, "step": 1167 }, { "epoch": 0.8484325698590335, "grad_norm": 0.625121033765801, "learning_rate": 9.57674432416738e-06, "loss": 0.5984, "step": 1168 }, { "epoch": 0.8491589676072004, "grad_norm": 1.1862443305257935, "learning_rate": 9.575782557056759e-06, "loss": 0.6052, "step": 1169 }, { "epoch": 0.8498853653553674, "grad_norm": 0.47769553696473244, "learning_rate": 9.57481974687644e-06, "loss": 0.5912, "step": 1170 }, { "epoch": 0.8506117631035344, "grad_norm": 0.4080077932005349, "learning_rate": 9.573855893845905e-06, "loss": 0.6235, "step": 1171 }, { "epoch": 0.8513381608517013, "grad_norm": 0.4761375945040686, "learning_rate": 9.572890998184862e-06, "loss": 0.606, "step": 1172 }, { "epoch": 0.8520645585998683, "grad_norm": 0.5766409060005017, "learning_rate": 9.571925060113267e-06, "loss": 0.5979, "step": 1173 }, { "epoch": 0.8527909563480354, "grad_norm": 0.42596241668640383, "learning_rate": 9.57095807985131e-06, "loss": 0.5915, "step": 1174 }, { "epoch": 0.8535173540962023, "grad_norm": 0.5056126132604277, "learning_rate": 9.569990057619414e-06, "loss": 0.5898, "step": 1175 }, { "epoch": 0.8542437518443693, "grad_norm": 0.43335266407709333, "learning_rate": 9.569020993638249e-06, "loss": 0.6022, "step": 1176 }, { "epoch": 0.8549701495925363, "grad_norm": 0.44207808281110866, "learning_rate": 9.568050888128716e-06, "loss": 0.6071, "step": 1177 }, { "epoch": 0.8556965473407032, "grad_norm": 0.4341399555907341, "learning_rate": 9.567079741311956e-06, "loss": 0.6043, "step": 1178 }, { "epoch": 0.8564229450888702, "grad_norm": 0.4369089309389326, "learning_rate": 9.566107553409341e-06, "loss": 0.587, "step": 1179 }, { "epoch": 0.8571493428370373, "grad_norm": 0.4599730151913197, "learning_rate": 9.565134324642491e-06, "loss": 0.6102, "step": 1180 }, { "epoch": 0.8578757405852042, "grad_norm": 0.43977979280579654, "learning_rate": 9.564160055233256e-06, "loss": 0.5987, "step": 1181 }, { "epoch": 0.8586021383333712, "grad_norm": 0.46783053277652853, "learning_rate": 9.563184745403725e-06, "loss": 0.6029, "step": 1182 }, { "epoch": 0.8593285360815381, "grad_norm": 0.44458871164794883, "learning_rate": 9.562208395376223e-06, "loss": 0.593, "step": 1183 }, { "epoch": 0.8600549338297051, "grad_norm": 0.5733471186258695, "learning_rate": 9.561231005373315e-06, "loss": 0.6064, "step": 1184 }, { "epoch": 0.8607813315778721, "grad_norm": 0.45341893514784476, "learning_rate": 9.560252575617798e-06, "loss": 0.5947, "step": 1185 }, { "epoch": 0.8615077293260391, "grad_norm": 0.5303361989038317, "learning_rate": 9.559273106332716e-06, "loss": 0.5921, "step": 1186 }, { "epoch": 0.8622341270742061, "grad_norm": 0.4370520610409579, "learning_rate": 9.558292597741337e-06, "loss": 0.607, "step": 1187 }, { "epoch": 0.8629605248223731, "grad_norm": 0.4175970592323233, "learning_rate": 9.557311050067175e-06, "loss": 0.6018, "step": 1188 }, { "epoch": 0.86368692257054, "grad_norm": 0.46416643253026196, "learning_rate": 9.556328463533976e-06, "loss": 0.6089, "step": 1189 }, { "epoch": 0.864413320318707, "grad_norm": 0.6168553625857448, "learning_rate": 9.555344838365727e-06, "loss": 0.5861, "step": 1190 }, { "epoch": 0.865139718066874, "grad_norm": 0.48915934761576074, "learning_rate": 9.554360174786651e-06, "loss": 0.6088, "step": 1191 }, { "epoch": 0.865866115815041, "grad_norm": 0.44675004959116177, "learning_rate": 9.553374473021204e-06, "loss": 0.5922, "step": 1192 }, { "epoch": 0.866592513563208, "grad_norm": 0.4385531991454444, "learning_rate": 9.552387733294081e-06, "loss": 0.5968, "step": 1193 }, { "epoch": 0.867318911311375, "grad_norm": 0.472160005452995, "learning_rate": 9.551399955830215e-06, "loss": 0.6009, "step": 1194 }, { "epoch": 0.8680453090595419, "grad_norm": 0.5114615003958931, "learning_rate": 9.550411140854772e-06, "loss": 0.5957, "step": 1195 }, { "epoch": 0.8687717068077089, "grad_norm": 0.46197727438219466, "learning_rate": 9.549421288593157e-06, "loss": 0.6035, "step": 1196 }, { "epoch": 0.8694981045558758, "grad_norm": 0.4505938549631761, "learning_rate": 9.548430399271013e-06, "loss": 0.6237, "step": 1197 }, { "epoch": 0.8702245023040429, "grad_norm": 0.43654822032779134, "learning_rate": 9.547438473114219e-06, "loss": 0.6089, "step": 1198 }, { "epoch": 0.8709509000522099, "grad_norm": 0.46067909068019247, "learning_rate": 9.546445510348884e-06, "loss": 0.61, "step": 1199 }, { "epoch": 0.8716772978003768, "grad_norm": 0.41694081713666115, "learning_rate": 9.545451511201365e-06, "loss": 0.5918, "step": 1200 }, { "epoch": 0.8724036955485438, "grad_norm": 0.46578768324984093, "learning_rate": 9.54445647589824e-06, "loss": 0.6005, "step": 1201 }, { "epoch": 0.8731300932967108, "grad_norm": 0.4438616477116534, "learning_rate": 9.543460404666338e-06, "loss": 0.5999, "step": 1202 }, { "epoch": 0.8738564910448777, "grad_norm": 0.48358201793997363, "learning_rate": 9.542463297732716e-06, "loss": 0.6064, "step": 1203 }, { "epoch": 0.8745828887930447, "grad_norm": 0.501153042263637, "learning_rate": 9.54146515532467e-06, "loss": 0.6203, "step": 1204 }, { "epoch": 0.8753092865412118, "grad_norm": 0.4521383558348136, "learning_rate": 9.540465977669728e-06, "loss": 0.6022, "step": 1205 }, { "epoch": 0.8760356842893787, "grad_norm": 0.5112596720285417, "learning_rate": 9.539465764995658e-06, "loss": 0.5977, "step": 1206 }, { "epoch": 0.8767620820375457, "grad_norm": 0.4861584352029897, "learning_rate": 9.538464517530466e-06, "loss": 0.5856, "step": 1207 }, { "epoch": 0.8774884797857126, "grad_norm": 0.4200265080369869, "learning_rate": 9.537462235502385e-06, "loss": 0.5866, "step": 1208 }, { "epoch": 0.8782148775338796, "grad_norm": 0.4261094133432591, "learning_rate": 9.536458919139894e-06, "loss": 0.5933, "step": 1209 }, { "epoch": 0.8789412752820466, "grad_norm": 0.4003104970002111, "learning_rate": 9.535454568671705e-06, "loss": 0.5844, "step": 1210 }, { "epoch": 0.8796676730302136, "grad_norm": 0.42295910863999364, "learning_rate": 9.53444918432676e-06, "loss": 0.6024, "step": 1211 }, { "epoch": 0.8803940707783806, "grad_norm": 0.4149494814967926, "learning_rate": 9.53344276633424e-06, "loss": 0.5863, "step": 1212 }, { "epoch": 0.8811204685265476, "grad_norm": 0.5110127220773379, "learning_rate": 9.532435314923566e-06, "loss": 0.5767, "step": 1213 }, { "epoch": 0.8818468662747145, "grad_norm": 0.43349333837655685, "learning_rate": 9.531426830324388e-06, "loss": 0.5951, "step": 1214 }, { "epoch": 0.8825732640228815, "grad_norm": 0.44997031713934815, "learning_rate": 9.530417312766598e-06, "loss": 0.6187, "step": 1215 }, { "epoch": 0.8832996617710485, "grad_norm": 0.4446591721155272, "learning_rate": 9.529406762480318e-06, "loss": 0.5985, "step": 1216 }, { "epoch": 0.8840260595192155, "grad_norm": 0.44120700182390005, "learning_rate": 9.528395179695907e-06, "loss": 0.5813, "step": 1217 }, { "epoch": 0.8847524572673825, "grad_norm": 0.4295735205310877, "learning_rate": 9.527382564643957e-06, "loss": 0.5971, "step": 1218 }, { "epoch": 0.8854788550155495, "grad_norm": 0.43730360118736156, "learning_rate": 9.526368917555304e-06, "loss": 0.6023, "step": 1219 }, { "epoch": 0.8862052527637164, "grad_norm": 0.43250587556589976, "learning_rate": 9.52535423866101e-06, "loss": 0.5893, "step": 1220 }, { "epoch": 0.8869316505118834, "grad_norm": 0.47439969227732015, "learning_rate": 9.524338528192375e-06, "loss": 0.5983, "step": 1221 }, { "epoch": 0.8876580482600503, "grad_norm": 0.43225531857326166, "learning_rate": 9.523321786380936e-06, "loss": 0.5784, "step": 1222 }, { "epoch": 0.8883844460082174, "grad_norm": 0.5170344250080916, "learning_rate": 9.522304013458464e-06, "loss": 0.6216, "step": 1223 }, { "epoch": 0.8891108437563844, "grad_norm": 0.42919086642378623, "learning_rate": 9.521285209656964e-06, "loss": 0.5881, "step": 1224 }, { "epoch": 0.8898372415045513, "grad_norm": 0.47659107028807385, "learning_rate": 9.520265375208675e-06, "loss": 0.5952, "step": 1225 }, { "epoch": 0.8905636392527183, "grad_norm": 0.41359128320071464, "learning_rate": 9.519244510346076e-06, "loss": 0.5908, "step": 1226 }, { "epoch": 0.8912900370008853, "grad_norm": 0.5644872809341976, "learning_rate": 9.518222615301875e-06, "loss": 0.5875, "step": 1227 }, { "epoch": 0.8920164347490522, "grad_norm": 0.47490025096662025, "learning_rate": 9.51719969030902e-06, "loss": 0.5917, "step": 1228 }, { "epoch": 0.8927428324972193, "grad_norm": 0.569369819329793, "learning_rate": 9.516175735600688e-06, "loss": 0.6024, "step": 1229 }, { "epoch": 0.8934692302453863, "grad_norm": 0.43225621864223424, "learning_rate": 9.515150751410298e-06, "loss": 0.6077, "step": 1230 }, { "epoch": 0.8941956279935532, "grad_norm": 0.708224935453519, "learning_rate": 9.514124737971495e-06, "loss": 0.5964, "step": 1231 }, { "epoch": 0.8949220257417202, "grad_norm": 0.4739759856133637, "learning_rate": 9.51309769551817e-06, "loss": 0.5763, "step": 1232 }, { "epoch": 0.8956484234898872, "grad_norm": 0.7728743977820823, "learning_rate": 9.512069624284433e-06, "loss": 0.5842, "step": 1233 }, { "epoch": 0.8963748212380541, "grad_norm": 0.43514666056639595, "learning_rate": 9.511040524504645e-06, "loss": 0.5967, "step": 1234 }, { "epoch": 0.8971012189862212, "grad_norm": 0.45557762839508126, "learning_rate": 9.510010396413389e-06, "loss": 0.5897, "step": 1235 }, { "epoch": 0.8978276167343882, "grad_norm": 0.5039340185306304, "learning_rate": 9.508979240245489e-06, "loss": 0.5773, "step": 1236 }, { "epoch": 0.8985540144825551, "grad_norm": 0.46341563582692274, "learning_rate": 9.507947056236003e-06, "loss": 0.5814, "step": 1237 }, { "epoch": 0.8992804122307221, "grad_norm": 0.47824940112014946, "learning_rate": 9.506913844620217e-06, "loss": 0.6016, "step": 1238 }, { "epoch": 0.900006809978889, "grad_norm": 0.46306899110926264, "learning_rate": 9.505879605633664e-06, "loss": 0.5969, "step": 1239 }, { "epoch": 0.900733207727056, "grad_norm": 0.4785112448900572, "learning_rate": 9.504844339512096e-06, "loss": 0.6181, "step": 1240 }, { "epoch": 0.9014596054752231, "grad_norm": 0.44806415943157596, "learning_rate": 9.503808046491511e-06, "loss": 0.5909, "step": 1241 }, { "epoch": 0.90218600322339, "grad_norm": 0.4039053308077116, "learning_rate": 9.502770726808133e-06, "loss": 0.5942, "step": 1242 }, { "epoch": 0.902912400971557, "grad_norm": 0.4988364921150387, "learning_rate": 9.501732380698426e-06, "loss": 0.6076, "step": 1243 }, { "epoch": 0.903638798719724, "grad_norm": 0.4467206176328996, "learning_rate": 9.500693008399085e-06, "loss": 0.6061, "step": 1244 }, { "epoch": 0.9043651964678909, "grad_norm": 0.40186754448407863, "learning_rate": 9.49965261014704e-06, "loss": 0.5745, "step": 1245 }, { "epoch": 0.9050915942160579, "grad_norm": 0.4649796467570825, "learning_rate": 9.498611186179454e-06, "loss": 0.5891, "step": 1246 }, { "epoch": 0.9058179919642249, "grad_norm": 0.4076705157572678, "learning_rate": 9.497568736733722e-06, "loss": 0.6031, "step": 1247 }, { "epoch": 0.9065443897123919, "grad_norm": 0.4512970825986025, "learning_rate": 9.496525262047479e-06, "loss": 0.589, "step": 1248 }, { "epoch": 0.9072707874605589, "grad_norm": 0.42034195469706914, "learning_rate": 9.495480762358585e-06, "loss": 0.5858, "step": 1249 }, { "epoch": 0.9079971852087259, "grad_norm": 0.42929899360092316, "learning_rate": 9.494435237905142e-06, "loss": 0.5743, "step": 1250 }, { "epoch": 0.9087235829568928, "grad_norm": 0.4118235244162934, "learning_rate": 9.493388688925481e-06, "loss": 0.5844, "step": 1251 }, { "epoch": 0.9094499807050598, "grad_norm": 0.41506895210123806, "learning_rate": 9.492341115658167e-06, "loss": 0.6049, "step": 1252 }, { "epoch": 0.9101763784532267, "grad_norm": 0.4165328260065925, "learning_rate": 9.491292518341998e-06, "loss": 0.6008, "step": 1253 }, { "epoch": 0.9109027762013938, "grad_norm": 0.38977236916049524, "learning_rate": 9.490242897216008e-06, "loss": 0.615, "step": 1254 }, { "epoch": 0.9116291739495608, "grad_norm": 0.46505897512184374, "learning_rate": 9.489192252519462e-06, "loss": 0.5814, "step": 1255 }, { "epoch": 0.9123555716977277, "grad_norm": 0.446928251555708, "learning_rate": 9.488140584491859e-06, "loss": 0.6047, "step": 1256 }, { "epoch": 0.9130819694458947, "grad_norm": 0.43706570616826285, "learning_rate": 9.487087893372933e-06, "loss": 0.5741, "step": 1257 }, { "epoch": 0.9138083671940617, "grad_norm": 0.4537167354168948, "learning_rate": 9.486034179402645e-06, "loss": 0.5896, "step": 1258 }, { "epoch": 0.9145347649422286, "grad_norm": 0.41506148446047436, "learning_rate": 9.484979442821199e-06, "loss": 0.5949, "step": 1259 }, { "epoch": 0.9152611626903957, "grad_norm": 0.4638563917811123, "learning_rate": 9.483923683869025e-06, "loss": 0.584, "step": 1260 }, { "epoch": 0.9159875604385627, "grad_norm": 0.41081717634677034, "learning_rate": 9.482866902786784e-06, "loss": 0.5958, "step": 1261 }, { "epoch": 0.9167139581867296, "grad_norm": 0.44940733112945636, "learning_rate": 9.481809099815382e-06, "loss": 0.5868, "step": 1262 }, { "epoch": 0.9174403559348966, "grad_norm": 0.5181433591757858, "learning_rate": 9.480750275195942e-06, "loss": 0.609, "step": 1263 }, { "epoch": 0.9181667536830636, "grad_norm": 0.4264257820314425, "learning_rate": 9.479690429169832e-06, "loss": 0.585, "step": 1264 }, { "epoch": 0.9188931514312305, "grad_norm": 0.4385872968678699, "learning_rate": 9.478629561978648e-06, "loss": 0.5789, "step": 1265 }, { "epoch": 0.9196195491793976, "grad_norm": 0.4339437079303383, "learning_rate": 9.477567673864217e-06, "loss": 0.5947, "step": 1266 }, { "epoch": 0.9203459469275646, "grad_norm": 0.4415046209883479, "learning_rate": 9.476504765068604e-06, "loss": 0.6006, "step": 1267 }, { "epoch": 0.9210723446757315, "grad_norm": 0.42129146459625944, "learning_rate": 9.475440835834103e-06, "loss": 0.582, "step": 1268 }, { "epoch": 0.9217987424238985, "grad_norm": 0.48232608637628704, "learning_rate": 9.474375886403239e-06, "loss": 0.5914, "step": 1269 }, { "epoch": 0.9225251401720654, "grad_norm": 0.4770042456279813, "learning_rate": 9.473309917018774e-06, "loss": 0.6019, "step": 1270 }, { "epoch": 0.9232515379202324, "grad_norm": 0.4536442707571275, "learning_rate": 9.4722429279237e-06, "loss": 0.5864, "step": 1271 }, { "epoch": 0.9239779356683995, "grad_norm": 0.42626513154423673, "learning_rate": 9.471174919361244e-06, "loss": 0.5763, "step": 1272 }, { "epoch": 0.9247043334165664, "grad_norm": 0.6205482644129555, "learning_rate": 9.47010589157486e-06, "loss": 0.5901, "step": 1273 }, { "epoch": 0.9254307311647334, "grad_norm": 0.43225121195112626, "learning_rate": 9.469035844808238e-06, "loss": 0.5956, "step": 1274 }, { "epoch": 0.9261571289129004, "grad_norm": 0.41283182941994373, "learning_rate": 9.467964779305304e-06, "loss": 0.5997, "step": 1275 }, { "epoch": 0.9268835266610673, "grad_norm": 0.41546967158855563, "learning_rate": 9.466892695310204e-06, "loss": 0.584, "step": 1276 }, { "epoch": 0.9276099244092343, "grad_norm": 0.4300218140453633, "learning_rate": 9.465819593067332e-06, "loss": 0.5849, "step": 1277 }, { "epoch": 0.9283363221574014, "grad_norm": 0.4456416918954236, "learning_rate": 9.464745472821302e-06, "loss": 0.5973, "step": 1278 }, { "epoch": 0.9290627199055683, "grad_norm": 0.4482037090374508, "learning_rate": 9.463670334816967e-06, "loss": 0.58, "step": 1279 }, { "epoch": 0.9297891176537353, "grad_norm": 0.46007508129114516, "learning_rate": 9.462594179299408e-06, "loss": 0.6001, "step": 1280 }, { "epoch": 0.9305155154019022, "grad_norm": 0.5011958549498003, "learning_rate": 9.461517006513938e-06, "loss": 0.5845, "step": 1281 }, { "epoch": 0.9312419131500692, "grad_norm": 0.42431402288059444, "learning_rate": 9.460438816706106e-06, "loss": 0.5905, "step": 1282 }, { "epoch": 0.9319683108982362, "grad_norm": 0.4502683304139396, "learning_rate": 9.459359610121688e-06, "loss": 0.5704, "step": 1283 }, { "epoch": 0.9326947086464032, "grad_norm": 0.3796874806100935, "learning_rate": 9.458279387006695e-06, "loss": 0.5768, "step": 1284 }, { "epoch": 0.9334211063945702, "grad_norm": 0.5004568592535342, "learning_rate": 9.45719814760737e-06, "loss": 0.5774, "step": 1285 }, { "epoch": 0.9341475041427372, "grad_norm": 0.4340577622957361, "learning_rate": 9.456115892170183e-06, "loss": 0.5911, "step": 1286 }, { "epoch": 0.9348739018909041, "grad_norm": 0.45670721572589634, "learning_rate": 9.45503262094184e-06, "loss": 0.599, "step": 1287 }, { "epoch": 0.9356002996390711, "grad_norm": 0.45572780467011514, "learning_rate": 9.453948334169279e-06, "loss": 0.5747, "step": 1288 }, { "epoch": 0.9363266973872381, "grad_norm": 0.38841356192268106, "learning_rate": 9.452863032099666e-06, "loss": 0.5971, "step": 1289 }, { "epoch": 0.9370530951354051, "grad_norm": 0.4298037377188982, "learning_rate": 9.451776714980402e-06, "loss": 0.5913, "step": 1290 }, { "epoch": 0.9377794928835721, "grad_norm": 0.41341305861326055, "learning_rate": 9.450689383059118e-06, "loss": 0.5796, "step": 1291 }, { "epoch": 0.9385058906317391, "grad_norm": 0.42660791554657346, "learning_rate": 9.449601036583673e-06, "loss": 0.5819, "step": 1292 }, { "epoch": 0.939232288379906, "grad_norm": 0.5598103689052227, "learning_rate": 9.448511675802162e-06, "loss": 0.5838, "step": 1293 }, { "epoch": 0.939958686128073, "grad_norm": 0.4586706687352272, "learning_rate": 9.447421300962911e-06, "loss": 0.5976, "step": 1294 }, { "epoch": 0.94068508387624, "grad_norm": 0.43063693058417746, "learning_rate": 9.446329912314475e-06, "loss": 0.5797, "step": 1295 }, { "epoch": 0.9414114816244069, "grad_norm": 0.4404865199677851, "learning_rate": 9.44523751010564e-06, "loss": 0.5937, "step": 1296 }, { "epoch": 0.942137879372574, "grad_norm": 0.4531349959997388, "learning_rate": 9.444144094585425e-06, "loss": 0.5941, "step": 1297 }, { "epoch": 0.942864277120741, "grad_norm": 0.5032016176400471, "learning_rate": 9.443049666003077e-06, "loss": 0.5945, "step": 1298 }, { "epoch": 0.9435906748689079, "grad_norm": 0.44126419665834615, "learning_rate": 9.441954224608079e-06, "loss": 0.5949, "step": 1299 }, { "epoch": 0.9443170726170749, "grad_norm": 0.45391630167344893, "learning_rate": 9.440857770650139e-06, "loss": 0.5898, "step": 1300 }, { "epoch": 0.9450434703652418, "grad_norm": 0.3956164187382792, "learning_rate": 9.439760304379197e-06, "loss": 0.5874, "step": 1301 }, { "epoch": 0.9457698681134088, "grad_norm": 0.4597343602525186, "learning_rate": 9.438661826045429e-06, "loss": 0.5841, "step": 1302 }, { "epoch": 0.9464962658615759, "grad_norm": 0.4933253827016849, "learning_rate": 9.437562335899237e-06, "loss": 0.5949, "step": 1303 }, { "epoch": 0.9472226636097428, "grad_norm": 0.42059079303606517, "learning_rate": 9.436461834191254e-06, "loss": 0.5735, "step": 1304 }, { "epoch": 0.9479490613579098, "grad_norm": 0.6604789856347021, "learning_rate": 9.435360321172344e-06, "loss": 0.5951, "step": 1305 }, { "epoch": 0.9486754591060768, "grad_norm": 0.6174395501507661, "learning_rate": 9.434257797093602e-06, "loss": 0.5904, "step": 1306 }, { "epoch": 0.9494018568542437, "grad_norm": 0.5221870207891371, "learning_rate": 9.433154262206353e-06, "loss": 0.6028, "step": 1307 }, { "epoch": 0.9501282546024107, "grad_norm": 0.4748141147379064, "learning_rate": 9.432049716762151e-06, "loss": 0.5967, "step": 1308 }, { "epoch": 0.9508546523505778, "grad_norm": 0.4557962640119291, "learning_rate": 9.430944161012784e-06, "loss": 0.5936, "step": 1309 }, { "epoch": 0.9515810500987447, "grad_norm": 0.4067491075418985, "learning_rate": 9.42983759521027e-06, "loss": 0.6003, "step": 1310 }, { "epoch": 0.9523074478469117, "grad_norm": 0.44211224151004935, "learning_rate": 9.428730019606853e-06, "loss": 0.5775, "step": 1311 }, { "epoch": 0.9530338455950786, "grad_norm": 0.407063349141801, "learning_rate": 9.42762143445501e-06, "loss": 0.59, "step": 1312 }, { "epoch": 0.9537602433432456, "grad_norm": 0.4028594981568491, "learning_rate": 9.426511840007448e-06, "loss": 0.5801, "step": 1313 }, { "epoch": 0.9544866410914126, "grad_norm": 0.3919749106342764, "learning_rate": 9.425401236517104e-06, "loss": 0.5905, "step": 1314 }, { "epoch": 0.9552130388395796, "grad_norm": 0.4032757710830502, "learning_rate": 9.424289624237143e-06, "loss": 0.6031, "step": 1315 }, { "epoch": 0.9559394365877466, "grad_norm": 0.4293862484316795, "learning_rate": 9.423177003420966e-06, "loss": 0.5708, "step": 1316 }, { "epoch": 0.9566658343359136, "grad_norm": 0.45285430043737923, "learning_rate": 9.422063374322197e-06, "loss": 0.5883, "step": 1317 }, { "epoch": 0.9573922320840805, "grad_norm": 0.5523709468876832, "learning_rate": 9.420948737194693e-06, "loss": 0.5866, "step": 1318 }, { "epoch": 0.9581186298322475, "grad_norm": 0.4459756657537176, "learning_rate": 9.419833092292542e-06, "loss": 0.5641, "step": 1319 }, { "epoch": 0.9588450275804145, "grad_norm": 0.48301638137861325, "learning_rate": 9.418716439870056e-06, "loss": 0.5865, "step": 1320 }, { "epoch": 0.9595714253285815, "grad_norm": 0.4128881676008887, "learning_rate": 9.417598780181787e-06, "loss": 0.6071, "step": 1321 }, { "epoch": 0.9602978230767485, "grad_norm": 0.44753948046420106, "learning_rate": 9.416480113482505e-06, "loss": 0.5822, "step": 1322 }, { "epoch": 0.9610242208249155, "grad_norm": 0.4279853381773522, "learning_rate": 9.415360440027216e-06, "loss": 0.5804, "step": 1323 }, { "epoch": 0.9617506185730824, "grad_norm": 0.4338581451320516, "learning_rate": 9.414239760071158e-06, "loss": 0.5887, "step": 1324 }, { "epoch": 0.9624770163212494, "grad_norm": 0.42860628721189403, "learning_rate": 9.413118073869791e-06, "loss": 0.5882, "step": 1325 }, { "epoch": 0.9632034140694163, "grad_norm": 0.44956892815122457, "learning_rate": 9.411995381678811e-06, "loss": 0.5782, "step": 1326 }, { "epoch": 0.9639298118175834, "grad_norm": 0.4814860194215449, "learning_rate": 9.410871683754139e-06, "loss": 0.5702, "step": 1327 }, { "epoch": 0.9646562095657504, "grad_norm": 0.42581017230827334, "learning_rate": 9.409746980351927e-06, "loss": 0.5678, "step": 1328 }, { "epoch": 0.9653826073139173, "grad_norm": 0.44826183504581213, "learning_rate": 9.408621271728555e-06, "loss": 0.5591, "step": 1329 }, { "epoch": 0.9661090050620843, "grad_norm": 0.5714153732960652, "learning_rate": 9.407494558140636e-06, "loss": 0.6072, "step": 1330 }, { "epoch": 0.9668354028102513, "grad_norm": 0.6836517336640814, "learning_rate": 9.406366839845004e-06, "loss": 0.577, "step": 1331 }, { "epoch": 0.9675618005584182, "grad_norm": 0.41510009176677465, "learning_rate": 9.405238117098736e-06, "loss": 0.582, "step": 1332 }, { "epoch": 0.9682881983065853, "grad_norm": 0.42043939764561933, "learning_rate": 9.40410839015912e-06, "loss": 0.5638, "step": 1333 }, { "epoch": 0.9690145960547523, "grad_norm": 0.47474607904645816, "learning_rate": 9.40297765928369e-06, "loss": 0.5808, "step": 1334 }, { "epoch": 0.9697409938029192, "grad_norm": 0.4404458653043789, "learning_rate": 9.401845924730197e-06, "loss": 0.6007, "step": 1335 }, { "epoch": 0.9704673915510862, "grad_norm": 0.41381749150233, "learning_rate": 9.400713186756625e-06, "loss": 0.5756, "step": 1336 }, { "epoch": 0.9711937892992532, "grad_norm": 0.40002352121048046, "learning_rate": 9.399579445621187e-06, "loss": 0.5845, "step": 1337 }, { "epoch": 0.9719201870474201, "grad_norm": 0.3819643412059154, "learning_rate": 9.398444701582325e-06, "loss": 0.5871, "step": 1338 }, { "epoch": 0.9726465847955872, "grad_norm": 0.4096279784042784, "learning_rate": 9.397308954898708e-06, "loss": 0.5602, "step": 1339 }, { "epoch": 0.9733729825437542, "grad_norm": 0.41853453487079706, "learning_rate": 9.396172205829235e-06, "loss": 0.5776, "step": 1340 }, { "epoch": 0.9740993802919211, "grad_norm": 0.40740299719446227, "learning_rate": 9.395034454633032e-06, "loss": 0.5805, "step": 1341 }, { "epoch": 0.9748257780400881, "grad_norm": 0.5010130777863815, "learning_rate": 9.393895701569455e-06, "loss": 0.5824, "step": 1342 }, { "epoch": 0.975552175788255, "grad_norm": 0.423630646161036, "learning_rate": 9.392755946898087e-06, "loss": 0.5933, "step": 1343 }, { "epoch": 0.976278573536422, "grad_norm": 0.5460848532649165, "learning_rate": 9.391615190878742e-06, "loss": 0.5716, "step": 1344 }, { "epoch": 0.977004971284589, "grad_norm": 0.4314740817665518, "learning_rate": 9.39047343377146e-06, "loss": 0.5858, "step": 1345 }, { "epoch": 0.977731369032756, "grad_norm": 0.4172447068705145, "learning_rate": 9.389330675836508e-06, "loss": 0.5855, "step": 1346 }, { "epoch": 0.978457766780923, "grad_norm": 0.40537926860470647, "learning_rate": 9.388186917334382e-06, "loss": 0.5806, "step": 1347 }, { "epoch": 0.97918416452909, "grad_norm": 0.42272881390227746, "learning_rate": 9.38704215852581e-06, "loss": 0.5833, "step": 1348 }, { "epoch": 0.9799105622772569, "grad_norm": 0.4071530915037966, "learning_rate": 9.38589639967174e-06, "loss": 0.5868, "step": 1349 }, { "epoch": 0.9806369600254239, "grad_norm": 0.4174487525760431, "learning_rate": 9.384749641033358e-06, "loss": 0.592, "step": 1350 }, { "epoch": 0.9813633577735908, "grad_norm": 0.5016160558691094, "learning_rate": 9.38360188287207e-06, "loss": 0.5688, "step": 1351 }, { "epoch": 0.9820897555217579, "grad_norm": 0.3709738274098191, "learning_rate": 9.382453125449513e-06, "loss": 0.5797, "step": 1352 }, { "epoch": 0.9828161532699249, "grad_norm": 0.4792601165573076, "learning_rate": 9.381303369027552e-06, "loss": 0.5799, "step": 1353 }, { "epoch": 0.9835425510180918, "grad_norm": 0.3842929548512234, "learning_rate": 9.380152613868276e-06, "loss": 0.5867, "step": 1354 }, { "epoch": 0.9842689487662588, "grad_norm": 0.443529556661457, "learning_rate": 9.37900086023401e-06, "loss": 0.573, "step": 1355 }, { "epoch": 0.9849953465144258, "grad_norm": 0.40568726593321786, "learning_rate": 9.377848108387295e-06, "loss": 0.5779, "step": 1356 }, { "epoch": 0.9857217442625927, "grad_norm": 0.4435967514139609, "learning_rate": 9.37669435859091e-06, "loss": 0.5798, "step": 1357 }, { "epoch": 0.9864481420107598, "grad_norm": 0.4245008371251557, "learning_rate": 9.375539611107856e-06, "loss": 0.5931, "step": 1358 }, { "epoch": 0.9871745397589268, "grad_norm": 0.4348440208427361, "learning_rate": 9.374383866201364e-06, "loss": 0.5866, "step": 1359 }, { "epoch": 0.9879009375070937, "grad_norm": 0.5474657163639375, "learning_rate": 9.373227124134888e-06, "loss": 0.5747, "step": 1360 }, { "epoch": 0.9886273352552607, "grad_norm": 0.46556069978991227, "learning_rate": 9.372069385172115e-06, "loss": 0.5951, "step": 1361 }, { "epoch": 0.9893537330034277, "grad_norm": 0.4871044110501798, "learning_rate": 9.37091064957696e-06, "loss": 0.5887, "step": 1362 }, { "epoch": 0.9900801307515946, "grad_norm": 0.4259323411994358, "learning_rate": 9.369750917613554e-06, "loss": 0.5907, "step": 1363 }, { "epoch": 0.9908065284997617, "grad_norm": 0.4100962064140313, "learning_rate": 9.368590189546268e-06, "loss": 0.5905, "step": 1364 }, { "epoch": 0.9915329262479287, "grad_norm": 0.4598716942351165, "learning_rate": 9.367428465639696e-06, "loss": 0.5791, "step": 1365 }, { "epoch": 0.9922593239960956, "grad_norm": 0.49479968191021007, "learning_rate": 9.366265746158653e-06, "loss": 0.5491, "step": 1366 }, { "epoch": 0.9929857217442626, "grad_norm": 0.40911147247756996, "learning_rate": 9.365102031368191e-06, "loss": 0.5765, "step": 1367 }, { "epoch": 0.9937121194924295, "grad_norm": 0.40613400942325284, "learning_rate": 9.363937321533583e-06, "loss": 0.5781, "step": 1368 }, { "epoch": 0.9944385172405965, "grad_norm": 0.41346069340622715, "learning_rate": 9.362771616920328e-06, "loss": 0.594, "step": 1369 }, { "epoch": 0.9951649149887636, "grad_norm": 0.42248849941999883, "learning_rate": 9.361604917794155e-06, "loss": 0.5784, "step": 1370 }, { "epoch": 0.9958913127369305, "grad_norm": 0.4395488497003654, "learning_rate": 9.360437224421017e-06, "loss": 0.5663, "step": 1371 }, { "epoch": 0.9966177104850975, "grad_norm": 0.48165754989833665, "learning_rate": 9.359268537067094e-06, "loss": 0.6047, "step": 1372 }, { "epoch": 0.9973441082332645, "grad_norm": 0.8462980679724643, "learning_rate": 9.358098855998797e-06, "loss": 0.5906, "step": 1373 }, { "epoch": 0.9980705059814314, "grad_norm": 0.44867266240084885, "learning_rate": 9.356928181482757e-06, "loss": 0.5807, "step": 1374 }, { "epoch": 0.9987969037295984, "grad_norm": 0.3922906494420764, "learning_rate": 9.355756513785834e-06, "loss": 0.5738, "step": 1375 }, { "epoch": 0.9995233014777655, "grad_norm": 0.3852398607524496, "learning_rate": 9.354583853175117e-06, "loss": 0.591, "step": 1376 }, { "epoch": 1.0002496992259324, "grad_norm": 0.44617123921430374, "learning_rate": 9.353410199917916e-06, "loss": 0.5784, "step": 1377 }, { "epoch": 1.0009760969740993, "grad_norm": 0.38482197267852375, "learning_rate": 9.352235554281775e-06, "loss": 0.5642, "step": 1378 }, { "epoch": 1.0017024947222664, "grad_norm": 0.4127525659520533, "learning_rate": 9.351059916534456e-06, "loss": 0.5659, "step": 1379 }, { "epoch": 1.0024288924704334, "grad_norm": 0.41330752590052344, "learning_rate": 9.349883286943951e-06, "loss": 0.5983, "step": 1380 }, { "epoch": 1.0031552902186003, "grad_norm": 0.40680416072814213, "learning_rate": 9.348705665778479e-06, "loss": 0.5733, "step": 1381 }, { "epoch": 1.0038816879667674, "grad_norm": 0.4128818362833564, "learning_rate": 9.347527053306482e-06, "loss": 0.5882, "step": 1382 }, { "epoch": 1.0046080857149342, "grad_norm": 0.4170411955512922, "learning_rate": 9.346347449796634e-06, "loss": 0.5874, "step": 1383 }, { "epoch": 1.0053344834631013, "grad_norm": 0.45301343977096553, "learning_rate": 9.345166855517827e-06, "loss": 0.5915, "step": 1384 }, { "epoch": 1.0060608812112684, "grad_norm": 0.4174938754667378, "learning_rate": 9.343985270739184e-06, "loss": 0.5699, "step": 1385 }, { "epoch": 1.0067872789594352, "grad_norm": 0.6243604998156592, "learning_rate": 9.34280269573005e-06, "loss": 0.561, "step": 1386 }, { "epoch": 1.0075136767076023, "grad_norm": 0.4128286014435526, "learning_rate": 9.341619130760004e-06, "loss": 0.5883, "step": 1387 }, { "epoch": 1.0082400744557691, "grad_norm": 0.4168211775574965, "learning_rate": 9.34043457609884e-06, "loss": 0.5734, "step": 1388 }, { "epoch": 1.0089664722039362, "grad_norm": 0.418443742472078, "learning_rate": 9.339249032016584e-06, "loss": 0.549, "step": 1389 }, { "epoch": 1.009692869952103, "grad_norm": 0.412506877347843, "learning_rate": 9.338062498783485e-06, "loss": 0.5777, "step": 1390 }, { "epoch": 1.0104192677002701, "grad_norm": 0.4288751680263293, "learning_rate": 9.336874976670018e-06, "loss": 0.5745, "step": 1391 }, { "epoch": 1.0111456654484372, "grad_norm": 0.48767089366316047, "learning_rate": 9.335686465946888e-06, "loss": 0.5835, "step": 1392 }, { "epoch": 1.011872063196604, "grad_norm": 0.400362746791716, "learning_rate": 9.334496966885017e-06, "loss": 0.5917, "step": 1393 }, { "epoch": 1.0125984609447711, "grad_norm": 0.37674500749905987, "learning_rate": 9.333306479755557e-06, "loss": 0.5714, "step": 1394 }, { "epoch": 1.013324858692938, "grad_norm": 0.43401884599608276, "learning_rate": 9.332115004829885e-06, "loss": 0.5727, "step": 1395 }, { "epoch": 1.014051256441105, "grad_norm": 0.4355295649860661, "learning_rate": 9.330922542379605e-06, "loss": 0.5588, "step": 1396 }, { "epoch": 1.014777654189272, "grad_norm": 0.45556005093229185, "learning_rate": 9.329729092676542e-06, "loss": 0.571, "step": 1397 }, { "epoch": 1.015504051937439, "grad_norm": 0.43547493346898447, "learning_rate": 9.328534655992747e-06, "loss": 0.577, "step": 1398 }, { "epoch": 1.016230449685606, "grad_norm": 0.39133492272444154, "learning_rate": 9.3273392326005e-06, "loss": 0.5845, "step": 1399 }, { "epoch": 1.016956847433773, "grad_norm": 0.38477807853847923, "learning_rate": 9.326142822772301e-06, "loss": 0.5687, "step": 1400 }, { "epoch": 1.01768324518194, "grad_norm": 0.4027839692661464, "learning_rate": 9.324945426780879e-06, "loss": 0.5788, "step": 1401 }, { "epoch": 1.0184096429301068, "grad_norm": 0.4215575711137664, "learning_rate": 9.323747044899184e-06, "loss": 0.5787, "step": 1402 }, { "epoch": 1.019136040678274, "grad_norm": 0.4829986330178923, "learning_rate": 9.322547677400391e-06, "loss": 0.5848, "step": 1403 }, { "epoch": 1.019862438426441, "grad_norm": 0.39452309057088686, "learning_rate": 9.321347324557904e-06, "loss": 0.5944, "step": 1404 }, { "epoch": 1.0205888361746078, "grad_norm": 0.4509442010918262, "learning_rate": 9.320145986645348e-06, "loss": 0.5865, "step": 1405 }, { "epoch": 1.021315233922775, "grad_norm": 0.4257629876201213, "learning_rate": 9.31894366393657e-06, "loss": 0.5713, "step": 1406 }, { "epoch": 1.0220416316709418, "grad_norm": 0.48116645828094656, "learning_rate": 9.317740356705649e-06, "loss": 0.6006, "step": 1407 }, { "epoch": 1.0227680294191088, "grad_norm": 0.5090569406001516, "learning_rate": 9.316536065226883e-06, "loss": 0.5891, "step": 1408 }, { "epoch": 1.0234944271672757, "grad_norm": 0.5625989892238936, "learning_rate": 9.315330789774795e-06, "loss": 0.5852, "step": 1409 }, { "epoch": 1.0242208249154428, "grad_norm": 1.4737104492769104, "learning_rate": 9.31412453062413e-06, "loss": 0.5987, "step": 1410 }, { "epoch": 1.0249472226636098, "grad_norm": 0.4422415845798024, "learning_rate": 9.312917288049868e-06, "loss": 0.5858, "step": 1411 }, { "epoch": 1.0256736204117767, "grad_norm": 0.3877553271798556, "learning_rate": 9.311709062327197e-06, "loss": 0.5753, "step": 1412 }, { "epoch": 1.0264000181599438, "grad_norm": 0.5253155430426784, "learning_rate": 9.31049985373154e-06, "loss": 0.5866, "step": 1413 }, { "epoch": 1.0271264159081106, "grad_norm": 0.3967005737441752, "learning_rate": 9.309289662538543e-06, "loss": 0.5605, "step": 1414 }, { "epoch": 1.0278528136562777, "grad_norm": 0.429486431143533, "learning_rate": 9.308078489024072e-06, "loss": 0.5609, "step": 1415 }, { "epoch": 1.0285792114044447, "grad_norm": 0.4650393072669976, "learning_rate": 9.306866333464223e-06, "loss": 0.5597, "step": 1416 }, { "epoch": 1.0293056091526116, "grad_norm": 0.41440859115144746, "learning_rate": 9.305653196135308e-06, "loss": 0.6004, "step": 1417 }, { "epoch": 1.0300320069007787, "grad_norm": 0.41388563070702816, "learning_rate": 9.30443907731387e-06, "loss": 0.5823, "step": 1418 }, { "epoch": 1.0307584046489455, "grad_norm": 0.43655904948926433, "learning_rate": 9.303223977276669e-06, "loss": 0.568, "step": 1419 }, { "epoch": 1.0314848023971126, "grad_norm": 0.3926843050634251, "learning_rate": 9.302007896300697e-06, "loss": 0.5635, "step": 1420 }, { "epoch": 1.0322112001452795, "grad_norm": 0.42957369993443373, "learning_rate": 9.300790834663163e-06, "loss": 0.5746, "step": 1421 }, { "epoch": 1.0329375978934465, "grad_norm": 0.4035745493692686, "learning_rate": 9.2995727926415e-06, "loss": 0.5862, "step": 1422 }, { "epoch": 1.0336639956416136, "grad_norm": 0.3920427008419327, "learning_rate": 9.298353770513368e-06, "loss": 0.5663, "step": 1423 }, { "epoch": 1.0343903933897804, "grad_norm": 0.46057274244672136, "learning_rate": 9.297133768556649e-06, "loss": 0.5812, "step": 1424 }, { "epoch": 1.0351167911379475, "grad_norm": 0.5253446644849149, "learning_rate": 9.295912787049447e-06, "loss": 0.5916, "step": 1425 }, { "epoch": 1.0358431888861144, "grad_norm": 0.4397820455055968, "learning_rate": 9.294690826270086e-06, "loss": 0.5773, "step": 1426 }, { "epoch": 1.0365695866342814, "grad_norm": 0.3794802433265421, "learning_rate": 9.293467886497123e-06, "loss": 0.5638, "step": 1427 }, { "epoch": 1.0372959843824485, "grad_norm": 0.39267603669754747, "learning_rate": 9.292243968009332e-06, "loss": 0.5825, "step": 1428 }, { "epoch": 1.0380223821306154, "grad_norm": 0.39438762623612705, "learning_rate": 9.291019071085707e-06, "loss": 0.5683, "step": 1429 }, { "epoch": 1.0387487798787824, "grad_norm": 0.4124722400201054, "learning_rate": 9.289793196005472e-06, "loss": 0.5887, "step": 1430 }, { "epoch": 1.0394751776269493, "grad_norm": 0.3710351913452333, "learning_rate": 9.288566343048069e-06, "loss": 0.5617, "step": 1431 }, { "epoch": 1.0402015753751164, "grad_norm": 0.3696840977628092, "learning_rate": 9.287338512493165e-06, "loss": 0.5713, "step": 1432 }, { "epoch": 1.0409279731232832, "grad_norm": 0.4711524354304988, "learning_rate": 9.286109704620648e-06, "loss": 0.5703, "step": 1433 }, { "epoch": 1.0416543708714503, "grad_norm": 0.40615010982081595, "learning_rate": 9.284879919710631e-06, "loss": 0.564, "step": 1434 }, { "epoch": 1.0423807686196174, "grad_norm": 0.41109169173341603, "learning_rate": 9.283649158043452e-06, "loss": 0.5732, "step": 1435 }, { "epoch": 1.0431071663677842, "grad_norm": 0.44915827468482616, "learning_rate": 9.282417419899664e-06, "loss": 0.592, "step": 1436 }, { "epoch": 1.0438335641159513, "grad_norm": 0.36242059051621556, "learning_rate": 9.281184705560049e-06, "loss": 0.575, "step": 1437 }, { "epoch": 1.0445599618641181, "grad_norm": 0.3951607612016366, "learning_rate": 9.279951015305611e-06, "loss": 0.5629, "step": 1438 }, { "epoch": 1.0452863596122852, "grad_norm": 0.42649878376009126, "learning_rate": 9.27871634941757e-06, "loss": 0.5697, "step": 1439 }, { "epoch": 1.0460127573604523, "grad_norm": 0.5203166547135417, "learning_rate": 9.27748070817738e-06, "loss": 0.5837, "step": 1440 }, { "epoch": 1.0467391551086191, "grad_norm": 0.3776646098453339, "learning_rate": 9.276244091866706e-06, "loss": 0.5588, "step": 1441 }, { "epoch": 1.0474655528567862, "grad_norm": 0.44599908364573615, "learning_rate": 9.275006500767444e-06, "loss": 0.5693, "step": 1442 }, { "epoch": 1.048191950604953, "grad_norm": 0.38020056144781483, "learning_rate": 9.273767935161705e-06, "loss": 0.5741, "step": 1443 }, { "epoch": 1.0489183483531201, "grad_norm": 0.4268767302486434, "learning_rate": 9.272528395331826e-06, "loss": 0.5732, "step": 1444 }, { "epoch": 1.049644746101287, "grad_norm": 0.40829365584244665, "learning_rate": 9.271287881560368e-06, "loss": 0.5584, "step": 1445 }, { "epoch": 1.050371143849454, "grad_norm": 0.39136192636878153, "learning_rate": 9.27004639413011e-06, "loss": 0.5784, "step": 1446 }, { "epoch": 1.0510975415976211, "grad_norm": 0.39740978287858214, "learning_rate": 9.268803933324053e-06, "loss": 0.5862, "step": 1447 }, { "epoch": 1.051823939345788, "grad_norm": 0.42772270593881806, "learning_rate": 9.267560499425425e-06, "loss": 0.5712, "step": 1448 }, { "epoch": 1.052550337093955, "grad_norm": 0.4015339188340092, "learning_rate": 9.266316092717666e-06, "loss": 0.5804, "step": 1449 }, { "epoch": 1.053276734842122, "grad_norm": 0.4721614746960808, "learning_rate": 9.265070713484452e-06, "loss": 0.5752, "step": 1450 }, { "epoch": 1.054003132590289, "grad_norm": 0.46272828478575495, "learning_rate": 9.263824362009667e-06, "loss": 0.5725, "step": 1451 }, { "epoch": 1.0547295303384558, "grad_norm": 0.43524012594081773, "learning_rate": 9.262577038577423e-06, "loss": 0.5745, "step": 1452 }, { "epoch": 1.055455928086623, "grad_norm": 0.3697781664579044, "learning_rate": 9.261328743472055e-06, "loss": 0.5661, "step": 1453 }, { "epoch": 1.05618232583479, "grad_norm": 0.44617814494960756, "learning_rate": 9.260079476978117e-06, "loss": 0.5852, "step": 1454 }, { "epoch": 1.0569087235829568, "grad_norm": 0.4019452947154167, "learning_rate": 9.25882923938038e-06, "loss": 0.5939, "step": 1455 }, { "epoch": 1.057635121331124, "grad_norm": 0.4191763655875106, "learning_rate": 9.25757803096385e-06, "loss": 0.5748, "step": 1456 }, { "epoch": 1.0583615190792908, "grad_norm": 0.4126101858730787, "learning_rate": 9.256325852013736e-06, "loss": 0.5835, "step": 1457 }, { "epoch": 1.0590879168274578, "grad_norm": 0.4339550235322551, "learning_rate": 9.255072702815487e-06, "loss": 0.5872, "step": 1458 }, { "epoch": 1.059814314575625, "grad_norm": 0.388417450620657, "learning_rate": 9.253818583654754e-06, "loss": 0.5659, "step": 1459 }, { "epoch": 1.0605407123237918, "grad_norm": 0.4540043570449997, "learning_rate": 9.252563494817426e-06, "loss": 0.5756, "step": 1460 }, { "epoch": 1.0612671100719588, "grad_norm": 0.38753097423844807, "learning_rate": 9.251307436589605e-06, "loss": 0.5716, "step": 1461 }, { "epoch": 1.0619935078201257, "grad_norm": 0.37777144670282375, "learning_rate": 9.250050409257612e-06, "loss": 0.5586, "step": 1462 }, { "epoch": 1.0627199055682928, "grad_norm": 0.445355054898785, "learning_rate": 9.248792413107994e-06, "loss": 0.5707, "step": 1463 }, { "epoch": 1.0634463033164596, "grad_norm": 0.43309925235084556, "learning_rate": 9.247533448427519e-06, "loss": 0.5857, "step": 1464 }, { "epoch": 1.0641727010646267, "grad_norm": 0.4464319853779842, "learning_rate": 9.246273515503169e-06, "loss": 0.5802, "step": 1465 }, { "epoch": 1.0648990988127938, "grad_norm": 0.3924050059125471, "learning_rate": 9.245012614622153e-06, "loss": 0.5706, "step": 1466 }, { "epoch": 1.0656254965609606, "grad_norm": 0.40147856573132024, "learning_rate": 9.2437507460719e-06, "loss": 0.5706, "step": 1467 }, { "epoch": 1.0663518943091277, "grad_norm": 0.4001433161155672, "learning_rate": 9.242487910140057e-06, "loss": 0.5548, "step": 1468 }, { "epoch": 1.0670782920572945, "grad_norm": 0.4079874843159747, "learning_rate": 9.241224107114495e-06, "loss": 0.5635, "step": 1469 }, { "epoch": 1.0678046898054616, "grad_norm": 0.4560119919834794, "learning_rate": 9.239959337283301e-06, "loss": 0.5723, "step": 1470 }, { "epoch": 1.0685310875536285, "grad_norm": 0.4407800440761631, "learning_rate": 9.238693600934785e-06, "loss": 0.5775, "step": 1471 }, { "epoch": 1.0692574853017955, "grad_norm": 0.4840882494667328, "learning_rate": 9.23742689835748e-06, "loss": 0.5754, "step": 1472 }, { "epoch": 1.0699838830499626, "grad_norm": 0.37810093243018317, "learning_rate": 9.236159229840133e-06, "loss": 0.5566, "step": 1473 }, { "epoch": 1.0707102807981295, "grad_norm": 0.3863492962548962, "learning_rate": 9.234890595671717e-06, "loss": 0.565, "step": 1474 }, { "epoch": 1.0714366785462965, "grad_norm": 0.7615309489873513, "learning_rate": 9.233620996141421e-06, "loss": 0.5598, "step": 1475 }, { "epoch": 1.0721630762944634, "grad_norm": 0.3976142210474501, "learning_rate": 9.232350431538656e-06, "loss": 0.568, "step": 1476 }, { "epoch": 1.0728894740426305, "grad_norm": 0.3850052436582411, "learning_rate": 9.231078902153058e-06, "loss": 0.5587, "step": 1477 }, { "epoch": 1.0736158717907975, "grad_norm": 0.7063675394401735, "learning_rate": 9.229806408274467e-06, "loss": 0.5527, "step": 1478 }, { "epoch": 1.0743422695389644, "grad_norm": 0.4258570084193788, "learning_rate": 9.228532950192964e-06, "loss": 0.5688, "step": 1479 }, { "epoch": 1.0750686672871315, "grad_norm": 0.41356428724432126, "learning_rate": 9.227258528198832e-06, "loss": 0.5764, "step": 1480 }, { "epoch": 1.0757950650352983, "grad_norm": 0.3686752782723383, "learning_rate": 9.225983142582584e-06, "loss": 0.5571, "step": 1481 }, { "epoch": 1.0765214627834654, "grad_norm": 0.38160042743844014, "learning_rate": 9.224706793634951e-06, "loss": 0.5604, "step": 1482 }, { "epoch": 1.0772478605316325, "grad_norm": 0.3707774059356207, "learning_rate": 9.223429481646881e-06, "loss": 0.581, "step": 1483 }, { "epoch": 1.0779742582797993, "grad_norm": 0.3695515047035739, "learning_rate": 9.222151206909541e-06, "loss": 0.5498, "step": 1484 }, { "epoch": 1.0787006560279664, "grad_norm": 0.3831609762559241, "learning_rate": 9.220871969714325e-06, "loss": 0.5715, "step": 1485 }, { "epoch": 1.0794270537761332, "grad_norm": 0.36103940460289247, "learning_rate": 9.219591770352836e-06, "loss": 0.5713, "step": 1486 }, { "epoch": 1.0801534515243003, "grad_norm": 0.4427569819582544, "learning_rate": 9.2183106091169e-06, "loss": 0.5873, "step": 1487 }, { "epoch": 1.0808798492724672, "grad_norm": 0.45995302001148347, "learning_rate": 9.217028486298567e-06, "loss": 0.5714, "step": 1488 }, { "epoch": 1.0816062470206342, "grad_norm": 0.4569865672570247, "learning_rate": 9.2157454021901e-06, "loss": 0.5734, "step": 1489 }, { "epoch": 1.0823326447688013, "grad_norm": 0.4061969443898093, "learning_rate": 9.214461357083986e-06, "loss": 0.5611, "step": 1490 }, { "epoch": 1.0830590425169682, "grad_norm": 0.36787594074816127, "learning_rate": 9.213176351272926e-06, "loss": 0.5771, "step": 1491 }, { "epoch": 1.0837854402651352, "grad_norm": 0.5034943262793765, "learning_rate": 9.211890385049845e-06, "loss": 0.5707, "step": 1492 }, { "epoch": 1.084511838013302, "grad_norm": 0.38467957622459686, "learning_rate": 9.210603458707883e-06, "loss": 0.5697, "step": 1493 }, { "epoch": 1.0852382357614692, "grad_norm": 0.42418759374955084, "learning_rate": 9.209315572540402e-06, "loss": 0.5625, "step": 1494 }, { "epoch": 1.085964633509636, "grad_norm": 0.3998370227866447, "learning_rate": 9.20802672684098e-06, "loss": 0.5813, "step": 1495 }, { "epoch": 1.086691031257803, "grad_norm": 0.42451732990267654, "learning_rate": 9.206736921903414e-06, "loss": 0.5759, "step": 1496 }, { "epoch": 1.0874174290059702, "grad_norm": 0.5284676685659139, "learning_rate": 9.205446158021725e-06, "loss": 0.5638, "step": 1497 }, { "epoch": 1.088143826754137, "grad_norm": 0.4213863049015563, "learning_rate": 9.204154435490143e-06, "loss": 0.5794, "step": 1498 }, { "epoch": 1.088870224502304, "grad_norm": 0.4080295627251175, "learning_rate": 9.202861754603126e-06, "loss": 0.5751, "step": 1499 }, { "epoch": 1.089596622250471, "grad_norm": 0.39291554245061283, "learning_rate": 9.201568115655343e-06, "loss": 0.556, "step": 1500 }, { "epoch": 1.090323019998638, "grad_norm": 0.3740570835089451, "learning_rate": 9.200273518941688e-06, "loss": 0.5753, "step": 1501 }, { "epoch": 1.091049417746805, "grad_norm": 0.407499502900159, "learning_rate": 9.198977964757266e-06, "loss": 0.5729, "step": 1502 }, { "epoch": 1.091775815494972, "grad_norm": 0.3719259405007259, "learning_rate": 9.197681453397407e-06, "loss": 0.5627, "step": 1503 }, { "epoch": 1.092502213243139, "grad_norm": 0.4068817249799283, "learning_rate": 9.196383985157657e-06, "loss": 0.5682, "step": 1504 }, { "epoch": 1.0932286109913059, "grad_norm": 0.3822730777415677, "learning_rate": 9.195085560333777e-06, "loss": 0.5613, "step": 1505 }, { "epoch": 1.093955008739473, "grad_norm": 0.509101461961365, "learning_rate": 9.193786179221751e-06, "loss": 0.5821, "step": 1506 }, { "epoch": 1.0946814064876398, "grad_norm": 0.36195978677680385, "learning_rate": 9.192485842117777e-06, "loss": 0.5572, "step": 1507 }, { "epoch": 1.0954078042358069, "grad_norm": 0.43123149253343235, "learning_rate": 9.191184549318275e-06, "loss": 0.5676, "step": 1508 }, { "epoch": 1.096134201983974, "grad_norm": 0.39497568720609605, "learning_rate": 9.189882301119878e-06, "loss": 0.5662, "step": 1509 }, { "epoch": 1.0968605997321408, "grad_norm": 0.392750640761948, "learning_rate": 9.188579097819439e-06, "loss": 0.5673, "step": 1510 }, { "epoch": 1.0975869974803079, "grad_norm": 0.39765453193046085, "learning_rate": 9.18727493971403e-06, "loss": 0.5867, "step": 1511 }, { "epoch": 1.0983133952284747, "grad_norm": 0.4802452358360034, "learning_rate": 9.18596982710094e-06, "loss": 0.5591, "step": 1512 }, { "epoch": 1.0990397929766418, "grad_norm": 0.3956359994045311, "learning_rate": 9.184663760277674e-06, "loss": 0.5708, "step": 1513 }, { "epoch": 1.0997661907248089, "grad_norm": 0.4135810979273268, "learning_rate": 9.183356739541958e-06, "loss": 0.5706, "step": 1514 }, { "epoch": 1.1004925884729757, "grad_norm": 0.38969981003076315, "learning_rate": 9.182048765191729e-06, "loss": 0.5751, "step": 1515 }, { "epoch": 1.1012189862211428, "grad_norm": 0.4182089910086, "learning_rate": 9.180739837525148e-06, "loss": 0.5545, "step": 1516 }, { "epoch": 1.1019453839693096, "grad_norm": 0.42615730462223866, "learning_rate": 9.17942995684059e-06, "loss": 0.5654, "step": 1517 }, { "epoch": 1.1026717817174767, "grad_norm": 0.38455182758076384, "learning_rate": 9.178119123436651e-06, "loss": 0.576, "step": 1518 }, { "epoch": 1.1033981794656436, "grad_norm": 0.4081208032550898, "learning_rate": 9.176807337612136e-06, "loss": 0.5673, "step": 1519 }, { "epoch": 1.1041245772138106, "grad_norm": 0.43376128159499927, "learning_rate": 9.175494599666078e-06, "loss": 0.5721, "step": 1520 }, { "epoch": 1.1048509749619777, "grad_norm": 0.40332872980030554, "learning_rate": 9.174180909897715e-06, "loss": 0.5687, "step": 1521 }, { "epoch": 1.1055773727101446, "grad_norm": 0.38223586760682, "learning_rate": 9.172866268606514e-06, "loss": 0.5702, "step": 1522 }, { "epoch": 1.1063037704583116, "grad_norm": 0.40924832268015005, "learning_rate": 9.171550676092151e-06, "loss": 0.5802, "step": 1523 }, { "epoch": 1.1070301682064785, "grad_norm": 0.4228837477832897, "learning_rate": 9.170234132654521e-06, "loss": 0.5779, "step": 1524 }, { "epoch": 1.1077565659546456, "grad_norm": 0.43068444275154966, "learning_rate": 9.168916638593736e-06, "loss": 0.5733, "step": 1525 }, { "epoch": 1.1084829637028126, "grad_norm": 0.4368955020773527, "learning_rate": 9.167598194210124e-06, "loss": 0.5637, "step": 1526 }, { "epoch": 1.1092093614509795, "grad_norm": 0.46106898550934694, "learning_rate": 9.166278799804232e-06, "loss": 0.5621, "step": 1527 }, { "epoch": 1.1099357591991466, "grad_norm": 0.4998543301741803, "learning_rate": 9.164958455676818e-06, "loss": 0.5738, "step": 1528 }, { "epoch": 1.1106621569473134, "grad_norm": 0.3770729245790208, "learning_rate": 9.163637162128865e-06, "loss": 0.5618, "step": 1529 }, { "epoch": 1.1113885546954805, "grad_norm": 0.49041521965602436, "learning_rate": 9.162314919461566e-06, "loss": 0.5533, "step": 1530 }, { "epoch": 1.1121149524436473, "grad_norm": 0.3900108576009805, "learning_rate": 9.16099172797633e-06, "loss": 0.5529, "step": 1531 }, { "epoch": 1.1128413501918144, "grad_norm": 0.37032433949378185, "learning_rate": 9.159667587974786e-06, "loss": 0.5696, "step": 1532 }, { "epoch": 1.1135677479399815, "grad_norm": 0.42684873849663624, "learning_rate": 9.158342499758777e-06, "loss": 0.5434, "step": 1533 }, { "epoch": 1.1142941456881483, "grad_norm": 0.43643707876206966, "learning_rate": 9.157016463630363e-06, "loss": 0.5706, "step": 1534 }, { "epoch": 1.1150205434363154, "grad_norm": 0.4454221836123075, "learning_rate": 9.15568947989182e-06, "loss": 0.5708, "step": 1535 }, { "epoch": 1.1157469411844823, "grad_norm": 0.38248652147035744, "learning_rate": 9.154361548845639e-06, "loss": 0.5571, "step": 1536 }, { "epoch": 1.1164733389326493, "grad_norm": 0.39988857054600924, "learning_rate": 9.153032670794527e-06, "loss": 0.5705, "step": 1537 }, { "epoch": 1.1171997366808162, "grad_norm": 0.4055927377484529, "learning_rate": 9.15170284604141e-06, "loss": 0.5589, "step": 1538 }, { "epoch": 1.1179261344289833, "grad_norm": 0.6645482334421663, "learning_rate": 9.150372074889427e-06, "loss": 0.5665, "step": 1539 }, { "epoch": 1.1186525321771503, "grad_norm": 0.37649582235674955, "learning_rate": 9.14904035764193e-06, "loss": 0.5655, "step": 1540 }, { "epoch": 1.1193789299253172, "grad_norm": 0.456135150501283, "learning_rate": 9.147707694602492e-06, "loss": 0.575, "step": 1541 }, { "epoch": 1.1201053276734843, "grad_norm": 0.4340819757352092, "learning_rate": 9.1463740860749e-06, "loss": 0.5541, "step": 1542 }, { "epoch": 1.120831725421651, "grad_norm": 0.5154115608102522, "learning_rate": 9.145039532363156e-06, "loss": 0.5728, "step": 1543 }, { "epoch": 1.1215581231698182, "grad_norm": 0.415248125939544, "learning_rate": 9.143704033771476e-06, "loss": 0.573, "step": 1544 }, { "epoch": 1.1222845209179853, "grad_norm": 0.47571569465415514, "learning_rate": 9.142367590604294e-06, "loss": 0.5724, "step": 1545 }, { "epoch": 1.123010918666152, "grad_norm": 0.4328702166226174, "learning_rate": 9.141030203166256e-06, "loss": 0.5881, "step": 1546 }, { "epoch": 1.1237373164143192, "grad_norm": 0.4029536592492071, "learning_rate": 9.139691871762229e-06, "loss": 0.5569, "step": 1547 }, { "epoch": 1.124463714162486, "grad_norm": 0.4467378583250728, "learning_rate": 9.138352596697287e-06, "loss": 0.5776, "step": 1548 }, { "epoch": 1.125190111910653, "grad_norm": 0.41357552315372287, "learning_rate": 9.137012378276729e-06, "loss": 0.552, "step": 1549 }, { "epoch": 1.1259165096588202, "grad_norm": 0.3961970064201352, "learning_rate": 9.135671216806057e-06, "loss": 0.542, "step": 1550 }, { "epoch": 1.126642907406987, "grad_norm": 0.3799347314928259, "learning_rate": 9.134329112591e-06, "loss": 0.5783, "step": 1551 }, { "epoch": 1.127369305155154, "grad_norm": 0.4409962185466233, "learning_rate": 9.132986065937495e-06, "loss": 0.5833, "step": 1552 }, { "epoch": 1.128095702903321, "grad_norm": 0.4199431422304019, "learning_rate": 9.131642077151695e-06, "loss": 0.5544, "step": 1553 }, { "epoch": 1.128822100651488, "grad_norm": 0.4359092717576693, "learning_rate": 9.130297146539967e-06, "loss": 0.5601, "step": 1554 }, { "epoch": 1.1295484983996549, "grad_norm": 0.4386918332276292, "learning_rate": 9.128951274408898e-06, "loss": 0.5779, "step": 1555 }, { "epoch": 1.130274896147822, "grad_norm": 0.43388198361205227, "learning_rate": 9.12760446106528e-06, "loss": 0.5601, "step": 1556 }, { "epoch": 1.1310012938959888, "grad_norm": 0.4020892074156709, "learning_rate": 9.126256706816129e-06, "loss": 0.558, "step": 1557 }, { "epoch": 1.1317276916441559, "grad_norm": 0.3612712750947298, "learning_rate": 9.124908011968667e-06, "loss": 0.5615, "step": 1558 }, { "epoch": 1.132454089392323, "grad_norm": 0.4384818833438229, "learning_rate": 9.123558376830342e-06, "loss": 0.5683, "step": 1559 }, { "epoch": 1.1331804871404898, "grad_norm": 0.4018494187243776, "learning_rate": 9.122207801708802e-06, "loss": 0.5727, "step": 1560 }, { "epoch": 1.1339068848886569, "grad_norm": 0.40446557479401735, "learning_rate": 9.120856286911922e-06, "loss": 0.5686, "step": 1561 }, { "epoch": 1.1346332826368237, "grad_norm": 0.7222820279604052, "learning_rate": 9.119503832747782e-06, "loss": 0.5595, "step": 1562 }, { "epoch": 1.1353596803849908, "grad_norm": 0.4118173443641317, "learning_rate": 9.118150439524682e-06, "loss": 0.5591, "step": 1563 }, { "epoch": 1.1360860781331579, "grad_norm": 0.39203921438913475, "learning_rate": 9.116796107551134e-06, "loss": 0.5664, "step": 1564 }, { "epoch": 1.1368124758813247, "grad_norm": 0.44213420522319913, "learning_rate": 9.115440837135862e-06, "loss": 0.5497, "step": 1565 }, { "epoch": 1.1375388736294918, "grad_norm": 0.3915843971752154, "learning_rate": 9.114084628587806e-06, "loss": 0.5743, "step": 1566 }, { "epoch": 1.1382652713776586, "grad_norm": 0.37457500840606167, "learning_rate": 9.112727482216123e-06, "loss": 0.5645, "step": 1567 }, { "epoch": 1.1389916691258257, "grad_norm": 0.39111251152334303, "learning_rate": 9.111369398330177e-06, "loss": 0.5793, "step": 1568 }, { "epoch": 1.1397180668739928, "grad_norm": 0.5449219416546525, "learning_rate": 9.110010377239552e-06, "loss": 0.582, "step": 1569 }, { "epoch": 1.1404444646221596, "grad_norm": 0.49691466965603404, "learning_rate": 9.108650419254037e-06, "loss": 0.5646, "step": 1570 }, { "epoch": 1.1411708623703267, "grad_norm": 0.40646649401243895, "learning_rate": 9.107289524683648e-06, "loss": 0.5645, "step": 1571 }, { "epoch": 1.1418972601184936, "grad_norm": 0.39891945566604176, "learning_rate": 9.105927693838601e-06, "loss": 0.5642, "step": 1572 }, { "epoch": 1.1426236578666606, "grad_norm": 0.4048822149119588, "learning_rate": 9.104564927029337e-06, "loss": 0.5652, "step": 1573 }, { "epoch": 1.1433500556148275, "grad_norm": 0.45448169948196127, "learning_rate": 9.103201224566499e-06, "loss": 0.5704, "step": 1574 }, { "epoch": 1.1440764533629946, "grad_norm": 0.3903073163448049, "learning_rate": 9.101836586760951e-06, "loss": 0.5751, "step": 1575 }, { "epoch": 1.1448028511111616, "grad_norm": 0.5097478191145266, "learning_rate": 9.10047101392377e-06, "loss": 0.5623, "step": 1576 }, { "epoch": 1.1455292488593285, "grad_norm": 0.5564016875040064, "learning_rate": 9.099104506366242e-06, "loss": 0.5735, "step": 1577 }, { "epoch": 1.1462556466074956, "grad_norm": 0.45792127134281246, "learning_rate": 9.09773706439987e-06, "loss": 0.562, "step": 1578 }, { "epoch": 1.1469820443556624, "grad_norm": 0.40573516591270037, "learning_rate": 9.096368688336365e-06, "loss": 0.5712, "step": 1579 }, { "epoch": 1.1477084421038295, "grad_norm": 0.3746216254001378, "learning_rate": 9.094999378487659e-06, "loss": 0.565, "step": 1580 }, { "epoch": 1.1484348398519963, "grad_norm": 0.4132489165843512, "learning_rate": 9.09362913516589e-06, "loss": 0.5611, "step": 1581 }, { "epoch": 1.1491612376001634, "grad_norm": 0.4327030952311928, "learning_rate": 9.092257958683411e-06, "loss": 0.572, "step": 1582 }, { "epoch": 1.1498876353483305, "grad_norm": 0.5657636997480442, "learning_rate": 9.090885849352788e-06, "loss": 0.5725, "step": 1583 }, { "epoch": 1.1506140330964973, "grad_norm": 0.43552805930495025, "learning_rate": 9.0895128074868e-06, "loss": 0.5722, "step": 1584 }, { "epoch": 1.1513404308446644, "grad_norm": 0.4021861507833854, "learning_rate": 9.088138833398435e-06, "loss": 0.5773, "step": 1585 }, { "epoch": 1.1520668285928313, "grad_norm": 0.4019858854361604, "learning_rate": 9.086763927400898e-06, "loss": 0.538, "step": 1586 }, { "epoch": 1.1527932263409983, "grad_norm": 0.5661378384808389, "learning_rate": 9.085388089807607e-06, "loss": 0.5519, "step": 1587 }, { "epoch": 1.1535196240891654, "grad_norm": 0.3744204610801191, "learning_rate": 9.08401132093219e-06, "loss": 0.5452, "step": 1588 }, { "epoch": 1.1542460218373323, "grad_norm": 0.6743822486879415, "learning_rate": 9.082633621088483e-06, "loss": 0.5664, "step": 1589 }, { "epoch": 1.1549724195854993, "grad_norm": 0.4620844862314671, "learning_rate": 9.081254990590542e-06, "loss": 0.5443, "step": 1590 }, { "epoch": 1.1556988173336662, "grad_norm": 0.35175578314298, "learning_rate": 9.079875429752633e-06, "loss": 0.5412, "step": 1591 }, { "epoch": 1.1564252150818333, "grad_norm": 0.391580626589242, "learning_rate": 9.07849493888923e-06, "loss": 0.5537, "step": 1592 }, { "epoch": 1.1571516128300003, "grad_norm": 0.4529476753979372, "learning_rate": 9.077113518315024e-06, "loss": 0.5604, "step": 1593 }, { "epoch": 1.1578780105781672, "grad_norm": 0.4550412254149466, "learning_rate": 9.075731168344917e-06, "loss": 0.5618, "step": 1594 }, { "epoch": 1.1586044083263343, "grad_norm": 0.3809100704198691, "learning_rate": 9.074347889294017e-06, "loss": 0.5896, "step": 1595 }, { "epoch": 1.1593308060745011, "grad_norm": 0.3905562748653443, "learning_rate": 9.072963681477654e-06, "loss": 0.5726, "step": 1596 }, { "epoch": 1.1600572038226682, "grad_norm": 0.4091043456983746, "learning_rate": 9.071578545211362e-06, "loss": 0.5696, "step": 1597 }, { "epoch": 1.160783601570835, "grad_norm": 0.4839681586665417, "learning_rate": 9.070192480810888e-06, "loss": 0.5621, "step": 1598 }, { "epoch": 1.1615099993190021, "grad_norm": 0.40196070880566054, "learning_rate": 9.068805488592191e-06, "loss": 0.5779, "step": 1599 }, { "epoch": 1.162236397067169, "grad_norm": 0.3918863091270513, "learning_rate": 9.067417568871444e-06, "loss": 0.5785, "step": 1600 }, { "epoch": 1.162962794815336, "grad_norm": 0.36932354291748165, "learning_rate": 9.06602872196503e-06, "loss": 0.5688, "step": 1601 }, { "epoch": 1.1636891925635031, "grad_norm": 0.511494494122197, "learning_rate": 9.064638948189539e-06, "loss": 0.5554, "step": 1602 }, { "epoch": 1.16441559031167, "grad_norm": 0.37895248932231274, "learning_rate": 9.06324824786178e-06, "loss": 0.5575, "step": 1603 }, { "epoch": 1.165141988059837, "grad_norm": 0.3832823773236108, "learning_rate": 9.061856621298767e-06, "loss": 0.5654, "step": 1604 }, { "epoch": 1.165868385808004, "grad_norm": 0.43465330879731395, "learning_rate": 9.060464068817728e-06, "loss": 0.5738, "step": 1605 }, { "epoch": 1.166594783556171, "grad_norm": 0.5551169241788557, "learning_rate": 9.059070590736101e-06, "loss": 0.5613, "step": 1606 }, { "epoch": 1.167321181304338, "grad_norm": 0.5780876697750443, "learning_rate": 9.057676187371536e-06, "loss": 0.5462, "step": 1607 }, { "epoch": 1.168047579052505, "grad_norm": 0.38976775310867845, "learning_rate": 9.056280859041893e-06, "loss": 0.5602, "step": 1608 }, { "epoch": 1.168773976800672, "grad_norm": 0.4137511184661419, "learning_rate": 9.054884606065243e-06, "loss": 0.5359, "step": 1609 }, { "epoch": 1.1695003745488388, "grad_norm": 0.4545114979291193, "learning_rate": 9.053487428759869e-06, "loss": 0.5566, "step": 1610 }, { "epoch": 1.170226772297006, "grad_norm": 0.40448803287087315, "learning_rate": 9.052089327444263e-06, "loss": 0.5613, "step": 1611 }, { "epoch": 1.170953170045173, "grad_norm": 0.4204853059368482, "learning_rate": 9.050690302437128e-06, "loss": 0.5506, "step": 1612 }, { "epoch": 1.1716795677933398, "grad_norm": 0.41937616591275495, "learning_rate": 9.049290354057379e-06, "loss": 0.5584, "step": 1613 }, { "epoch": 1.172405965541507, "grad_norm": 0.3812136298560557, "learning_rate": 9.047889482624139e-06, "loss": 0.5502, "step": 1614 }, { "epoch": 1.1731323632896737, "grad_norm": 0.42863197566192074, "learning_rate": 9.046487688456745e-06, "loss": 0.5717, "step": 1615 }, { "epoch": 1.1738587610378408, "grad_norm": 0.4416572927164445, "learning_rate": 9.045084971874738e-06, "loss": 0.5629, "step": 1616 }, { "epoch": 1.1745851587860079, "grad_norm": 0.4318909760541322, "learning_rate": 9.043681333197878e-06, "loss": 0.5616, "step": 1617 }, { "epoch": 1.1753115565341747, "grad_norm": 0.4913993162820547, "learning_rate": 9.042276772746127e-06, "loss": 0.5555, "step": 1618 }, { "epoch": 1.1760379542823418, "grad_norm": 0.38866225966344226, "learning_rate": 9.040871290839663e-06, "loss": 0.5804, "step": 1619 }, { "epoch": 1.1767643520305087, "grad_norm": 0.4473064645504217, "learning_rate": 9.03946488779887e-06, "loss": 0.558, "step": 1620 }, { "epoch": 1.1774907497786757, "grad_norm": 0.4002867177138744, "learning_rate": 9.038057563944346e-06, "loss": 0.5563, "step": 1621 }, { "epoch": 1.1782171475268426, "grad_norm": 0.3619082015242835, "learning_rate": 9.036649319596895e-06, "loss": 0.5704, "step": 1622 }, { "epoch": 1.1789435452750097, "grad_norm": 0.42685374009998284, "learning_rate": 9.035240155077532e-06, "loss": 0.572, "step": 1623 }, { "epoch": 1.1796699430231765, "grad_norm": 0.38283761766805846, "learning_rate": 9.033830070707485e-06, "loss": 0.5586, "step": 1624 }, { "epoch": 1.1803963407713436, "grad_norm": 0.5072262281997689, "learning_rate": 9.032419066808184e-06, "loss": 0.5596, "step": 1625 }, { "epoch": 1.1811227385195107, "grad_norm": 0.47901385507935723, "learning_rate": 9.03100714370128e-06, "loss": 0.5742, "step": 1626 }, { "epoch": 1.1818491362676775, "grad_norm": 0.4621432294903584, "learning_rate": 9.029594301708622e-06, "loss": 0.559, "step": 1627 }, { "epoch": 1.1825755340158446, "grad_norm": 0.35661902505888965, "learning_rate": 9.028180541152275e-06, "loss": 0.5427, "step": 1628 }, { "epoch": 1.1833019317640114, "grad_norm": 0.39979814096373295, "learning_rate": 9.026765862354512e-06, "loss": 0.5621, "step": 1629 }, { "epoch": 1.1840283295121785, "grad_norm": 0.6290392164977799, "learning_rate": 9.025350265637816e-06, "loss": 0.5631, "step": 1630 }, { "epoch": 1.1847547272603456, "grad_norm": 0.41675307891525165, "learning_rate": 9.023933751324876e-06, "loss": 0.56, "step": 1631 }, { "epoch": 1.1854811250085124, "grad_norm": 0.40915617471251176, "learning_rate": 9.022516319738598e-06, "loss": 0.568, "step": 1632 }, { "epoch": 1.1862075227566795, "grad_norm": 0.36057322747830917, "learning_rate": 9.021097971202085e-06, "loss": 0.548, "step": 1633 }, { "epoch": 1.1869339205048464, "grad_norm": 0.398305816253801, "learning_rate": 9.01967870603866e-06, "loss": 0.5648, "step": 1634 }, { "epoch": 1.1876603182530134, "grad_norm": 0.40047400844909886, "learning_rate": 9.018258524571848e-06, "loss": 0.5648, "step": 1635 }, { "epoch": 1.1883867160011805, "grad_norm": 0.36446634860953764, "learning_rate": 9.016837427125389e-06, "loss": 0.5458, "step": 1636 }, { "epoch": 1.1891131137493474, "grad_norm": 0.4437052109459863, "learning_rate": 9.015415414023226e-06, "loss": 0.5598, "step": 1637 }, { "epoch": 1.1898395114975144, "grad_norm": 0.40903270592078117, "learning_rate": 9.013992485589513e-06, "loss": 0.5544, "step": 1638 }, { "epoch": 1.1905659092456813, "grad_norm": 0.39745344686830697, "learning_rate": 9.012568642148615e-06, "loss": 0.5776, "step": 1639 }, { "epoch": 1.1912923069938484, "grad_norm": 0.5021202842523842, "learning_rate": 9.0111438840251e-06, "loss": 0.5513, "step": 1640 }, { "epoch": 1.1920187047420152, "grad_norm": 0.5116684721496529, "learning_rate": 9.009718211543752e-06, "loss": 0.5615, "step": 1641 }, { "epoch": 1.1927451024901823, "grad_norm": 0.3758199164186561, "learning_rate": 9.008291625029556e-06, "loss": 0.5675, "step": 1642 }, { "epoch": 1.1934715002383491, "grad_norm": 0.3805578386122031, "learning_rate": 9.00686412480771e-06, "loss": 0.5511, "step": 1643 }, { "epoch": 1.1941978979865162, "grad_norm": 0.44933161613407974, "learning_rate": 9.005435711203619e-06, "loss": 0.5492, "step": 1644 }, { "epoch": 1.1949242957346833, "grad_norm": 0.43365968490721074, "learning_rate": 9.004006384542894e-06, "loss": 0.5541, "step": 1645 }, { "epoch": 1.1956506934828501, "grad_norm": 0.3871348134506192, "learning_rate": 9.002576145151359e-06, "loss": 0.5641, "step": 1646 }, { "epoch": 1.1963770912310172, "grad_norm": 0.4064010639567433, "learning_rate": 9.001144993355042e-06, "loss": 0.5753, "step": 1647 }, { "epoch": 1.197103488979184, "grad_norm": 0.46830624397535536, "learning_rate": 8.99971292948018e-06, "loss": 0.5656, "step": 1648 }, { "epoch": 1.1978298867273511, "grad_norm": 0.38093273605602024, "learning_rate": 8.99827995385322e-06, "loss": 0.5537, "step": 1649 }, { "epoch": 1.1985562844755182, "grad_norm": 0.4356470351028575, "learning_rate": 8.996846066800815e-06, "loss": 0.5609, "step": 1650 }, { "epoch": 1.199282682223685, "grad_norm": 0.39422074961192954, "learning_rate": 8.995411268649823e-06, "loss": 0.5608, "step": 1651 }, { "epoch": 1.2000090799718521, "grad_norm": 0.375468512449991, "learning_rate": 8.993975559727316e-06, "loss": 0.5668, "step": 1652 }, { "epoch": 1.200735477720019, "grad_norm": 0.40297827175793516, "learning_rate": 8.992538940360568e-06, "loss": 0.5538, "step": 1653 }, { "epoch": 1.201461875468186, "grad_norm": 0.38635504994472913, "learning_rate": 8.991101410877064e-06, "loss": 0.5646, "step": 1654 }, { "epoch": 1.2021882732163531, "grad_norm": 0.33581752318849784, "learning_rate": 8.989662971604491e-06, "loss": 0.5413, "step": 1655 }, { "epoch": 1.20291467096452, "grad_norm": 0.3409030181703885, "learning_rate": 8.988223622870754e-06, "loss": 0.5442, "step": 1656 }, { "epoch": 1.203641068712687, "grad_norm": 0.4306916478895894, "learning_rate": 8.986783365003955e-06, "loss": 0.5695, "step": 1657 }, { "epoch": 1.204367466460854, "grad_norm": 0.43933251871690226, "learning_rate": 8.985342198332407e-06, "loss": 0.5614, "step": 1658 }, { "epoch": 1.205093864209021, "grad_norm": 0.4168589094786127, "learning_rate": 8.983900123184634e-06, "loss": 0.5626, "step": 1659 }, { "epoch": 1.205820261957188, "grad_norm": 0.39213159298507183, "learning_rate": 8.982457139889358e-06, "loss": 0.5508, "step": 1660 }, { "epoch": 1.206546659705355, "grad_norm": 0.3930281890036753, "learning_rate": 8.981013248775516e-06, "loss": 0.5575, "step": 1661 }, { "epoch": 1.207273057453522, "grad_norm": 0.4032943583882293, "learning_rate": 8.979568450172248e-06, "loss": 0.5513, "step": 1662 }, { "epoch": 1.2079994552016888, "grad_norm": 0.35483539042175233, "learning_rate": 8.978122744408905e-06, "loss": 0.5521, "step": 1663 }, { "epoch": 1.208725852949856, "grad_norm": 0.604044247562166, "learning_rate": 8.976676131815041e-06, "loss": 0.5669, "step": 1664 }, { "epoch": 1.2094522506980228, "grad_norm": 0.42184739662747306, "learning_rate": 8.975228612720415e-06, "loss": 0.5497, "step": 1665 }, { "epoch": 1.2101786484461898, "grad_norm": 0.4423424779217794, "learning_rate": 8.973780187454999e-06, "loss": 0.5508, "step": 1666 }, { "epoch": 1.2109050461943567, "grad_norm": 0.41083410822149735, "learning_rate": 8.972330856348965e-06, "loss": 0.5541, "step": 1667 }, { "epoch": 1.2116314439425238, "grad_norm": 0.3870919292235364, "learning_rate": 8.970880619732695e-06, "loss": 0.5364, "step": 1668 }, { "epoch": 1.2123578416906908, "grad_norm": 0.41285288636232365, "learning_rate": 8.96942947793678e-06, "loss": 0.5753, "step": 1669 }, { "epoch": 1.2130842394388577, "grad_norm": 0.39234857911866095, "learning_rate": 8.967977431292009e-06, "loss": 0.5522, "step": 1670 }, { "epoch": 1.2138106371870248, "grad_norm": 0.4284657100652839, "learning_rate": 8.966524480129386e-06, "loss": 0.558, "step": 1671 }, { "epoch": 1.2145370349351916, "grad_norm": 0.42743619885583367, "learning_rate": 8.965070624780117e-06, "loss": 0.5732, "step": 1672 }, { "epoch": 1.2152634326833587, "grad_norm": 0.38064277890598813, "learning_rate": 8.963615865575613e-06, "loss": 0.541, "step": 1673 }, { "epoch": 1.2159898304315258, "grad_norm": 0.39160633024799835, "learning_rate": 8.962160202847494e-06, "loss": 0.5461, "step": 1674 }, { "epoch": 1.2167162281796926, "grad_norm": 0.45820430910342846, "learning_rate": 8.960703636927585e-06, "loss": 0.5398, "step": 1675 }, { "epoch": 1.2174426259278597, "grad_norm": 0.4594650336331551, "learning_rate": 8.959246168147915e-06, "loss": 0.5433, "step": 1676 }, { "epoch": 1.2181690236760265, "grad_norm": 0.43281785549799423, "learning_rate": 8.957787796840724e-06, "loss": 0.564, "step": 1677 }, { "epoch": 1.2188954214241936, "grad_norm": 0.3444644303493312, "learning_rate": 8.95632852333845e-06, "loss": 0.5567, "step": 1678 }, { "epoch": 1.2196218191723607, "grad_norm": 0.3718226041891921, "learning_rate": 8.954868347973742e-06, "loss": 0.5462, "step": 1679 }, { "epoch": 1.2203482169205275, "grad_norm": 0.408627736693502, "learning_rate": 8.953407271079456e-06, "loss": 0.5365, "step": 1680 }, { "epoch": 1.2210746146686946, "grad_norm": 0.37139547247679977, "learning_rate": 8.951945292988647e-06, "loss": 0.5471, "step": 1681 }, { "epoch": 1.2218010124168615, "grad_norm": 0.3724276111177844, "learning_rate": 8.950482414034583e-06, "loss": 0.5477, "step": 1682 }, { "epoch": 1.2225274101650285, "grad_norm": 0.39946058697205866, "learning_rate": 8.94901863455073e-06, "loss": 0.5371, "step": 1683 }, { "epoch": 1.2232538079131954, "grad_norm": 0.36753474211608916, "learning_rate": 8.947553954870765e-06, "loss": 0.5574, "step": 1684 }, { "epoch": 1.2239802056613625, "grad_norm": 0.5392178107861165, "learning_rate": 8.946088375328569e-06, "loss": 0.5587, "step": 1685 }, { "epoch": 1.2247066034095295, "grad_norm": 0.3644470759919107, "learning_rate": 8.944621896258226e-06, "loss": 0.5621, "step": 1686 }, { "epoch": 1.2254330011576964, "grad_norm": 0.44219202544370106, "learning_rate": 8.943154517994026e-06, "loss": 0.5536, "step": 1687 }, { "epoch": 1.2261593989058635, "grad_norm": 0.3764996895947529, "learning_rate": 8.941686240870464e-06, "loss": 0.5558, "step": 1688 }, { "epoch": 1.2268857966540303, "grad_norm": 0.3462514441906337, "learning_rate": 8.940217065222241e-06, "loss": 0.5434, "step": 1689 }, { "epoch": 1.2276121944021974, "grad_norm": 0.37441059075120486, "learning_rate": 8.938746991384264e-06, "loss": 0.5576, "step": 1690 }, { "epoch": 1.2283385921503642, "grad_norm": 0.4985554959055178, "learning_rate": 8.937276019691638e-06, "loss": 0.5577, "step": 1691 }, { "epoch": 1.2290649898985313, "grad_norm": 0.3864813217862178, "learning_rate": 8.935804150479683e-06, "loss": 0.5442, "step": 1692 }, { "epoch": 1.2297913876466984, "grad_norm": 0.3845478893478187, "learning_rate": 8.934331384083914e-06, "loss": 0.5554, "step": 1693 }, { "epoch": 1.2305177853948652, "grad_norm": 1.6701398796419848, "learning_rate": 8.932857720840056e-06, "loss": 0.5666, "step": 1694 }, { "epoch": 1.2312441831430323, "grad_norm": 0.4220956274028827, "learning_rate": 8.931383161084037e-06, "loss": 0.5488, "step": 1695 }, { "epoch": 1.2319705808911992, "grad_norm": 0.38464465303061424, "learning_rate": 8.92990770515199e-06, "loss": 0.5392, "step": 1696 }, { "epoch": 1.2326969786393662, "grad_norm": 0.38261868231706775, "learning_rate": 8.928431353380249e-06, "loss": 0.551, "step": 1697 }, { "epoch": 1.2334233763875333, "grad_norm": 0.4963784011318068, "learning_rate": 8.926954106105358e-06, "loss": 0.5572, "step": 1698 }, { "epoch": 1.2341497741357002, "grad_norm": 0.4774876780938401, "learning_rate": 8.92547596366406e-06, "loss": 0.5671, "step": 1699 }, { "epoch": 1.2348761718838672, "grad_norm": 0.5664978767143851, "learning_rate": 8.923996926393306e-06, "loss": 0.5343, "step": 1700 }, { "epoch": 1.235602569632034, "grad_norm": 0.4431798160139461, "learning_rate": 8.922516994630246e-06, "loss": 0.5484, "step": 1701 }, { "epoch": 1.2363289673802011, "grad_norm": 0.39504209873632384, "learning_rate": 8.921036168712241e-06, "loss": 0.5636, "step": 1702 }, { "epoch": 1.2370553651283682, "grad_norm": 1.8434104950338672, "learning_rate": 8.919554448976848e-06, "loss": 0.5624, "step": 1703 }, { "epoch": 1.237781762876535, "grad_norm": 2.226707270583763, "learning_rate": 8.918071835761833e-06, "loss": 0.5408, "step": 1704 }, { "epoch": 1.2385081606247021, "grad_norm": 0.46962685875100246, "learning_rate": 8.916588329405164e-06, "loss": 0.5627, "step": 1705 }, { "epoch": 1.239234558372869, "grad_norm": 0.41155769724241303, "learning_rate": 8.915103930245015e-06, "loss": 0.56, "step": 1706 }, { "epoch": 1.239960956121036, "grad_norm": 0.4924690840390668, "learning_rate": 8.913618638619757e-06, "loss": 0.5503, "step": 1707 }, { "epoch": 1.240687353869203, "grad_norm": 0.44712240099824757, "learning_rate": 8.912132454867972e-06, "loss": 0.5658, "step": 1708 }, { "epoch": 1.24141375161737, "grad_norm": 0.5552373112222078, "learning_rate": 8.910645379328442e-06, "loss": 0.5483, "step": 1709 }, { "epoch": 1.2421401493655368, "grad_norm": 0.7778708121407731, "learning_rate": 8.90915741234015e-06, "loss": 0.5438, "step": 1710 }, { "epoch": 1.242866547113704, "grad_norm": 0.48332018315159286, "learning_rate": 8.907668554242286e-06, "loss": 0.5506, "step": 1711 }, { "epoch": 1.243592944861871, "grad_norm": 0.4372921808238204, "learning_rate": 8.906178805374243e-06, "loss": 0.5508, "step": 1712 }, { "epoch": 1.2443193426100378, "grad_norm": 0.4716974668725763, "learning_rate": 8.904688166075614e-06, "loss": 0.5515, "step": 1713 }, { "epoch": 1.245045740358205, "grad_norm": 0.5165417601435339, "learning_rate": 8.903196636686198e-06, "loss": 0.5343, "step": 1714 }, { "epoch": 1.2457721381063718, "grad_norm": 0.47588955550056855, "learning_rate": 8.901704217545995e-06, "loss": 0.5535, "step": 1715 }, { "epoch": 1.2464985358545388, "grad_norm": 0.41375809514814743, "learning_rate": 8.900210908995207e-06, "loss": 0.5495, "step": 1716 }, { "epoch": 1.247224933602706, "grad_norm": 0.3667236648873448, "learning_rate": 8.898716711374243e-06, "loss": 0.5657, "step": 1717 }, { "epoch": 1.2479513313508728, "grad_norm": 0.472340981602009, "learning_rate": 8.89722162502371e-06, "loss": 0.541, "step": 1718 }, { "epoch": 1.2486777290990398, "grad_norm": 0.4053169462679586, "learning_rate": 8.89572565028442e-06, "loss": 0.5594, "step": 1719 }, { "epoch": 1.2494041268472067, "grad_norm": 0.5282769709840852, "learning_rate": 8.894228787497389e-06, "loss": 0.5687, "step": 1720 }, { "epoch": 1.2501305245953738, "grad_norm": 0.4227663685562685, "learning_rate": 8.89273103700383e-06, "loss": 0.5313, "step": 1721 }, { "epoch": 1.2508569223435408, "grad_norm": 0.48655409953672724, "learning_rate": 8.891232399145164e-06, "loss": 0.5461, "step": 1722 }, { "epoch": 1.2515833200917077, "grad_norm": 0.4797567850949162, "learning_rate": 8.88973287426301e-06, "loss": 0.5577, "step": 1723 }, { "epoch": 1.2523097178398748, "grad_norm": 0.37881110627643433, "learning_rate": 8.888232462699196e-06, "loss": 0.5517, "step": 1724 }, { "epoch": 1.2530361155880416, "grad_norm": 0.4458797743651538, "learning_rate": 8.88673116479574e-06, "loss": 0.5386, "step": 1725 }, { "epoch": 1.2537625133362087, "grad_norm": 0.47035221294764884, "learning_rate": 8.885228980894877e-06, "loss": 0.5675, "step": 1726 }, { "epoch": 1.2544889110843758, "grad_norm": 0.4037941136698805, "learning_rate": 8.883725911339032e-06, "loss": 0.5492, "step": 1727 }, { "epoch": 1.2552153088325426, "grad_norm": 0.43376466055770363, "learning_rate": 8.882221956470838e-06, "loss": 0.5576, "step": 1728 }, { "epoch": 1.2559417065807095, "grad_norm": 0.4404900043276078, "learning_rate": 8.880717116633126e-06, "loss": 0.5497, "step": 1729 }, { "epoch": 1.2566681043288765, "grad_norm": 0.4159398594778771, "learning_rate": 8.87921139216893e-06, "loss": 0.5403, "step": 1730 }, { "epoch": 1.2573945020770436, "grad_norm": 0.40016155319282093, "learning_rate": 8.877704783421492e-06, "loss": 0.5596, "step": 1731 }, { "epoch": 1.2581208998252105, "grad_norm": 0.6201551996430268, "learning_rate": 8.876197290734247e-06, "loss": 0.5486, "step": 1732 }, { "epoch": 1.2588472975733775, "grad_norm": 0.423114797642255, "learning_rate": 8.874688914450833e-06, "loss": 0.5619, "step": 1733 }, { "epoch": 1.2595736953215444, "grad_norm": 0.4015212537751768, "learning_rate": 8.873179654915093e-06, "loss": 0.5506, "step": 1734 }, { "epoch": 1.2603000930697115, "grad_norm": 0.4138072597511688, "learning_rate": 8.871669512471068e-06, "loss": 0.5414, "step": 1735 }, { "epoch": 1.2610264908178785, "grad_norm": 0.37740103115812385, "learning_rate": 8.870158487463003e-06, "loss": 0.5652, "step": 1736 }, { "epoch": 1.2617528885660454, "grad_norm": 0.4672154959611961, "learning_rate": 8.86864658023534e-06, "loss": 0.5509, "step": 1737 }, { "epoch": 1.2624792863142125, "grad_norm": 0.3794877865595524, "learning_rate": 8.867133791132729e-06, "loss": 0.5378, "step": 1738 }, { "epoch": 1.2632056840623793, "grad_norm": 0.3941484564763056, "learning_rate": 8.865620120500017e-06, "loss": 0.5313, "step": 1739 }, { "epoch": 1.2639320818105464, "grad_norm": 0.433625775037335, "learning_rate": 8.864105568682245e-06, "loss": 0.5477, "step": 1740 }, { "epoch": 1.2646584795587135, "grad_norm": 0.4256197777407076, "learning_rate": 8.862590136024668e-06, "loss": 0.547, "step": 1741 }, { "epoch": 1.2653848773068803, "grad_norm": 0.36925502674557587, "learning_rate": 8.861073822872735e-06, "loss": 0.5558, "step": 1742 }, { "epoch": 1.2661112750550474, "grad_norm": 0.4404635405799064, "learning_rate": 8.859556629572095e-06, "loss": 0.5528, "step": 1743 }, { "epoch": 1.2668376728032142, "grad_norm": 0.4142530403240027, "learning_rate": 8.858038556468598e-06, "loss": 0.5577, "step": 1744 }, { "epoch": 1.2675640705513813, "grad_norm": 0.418909820251429, "learning_rate": 8.856519603908295e-06, "loss": 0.5482, "step": 1745 }, { "epoch": 1.2682904682995484, "grad_norm": 0.47128421655077196, "learning_rate": 8.854999772237442e-06, "loss": 0.5437, "step": 1746 }, { "epoch": 1.2690168660477152, "grad_norm": 0.5111805391386319, "learning_rate": 8.853479061802489e-06, "loss": 0.5317, "step": 1747 }, { "epoch": 1.2697432637958823, "grad_norm": 0.3996020838695995, "learning_rate": 8.851957472950086e-06, "loss": 0.5593, "step": 1748 }, { "epoch": 1.2704696615440492, "grad_norm": 0.4130100221068787, "learning_rate": 8.85043500602709e-06, "loss": 0.5549, "step": 1749 }, { "epoch": 1.2711960592922162, "grad_norm": 0.3513680463359487, "learning_rate": 8.84891166138055e-06, "loss": 0.5423, "step": 1750 }, { "epoch": 1.271922457040383, "grad_norm": 0.4395399002655679, "learning_rate": 8.847387439357725e-06, "loss": 0.5557, "step": 1751 }, { "epoch": 1.2726488547885502, "grad_norm": 0.38446859995160715, "learning_rate": 8.84586234030606e-06, "loss": 0.5463, "step": 1752 }, { "epoch": 1.273375252536717, "grad_norm": 0.3957873922173363, "learning_rate": 8.844336364573214e-06, "loss": 0.5602, "step": 1753 }, { "epoch": 1.274101650284884, "grad_norm": 0.38643960679786543, "learning_rate": 8.842809512507038e-06, "loss": 0.5458, "step": 1754 }, { "epoch": 1.2748280480330512, "grad_norm": 0.3706602353251092, "learning_rate": 8.841281784455586e-06, "loss": 0.5602, "step": 1755 }, { "epoch": 1.275554445781218, "grad_norm": 0.3796213188989138, "learning_rate": 8.839753180767108e-06, "loss": 0.5635, "step": 1756 }, { "epoch": 1.276280843529385, "grad_norm": 0.45928464006098907, "learning_rate": 8.838223701790057e-06, "loss": 0.5426, "step": 1757 }, { "epoch": 1.277007241277552, "grad_norm": 0.3818371835551699, "learning_rate": 8.836693347873084e-06, "loss": 0.5463, "step": 1758 }, { "epoch": 1.277733639025719, "grad_norm": 0.36166963772964084, "learning_rate": 8.835162119365042e-06, "loss": 0.5264, "step": 1759 }, { "epoch": 1.278460036773886, "grad_norm": 0.385670474786461, "learning_rate": 8.833630016614976e-06, "loss": 0.543, "step": 1760 }, { "epoch": 1.279186434522053, "grad_norm": 0.3990063492555785, "learning_rate": 8.832097039972144e-06, "loss": 0.5338, "step": 1761 }, { "epoch": 1.27991283227022, "grad_norm": 0.6653963038347863, "learning_rate": 8.830563189785986e-06, "loss": 0.5375, "step": 1762 }, { "epoch": 1.2806392300183869, "grad_norm": 0.39286885480174005, "learning_rate": 8.829028466406156e-06, "loss": 0.5575, "step": 1763 }, { "epoch": 1.281365627766554, "grad_norm": 0.39103188453512855, "learning_rate": 8.827492870182496e-06, "loss": 0.5467, "step": 1764 }, { "epoch": 1.282092025514721, "grad_norm": 0.3530567148397571, "learning_rate": 8.825956401465056e-06, "loss": 0.5501, "step": 1765 }, { "epoch": 1.2828184232628879, "grad_norm": 0.3962145821734583, "learning_rate": 8.824419060604081e-06, "loss": 0.5512, "step": 1766 }, { "epoch": 1.283544821011055, "grad_norm": 0.4155954746083735, "learning_rate": 8.822880847950011e-06, "loss": 0.543, "step": 1767 }, { "epoch": 1.2842712187592218, "grad_norm": 0.36229776590475643, "learning_rate": 8.82134176385349e-06, "loss": 0.5494, "step": 1768 }, { "epoch": 1.2849976165073889, "grad_norm": 0.3892187196516395, "learning_rate": 8.819801808665361e-06, "loss": 0.5606, "step": 1769 }, { "epoch": 1.285724014255556, "grad_norm": 0.6272602907160172, "learning_rate": 8.818260982736662e-06, "loss": 0.5466, "step": 1770 }, { "epoch": 1.2864504120037228, "grad_norm": 0.3920063283771201, "learning_rate": 8.81671928641863e-06, "loss": 0.5346, "step": 1771 }, { "epoch": 1.2871768097518896, "grad_norm": 0.34977300906231246, "learning_rate": 8.815176720062701e-06, "loss": 0.5343, "step": 1772 }, { "epoch": 1.2879032075000567, "grad_norm": 0.4164422641969452, "learning_rate": 8.813633284020512e-06, "loss": 0.5552, "step": 1773 }, { "epoch": 1.2886296052482238, "grad_norm": 0.389797438250589, "learning_rate": 8.812088978643894e-06, "loss": 0.5468, "step": 1774 }, { "epoch": 1.2893560029963906, "grad_norm": 0.8495184137168471, "learning_rate": 8.810543804284879e-06, "loss": 0.5501, "step": 1775 }, { "epoch": 1.2900824007445577, "grad_norm": 0.46604450197114805, "learning_rate": 8.808997761295698e-06, "loss": 0.5455, "step": 1776 }, { "epoch": 1.2908087984927246, "grad_norm": 0.39491912481678554, "learning_rate": 8.807450850028776e-06, "loss": 0.5362, "step": 1777 }, { "epoch": 1.2915351962408916, "grad_norm": 0.49234274419716917, "learning_rate": 8.805903070836738e-06, "loss": 0.5406, "step": 1778 }, { "epoch": 1.2922615939890587, "grad_norm": 0.34933402697149396, "learning_rate": 8.80435442407241e-06, "loss": 0.5448, "step": 1779 }, { "epoch": 1.2929879917372256, "grad_norm": 0.4826316850229999, "learning_rate": 8.80280491008881e-06, "loss": 0.5451, "step": 1780 }, { "epoch": 1.2937143894853926, "grad_norm": 0.39767972884114156, "learning_rate": 8.801254529239156e-06, "loss": 0.5499, "step": 1781 }, { "epoch": 1.2944407872335595, "grad_norm": 0.4612738879037992, "learning_rate": 8.799703281876866e-06, "loss": 0.5473, "step": 1782 }, { "epoch": 1.2951671849817266, "grad_norm": 0.4211579062948049, "learning_rate": 8.798151168355555e-06, "loss": 0.5347, "step": 1783 }, { "epoch": 1.2958935827298936, "grad_norm": 0.4422061037768507, "learning_rate": 8.79659818902903e-06, "loss": 0.5535, "step": 1784 }, { "epoch": 1.2966199804780605, "grad_norm": 0.5858687372390492, "learning_rate": 8.795044344251302e-06, "loss": 0.547, "step": 1785 }, { "epoch": 1.2973463782262276, "grad_norm": 0.39423622712419204, "learning_rate": 8.793489634376576e-06, "loss": 0.5372, "step": 1786 }, { "epoch": 1.2980727759743944, "grad_norm": 0.4078027764596252, "learning_rate": 8.791934059759256e-06, "loss": 0.53, "step": 1787 }, { "epoch": 1.2987991737225615, "grad_norm": 0.4120785404484633, "learning_rate": 8.79037762075394e-06, "loss": 0.5541, "step": 1788 }, { "epoch": 1.2995255714707286, "grad_norm": 0.36186240974562306, "learning_rate": 8.788820317715427e-06, "loss": 0.5424, "step": 1789 }, { "epoch": 1.3002519692188954, "grad_norm": 0.4251288194120092, "learning_rate": 8.787262150998713e-06, "loss": 0.5549, "step": 1790 }, { "epoch": 1.3009783669670625, "grad_norm": 0.4915263914925816, "learning_rate": 8.785703120958984e-06, "loss": 0.5326, "step": 1791 }, { "epoch": 1.3017047647152293, "grad_norm": 0.3983668140161739, "learning_rate": 8.784143227951628e-06, "loss": 0.5303, "step": 1792 }, { "epoch": 1.3024311624633964, "grad_norm": 0.3754965619413052, "learning_rate": 8.782582472332236e-06, "loss": 0.5393, "step": 1793 }, { "epoch": 1.3031575602115635, "grad_norm": 0.3773856242093586, "learning_rate": 8.781020854456582e-06, "loss": 0.535, "step": 1794 }, { "epoch": 1.3038839579597303, "grad_norm": 0.4490866762349985, "learning_rate": 8.779458374680646e-06, "loss": 0.5308, "step": 1795 }, { "epoch": 1.3046103557078972, "grad_norm": 0.40733809453212066, "learning_rate": 8.777895033360603e-06, "loss": 0.5478, "step": 1796 }, { "epoch": 1.3053367534560643, "grad_norm": 0.42410169866054975, "learning_rate": 8.776330830852825e-06, "loss": 0.5534, "step": 1797 }, { "epoch": 1.3060631512042313, "grad_norm": 0.34776512661909453, "learning_rate": 8.774765767513876e-06, "loss": 0.5411, "step": 1798 }, { "epoch": 1.3067895489523982, "grad_norm": 0.3769902398807461, "learning_rate": 8.773199843700518e-06, "loss": 0.5309, "step": 1799 }, { "epoch": 1.3075159467005653, "grad_norm": 0.37330832388241414, "learning_rate": 8.771633059769712e-06, "loss": 0.549, "step": 1800 }, { "epoch": 1.308242344448732, "grad_norm": 0.39550029782820983, "learning_rate": 8.770065416078615e-06, "loss": 0.5539, "step": 1801 }, { "epoch": 1.3089687421968992, "grad_norm": 0.3819609212474707, "learning_rate": 8.768496912984574e-06, "loss": 0.5346, "step": 1802 }, { "epoch": 1.3096951399450663, "grad_norm": 0.4118791910236496, "learning_rate": 8.76692755084514e-06, "loss": 0.5563, "step": 1803 }, { "epoch": 1.310421537693233, "grad_norm": 0.44571217160818744, "learning_rate": 8.765357330018056e-06, "loss": 0.5438, "step": 1804 }, { "epoch": 1.3111479354414002, "grad_norm": 0.4121837514951978, "learning_rate": 8.763786250861258e-06, "loss": 0.5497, "step": 1805 }, { "epoch": 1.311874333189567, "grad_norm": 0.4083163261875877, "learning_rate": 8.762214313732881e-06, "loss": 0.5552, "step": 1806 }, { "epoch": 1.312600730937734, "grad_norm": 0.4181152080467721, "learning_rate": 8.760641518991257e-06, "loss": 0.564, "step": 1807 }, { "epoch": 1.3133271286859012, "grad_norm": 0.38364370277204013, "learning_rate": 8.75906786699491e-06, "loss": 0.5378, "step": 1808 }, { "epoch": 1.314053526434068, "grad_norm": 0.41869608928831936, "learning_rate": 8.75749335810256e-06, "loss": 0.5464, "step": 1809 }, { "epoch": 1.314779924182235, "grad_norm": 0.42528426296113725, "learning_rate": 8.755917992673126e-06, "loss": 0.5463, "step": 1810 }, { "epoch": 1.315506321930402, "grad_norm": 0.38483565529483227, "learning_rate": 8.754341771065716e-06, "loss": 0.5398, "step": 1811 }, { "epoch": 1.316232719678569, "grad_norm": 0.46536447305861245, "learning_rate": 8.75276469363964e-06, "loss": 0.5464, "step": 1812 }, { "epoch": 1.316959117426736, "grad_norm": 0.41489340357941334, "learning_rate": 8.751186760754397e-06, "loss": 0.5319, "step": 1813 }, { "epoch": 1.317685515174903, "grad_norm": 0.4578205762462449, "learning_rate": 8.749607972769685e-06, "loss": 0.5439, "step": 1814 }, { "epoch": 1.3184119129230698, "grad_norm": 0.49155744319398526, "learning_rate": 8.748028330045395e-06, "loss": 0.5508, "step": 1815 }, { "epoch": 1.3191383106712369, "grad_norm": 0.37190008962866994, "learning_rate": 8.746447832941614e-06, "loss": 0.5534, "step": 1816 }, { "epoch": 1.319864708419404, "grad_norm": 0.3769835428333359, "learning_rate": 8.744866481818624e-06, "loss": 0.5389, "step": 1817 }, { "epoch": 1.3205911061675708, "grad_norm": 0.41417801427669604, "learning_rate": 8.743284277036899e-06, "loss": 0.5271, "step": 1818 }, { "epoch": 1.3213175039157379, "grad_norm": 0.3655224383838069, "learning_rate": 8.74170121895711e-06, "loss": 0.5684, "step": 1819 }, { "epoch": 1.3220439016639047, "grad_norm": 0.5029173229142005, "learning_rate": 8.740117307940123e-06, "loss": 0.5374, "step": 1820 }, { "epoch": 1.3227702994120718, "grad_norm": 0.4438745767992502, "learning_rate": 8.738532544346998e-06, "loss": 0.544, "step": 1821 }, { "epoch": 1.3234966971602389, "grad_norm": 0.3818937011598134, "learning_rate": 8.736946928538988e-06, "loss": 0.5456, "step": 1822 }, { "epoch": 1.3242230949084057, "grad_norm": 0.4314164584511377, "learning_rate": 8.73536046087754e-06, "loss": 0.5336, "step": 1823 }, { "epoch": 1.3249494926565728, "grad_norm": 0.39232944801618264, "learning_rate": 8.733773141724298e-06, "loss": 0.5363, "step": 1824 }, { "epoch": 1.3256758904047397, "grad_norm": 0.505176054594776, "learning_rate": 8.732184971441098e-06, "loss": 0.5478, "step": 1825 }, { "epoch": 1.3264022881529067, "grad_norm": 0.4141339957842407, "learning_rate": 8.730595950389968e-06, "loss": 0.5758, "step": 1826 }, { "epoch": 1.3271286859010738, "grad_norm": 0.3771489964312615, "learning_rate": 8.729006078933136e-06, "loss": 0.5457, "step": 1827 }, { "epoch": 1.3278550836492407, "grad_norm": 0.3418085527128151, "learning_rate": 8.727415357433018e-06, "loss": 0.541, "step": 1828 }, { "epoch": 1.3285814813974077, "grad_norm": 0.36072786868922635, "learning_rate": 8.725823786252226e-06, "loss": 0.55, "step": 1829 }, { "epoch": 1.3293078791455746, "grad_norm": 0.35881277088044755, "learning_rate": 8.724231365753567e-06, "loss": 0.5497, "step": 1830 }, { "epoch": 1.3300342768937417, "grad_norm": 0.4034179783998635, "learning_rate": 8.722638096300037e-06, "loss": 0.5403, "step": 1831 }, { "epoch": 1.3307606746419087, "grad_norm": 0.38808308265799407, "learning_rate": 8.721043978254833e-06, "loss": 0.5521, "step": 1832 }, { "epoch": 1.3314870723900756, "grad_norm": 0.37141607365637724, "learning_rate": 8.71944901198134e-06, "loss": 0.5288, "step": 1833 }, { "epoch": 1.3322134701382427, "grad_norm": 0.4153027779362984, "learning_rate": 8.717853197843134e-06, "loss": 0.563, "step": 1834 }, { "epoch": 1.3329398678864095, "grad_norm": 0.4063731663632702, "learning_rate": 8.716256536203992e-06, "loss": 0.5381, "step": 1835 }, { "epoch": 1.3336662656345766, "grad_norm": 0.3720949582825728, "learning_rate": 8.714659027427878e-06, "loss": 0.5378, "step": 1836 }, { "epoch": 1.3343926633827436, "grad_norm": 0.38950561831594427, "learning_rate": 8.713060671878953e-06, "loss": 0.5408, "step": 1837 }, { "epoch": 1.3351190611309105, "grad_norm": 0.3921429083344398, "learning_rate": 8.711461469921568e-06, "loss": 0.5313, "step": 1838 }, { "epoch": 1.3358454588790774, "grad_norm": 0.3842738383447121, "learning_rate": 8.709861421920267e-06, "loss": 0.5579, "step": 1839 }, { "epoch": 1.3365718566272444, "grad_norm": 0.4466227404643763, "learning_rate": 8.708260528239788e-06, "loss": 0.5391, "step": 1840 }, { "epoch": 1.3372982543754115, "grad_norm": 0.40000502784137687, "learning_rate": 8.706658789245064e-06, "loss": 0.5329, "step": 1841 }, { "epoch": 1.3380246521235784, "grad_norm": 0.3681548592380934, "learning_rate": 8.705056205301217e-06, "loss": 0.543, "step": 1842 }, { "epoch": 1.3387510498717454, "grad_norm": 0.5442305579733531, "learning_rate": 8.703452776773563e-06, "loss": 0.5572, "step": 1843 }, { "epoch": 1.3394774476199123, "grad_norm": 0.41846847295976275, "learning_rate": 8.70184850402761e-06, "loss": 0.5336, "step": 1844 }, { "epoch": 1.3402038453680793, "grad_norm": 0.39723343483159357, "learning_rate": 8.700243387429061e-06, "loss": 0.5432, "step": 1845 }, { "epoch": 1.3409302431162464, "grad_norm": 0.38939768776621403, "learning_rate": 8.698637427343809e-06, "loss": 0.5229, "step": 1846 }, { "epoch": 1.3416566408644133, "grad_norm": 0.4013658344287758, "learning_rate": 8.697030624137937e-06, "loss": 0.5599, "step": 1847 }, { "epoch": 1.3423830386125803, "grad_norm": 0.38252834404139674, "learning_rate": 8.695422978177724e-06, "loss": 0.5364, "step": 1848 }, { "epoch": 1.3431094363607472, "grad_norm": 0.37487413981443296, "learning_rate": 8.693814489829643e-06, "loss": 0.5305, "step": 1849 }, { "epoch": 1.3438358341089143, "grad_norm": 0.5045145749139772, "learning_rate": 8.692205159460349e-06, "loss": 0.5324, "step": 1850 }, { "epoch": 1.3445622318570813, "grad_norm": 0.3834848107935937, "learning_rate": 8.690594987436705e-06, "loss": 0.5373, "step": 1851 }, { "epoch": 1.3452886296052482, "grad_norm": 0.402784897329814, "learning_rate": 8.68898397412575e-06, "loss": 0.5405, "step": 1852 }, { "epoch": 1.3460150273534153, "grad_norm": 0.42427307386128277, "learning_rate": 8.687372119894723e-06, "loss": 0.5583, "step": 1853 }, { "epoch": 1.3467414251015821, "grad_norm": 0.3911558225143398, "learning_rate": 8.685759425111056e-06, "loss": 0.5583, "step": 1854 }, { "epoch": 1.3474678228497492, "grad_norm": 0.3543476050936793, "learning_rate": 8.684145890142366e-06, "loss": 0.5393, "step": 1855 }, { "epoch": 1.3481942205979163, "grad_norm": 0.4137430614844222, "learning_rate": 8.682531515356467e-06, "loss": 0.5418, "step": 1856 }, { "epoch": 1.3489206183460831, "grad_norm": 0.44802159447591655, "learning_rate": 8.680916301121365e-06, "loss": 0.5558, "step": 1857 }, { "epoch": 1.34964701609425, "grad_norm": 0.39855739423499525, "learning_rate": 8.679300247805252e-06, "loss": 0.5488, "step": 1858 }, { "epoch": 1.350373413842417, "grad_norm": 0.35349526794579944, "learning_rate": 8.677683355776515e-06, "loss": 0.5348, "step": 1859 }, { "epoch": 1.3510998115905841, "grad_norm": 0.4079156229909949, "learning_rate": 8.676065625403733e-06, "loss": 0.5355, "step": 1860 }, { "epoch": 1.351826209338751, "grad_norm": 0.4101135183395125, "learning_rate": 8.674447057055673e-06, "loss": 0.5592, "step": 1861 }, { "epoch": 1.352552607086918, "grad_norm": 0.4373664713387905, "learning_rate": 8.672827651101297e-06, "loss": 0.5451, "step": 1862 }, { "epoch": 1.353279004835085, "grad_norm": 0.3981073036267596, "learning_rate": 8.671207407909754e-06, "loss": 0.5478, "step": 1863 }, { "epoch": 1.354005402583252, "grad_norm": 0.3773769438352129, "learning_rate": 8.669586327850386e-06, "loss": 0.5414, "step": 1864 }, { "epoch": 1.354731800331419, "grad_norm": 0.4121506936367948, "learning_rate": 8.667964411292725e-06, "loss": 0.5418, "step": 1865 }, { "epoch": 1.355458198079586, "grad_norm": 0.3721842200340298, "learning_rate": 8.666341658606493e-06, "loss": 0.533, "step": 1866 }, { "epoch": 1.356184595827753, "grad_norm": 0.4303316522403638, "learning_rate": 8.664718070161607e-06, "loss": 0.5486, "step": 1867 }, { "epoch": 1.3569109935759198, "grad_norm": 0.3803968632015748, "learning_rate": 8.663093646328166e-06, "loss": 0.5482, "step": 1868 }, { "epoch": 1.357637391324087, "grad_norm": 0.4155775190064203, "learning_rate": 8.661468387476471e-06, "loss": 0.5249, "step": 1869 }, { "epoch": 1.358363789072254, "grad_norm": 0.41649246283951763, "learning_rate": 8.659842293977e-06, "loss": 0.5353, "step": 1870 }, { "epoch": 1.3590901868204208, "grad_norm": 0.4117337284313234, "learning_rate": 8.658215366200433e-06, "loss": 0.5451, "step": 1871 }, { "epoch": 1.359816584568588, "grad_norm": 0.4392355311303106, "learning_rate": 8.656587604517635e-06, "loss": 0.5366, "step": 1872 }, { "epoch": 1.3605429823167547, "grad_norm": 0.39648896071408246, "learning_rate": 8.654959009299657e-06, "loss": 0.5404, "step": 1873 }, { "epoch": 1.3612693800649218, "grad_norm": 0.3872675445529076, "learning_rate": 8.653329580917752e-06, "loss": 0.5475, "step": 1874 }, { "epoch": 1.361995777813089, "grad_norm": 0.4218469959253971, "learning_rate": 8.651699319743348e-06, "loss": 0.5293, "step": 1875 }, { "epoch": 1.3627221755612557, "grad_norm": 0.41767884109068387, "learning_rate": 8.650068226148074e-06, "loss": 0.5317, "step": 1876 }, { "epoch": 1.3634485733094228, "grad_norm": 0.3859802721551015, "learning_rate": 8.648436300503742e-06, "loss": 0.5293, "step": 1877 }, { "epoch": 1.3641749710575897, "grad_norm": 0.42945118279457706, "learning_rate": 8.646803543182361e-06, "loss": 0.5376, "step": 1878 }, { "epoch": 1.3649013688057567, "grad_norm": 0.37671501911345834, "learning_rate": 8.645169954556123e-06, "loss": 0.5395, "step": 1879 }, { "epoch": 1.3656277665539238, "grad_norm": 0.369319222907703, "learning_rate": 8.64353553499741e-06, "loss": 0.5338, "step": 1880 }, { "epoch": 1.3663541643020907, "grad_norm": 0.34095153700697944, "learning_rate": 8.641900284878798e-06, "loss": 0.5418, "step": 1881 }, { "epoch": 1.3670805620502575, "grad_norm": 0.6919819010110789, "learning_rate": 8.640264204573049e-06, "loss": 0.5415, "step": 1882 }, { "epoch": 1.3678069597984246, "grad_norm": 0.3531349893202495, "learning_rate": 8.63862729445311e-06, "loss": 0.5551, "step": 1883 }, { "epoch": 1.3685333575465917, "grad_norm": 0.37080974188770977, "learning_rate": 8.63698955489213e-06, "loss": 0.5424, "step": 1884 }, { "epoch": 1.3692597552947585, "grad_norm": 0.40867422693303573, "learning_rate": 8.635350986263431e-06, "loss": 0.5451, "step": 1885 }, { "epoch": 1.3699861530429256, "grad_norm": 0.4680403849119622, "learning_rate": 8.633711588940538e-06, "loss": 0.5471, "step": 1886 }, { "epoch": 1.3707125507910924, "grad_norm": 0.3835701618473411, "learning_rate": 8.632071363297155e-06, "loss": 0.5423, "step": 1887 }, { "epoch": 1.3714389485392595, "grad_norm": 0.38961022680836604, "learning_rate": 8.63043030970718e-06, "loss": 0.5414, "step": 1888 }, { "epoch": 1.3721653462874266, "grad_norm": 0.36131709249510413, "learning_rate": 8.628788428544698e-06, "loss": 0.5316, "step": 1889 }, { "epoch": 1.3728917440355934, "grad_norm": 0.3730237230261701, "learning_rate": 8.627145720183986e-06, "loss": 0.5461, "step": 1890 }, { "epoch": 1.3736181417837605, "grad_norm": 0.44393089036469924, "learning_rate": 8.6255021849995e-06, "loss": 0.5606, "step": 1891 }, { "epoch": 1.3743445395319274, "grad_norm": 0.3878934576688868, "learning_rate": 8.623857823365896e-06, "loss": 0.5303, "step": 1892 }, { "epoch": 1.3750709372800944, "grad_norm": 0.36740185024361566, "learning_rate": 8.622212635658015e-06, "loss": 0.5318, "step": 1893 }, { "epoch": 1.3757973350282615, "grad_norm": 0.3873406422624257, "learning_rate": 8.620566622250878e-06, "loss": 0.5422, "step": 1894 }, { "epoch": 1.3765237327764284, "grad_norm": 0.43456363913952073, "learning_rate": 8.618919783519706e-06, "loss": 0.5392, "step": 1895 }, { "epoch": 1.3772501305245954, "grad_norm": 0.40024030236124175, "learning_rate": 8.617272119839903e-06, "loss": 0.525, "step": 1896 }, { "epoch": 1.3779765282727623, "grad_norm": 0.3761797253882858, "learning_rate": 8.61562363158706e-06, "loss": 0.541, "step": 1897 }, { "epoch": 1.3787029260209294, "grad_norm": 0.3707955979046983, "learning_rate": 8.613974319136959e-06, "loss": 0.5317, "step": 1898 }, { "epoch": 1.3794293237690964, "grad_norm": 0.3760358723392047, "learning_rate": 8.612324182865565e-06, "loss": 0.5465, "step": 1899 }, { "epoch": 1.3801557215172633, "grad_norm": 0.4256562839209359, "learning_rate": 8.610673223149036e-06, "loss": 0.5414, "step": 1900 }, { "epoch": 1.3808821192654301, "grad_norm": 0.4279063471862655, "learning_rate": 8.609021440363712e-06, "loss": 0.5535, "step": 1901 }, { "epoch": 1.3816085170135972, "grad_norm": 0.3509451349635699, "learning_rate": 8.60736883488613e-06, "loss": 0.5357, "step": 1902 }, { "epoch": 1.3823349147617643, "grad_norm": 0.581332340740415, "learning_rate": 8.605715407093005e-06, "loss": 0.5311, "step": 1903 }, { "epoch": 1.3830613125099311, "grad_norm": 0.4407596324959687, "learning_rate": 8.604061157361243e-06, "loss": 0.5467, "step": 1904 }, { "epoch": 1.3837877102580982, "grad_norm": 0.4659283840524074, "learning_rate": 8.602406086067938e-06, "loss": 0.5415, "step": 1905 }, { "epoch": 1.384514108006265, "grad_norm": 0.3802760596789848, "learning_rate": 8.60075019359037e-06, "loss": 0.5515, "step": 1906 }, { "epoch": 1.3852405057544321, "grad_norm": 0.38316526842068516, "learning_rate": 8.599093480306009e-06, "loss": 0.5577, "step": 1907 }, { "epoch": 1.3859669035025992, "grad_norm": 0.49345679046274693, "learning_rate": 8.59743594659251e-06, "loss": 0.5567, "step": 1908 }, { "epoch": 1.386693301250766, "grad_norm": 0.4407146899861917, "learning_rate": 8.595777592827713e-06, "loss": 0.5374, "step": 1909 }, { "epoch": 1.3874196989989331, "grad_norm": 0.3686260662439781, "learning_rate": 8.594118419389648e-06, "loss": 0.5358, "step": 1910 }, { "epoch": 1.3881460967471, "grad_norm": 0.43813341796941524, "learning_rate": 8.592458426656531e-06, "loss": 0.5292, "step": 1911 }, { "epoch": 1.388872494495267, "grad_norm": 0.5289201011280882, "learning_rate": 8.590797615006763e-06, "loss": 0.5314, "step": 1912 }, { "epoch": 1.3895988922434341, "grad_norm": 0.3545435738165874, "learning_rate": 8.589135984818936e-06, "loss": 0.5331, "step": 1913 }, { "epoch": 1.390325289991601, "grad_norm": 0.3880181286798106, "learning_rate": 8.587473536471824e-06, "loss": 0.5292, "step": 1914 }, { "epoch": 1.391051687739768, "grad_norm": 0.3711953855294708, "learning_rate": 8.585810270344391e-06, "loss": 0.5398, "step": 1915 }, { "epoch": 1.391778085487935, "grad_norm": 0.3606452388095397, "learning_rate": 8.584146186815785e-06, "loss": 0.5388, "step": 1916 }, { "epoch": 1.392504483236102, "grad_norm": 0.49144573790253854, "learning_rate": 8.582481286265341e-06, "loss": 0.5331, "step": 1917 }, { "epoch": 1.393230880984269, "grad_norm": 0.44018366120876584, "learning_rate": 8.580815569072579e-06, "loss": 0.553, "step": 1918 }, { "epoch": 1.393957278732436, "grad_norm": 0.4114140131639351, "learning_rate": 8.57914903561721e-06, "loss": 0.5438, "step": 1919 }, { "epoch": 1.394683676480603, "grad_norm": 0.3779639707831259, "learning_rate": 8.577481686279123e-06, "loss": 0.5315, "step": 1920 }, { "epoch": 1.3954100742287698, "grad_norm": 0.5961234774932549, "learning_rate": 8.575813521438401e-06, "loss": 0.53, "step": 1921 }, { "epoch": 1.396136471976937, "grad_norm": 0.5384898478460642, "learning_rate": 8.574144541475309e-06, "loss": 0.5325, "step": 1922 }, { "epoch": 1.396862869725104, "grad_norm": 0.40857235280824467, "learning_rate": 8.572474746770298e-06, "loss": 0.5478, "step": 1923 }, { "epoch": 1.3975892674732708, "grad_norm": 0.425386958242189, "learning_rate": 8.570804137704005e-06, "loss": 0.5312, "step": 1924 }, { "epoch": 1.3983156652214377, "grad_norm": 0.7508299384321971, "learning_rate": 8.56913271465725e-06, "loss": 0.5324, "step": 1925 }, { "epoch": 1.3990420629696048, "grad_norm": 0.36782172271514246, "learning_rate": 8.567460478011048e-06, "loss": 0.5435, "step": 1926 }, { "epoch": 1.3997684607177718, "grad_norm": 0.38067589919289535, "learning_rate": 8.565787428146586e-06, "loss": 0.536, "step": 1927 }, { "epoch": 1.4004948584659387, "grad_norm": 0.5934857184987411, "learning_rate": 8.564113565445247e-06, "loss": 0.5341, "step": 1928 }, { "epoch": 1.4012212562141058, "grad_norm": 0.3596646337745538, "learning_rate": 8.56243889028859e-06, "loss": 0.5165, "step": 1929 }, { "epoch": 1.4019476539622726, "grad_norm": 0.4280239182874118, "learning_rate": 8.560763403058375e-06, "loss": 0.5389, "step": 1930 }, { "epoch": 1.4026740517104397, "grad_norm": 0.41277127732364816, "learning_rate": 8.559087104136525e-06, "loss": 0.5437, "step": 1931 }, { "epoch": 1.4034004494586068, "grad_norm": 0.35672219835666513, "learning_rate": 8.557409993905165e-06, "loss": 0.5297, "step": 1932 }, { "epoch": 1.4041268472067736, "grad_norm": 0.3761535339499059, "learning_rate": 8.5557320727466e-06, "loss": 0.5419, "step": 1933 }, { "epoch": 1.4048532449549407, "grad_norm": 0.403660946737396, "learning_rate": 8.55405334104332e-06, "loss": 0.5283, "step": 1934 }, { "epoch": 1.4055796427031075, "grad_norm": 0.387444269843542, "learning_rate": 8.552373799177997e-06, "loss": 0.5436, "step": 1935 }, { "epoch": 1.4063060404512746, "grad_norm": 0.3691292665136819, "learning_rate": 8.550693447533493e-06, "loss": 0.5567, "step": 1936 }, { "epoch": 1.4070324381994417, "grad_norm": 0.405303329778648, "learning_rate": 8.549012286492848e-06, "loss": 0.5315, "step": 1937 }, { "epoch": 1.4077588359476085, "grad_norm": 0.6503597959594926, "learning_rate": 8.54733031643929e-06, "loss": 0.5369, "step": 1938 }, { "epoch": 1.4084852336957756, "grad_norm": 0.39065256110950697, "learning_rate": 8.545647537756236e-06, "loss": 0.5332, "step": 1939 }, { "epoch": 1.4092116314439425, "grad_norm": 0.4609381596916133, "learning_rate": 8.543963950827279e-06, "loss": 0.5442, "step": 1940 }, { "epoch": 1.4099380291921095, "grad_norm": 0.3649780548895913, "learning_rate": 8.5422795560362e-06, "loss": 0.548, "step": 1941 }, { "epoch": 1.4106644269402766, "grad_norm": 0.4168463039717824, "learning_rate": 8.540594353766966e-06, "loss": 0.5426, "step": 1942 }, { "epoch": 1.4113908246884435, "grad_norm": 0.3605395594921, "learning_rate": 8.538908344403724e-06, "loss": 0.522, "step": 1943 }, { "epoch": 1.4121172224366103, "grad_norm": 0.43891493266382947, "learning_rate": 8.53722152833081e-06, "loss": 0.5485, "step": 1944 }, { "epoch": 1.4128436201847774, "grad_norm": 0.3616022146521361, "learning_rate": 8.535533905932739e-06, "loss": 0.5428, "step": 1945 }, { "epoch": 1.4135700179329445, "grad_norm": 0.4830840461126677, "learning_rate": 8.533845477594212e-06, "loss": 0.5359, "step": 1946 }, { "epoch": 1.4142964156811113, "grad_norm": 0.44953993531184466, "learning_rate": 8.532156243700114e-06, "loss": 0.5396, "step": 1947 }, { "epoch": 1.4150228134292784, "grad_norm": 0.34692781192390154, "learning_rate": 8.530466204635514e-06, "loss": 0.5394, "step": 1948 }, { "epoch": 1.4157492111774452, "grad_norm": 0.383099285241623, "learning_rate": 8.528775360785665e-06, "loss": 0.5512, "step": 1949 }, { "epoch": 1.4164756089256123, "grad_norm": 0.38018322615703837, "learning_rate": 8.527083712535998e-06, "loss": 0.5334, "step": 1950 }, { "epoch": 1.4172020066737794, "grad_norm": 0.4071584173973299, "learning_rate": 8.525391260272134e-06, "loss": 0.5376, "step": 1951 }, { "epoch": 1.4179284044219462, "grad_norm": 0.33721951307742976, "learning_rate": 8.523698004379878e-06, "loss": 0.5279, "step": 1952 }, { "epoch": 1.4186548021701133, "grad_norm": 0.8918147939342991, "learning_rate": 8.52200394524521e-06, "loss": 0.5394, "step": 1953 }, { "epoch": 1.4193811999182802, "grad_norm": 0.38716601747441487, "learning_rate": 8.520309083254301e-06, "loss": 0.5348, "step": 1954 }, { "epoch": 1.4201075976664472, "grad_norm": 0.4193252319693734, "learning_rate": 8.518613418793502e-06, "loss": 0.5232, "step": 1955 }, { "epoch": 1.4208339954146143, "grad_norm": 0.39452248506913185, "learning_rate": 8.516916952249346e-06, "loss": 0.5293, "step": 1956 }, { "epoch": 1.4215603931627812, "grad_norm": 0.47356394532545604, "learning_rate": 8.51521968400855e-06, "loss": 0.5335, "step": 1957 }, { "epoch": 1.4222867909109482, "grad_norm": 0.4721127066478117, "learning_rate": 8.513521614458015e-06, "loss": 0.5395, "step": 1958 }, { "epoch": 1.423013188659115, "grad_norm": 0.37657901193003795, "learning_rate": 8.511822743984824e-06, "loss": 0.5396, "step": 1959 }, { "epoch": 1.4237395864072822, "grad_norm": 0.39741495285863576, "learning_rate": 8.51012307297624e-06, "loss": 0.5153, "step": 1960 }, { "epoch": 1.4244659841554492, "grad_norm": 0.41305756922410347, "learning_rate": 8.508422601819713e-06, "loss": 0.5241, "step": 1961 }, { "epoch": 1.425192381903616, "grad_norm": 0.37996646400149187, "learning_rate": 8.506721330902869e-06, "loss": 0.5295, "step": 1962 }, { "epoch": 1.4259187796517832, "grad_norm": 0.3852696944338915, "learning_rate": 8.505019260613523e-06, "loss": 0.5305, "step": 1963 }, { "epoch": 1.42664517739995, "grad_norm": 0.3958821782540526, "learning_rate": 8.503316391339668e-06, "loss": 0.5323, "step": 1964 }, { "epoch": 1.427371575148117, "grad_norm": 0.37831995894612974, "learning_rate": 8.501612723469483e-06, "loss": 0.5181, "step": 1965 }, { "epoch": 1.4280979728962842, "grad_norm": 0.39926135943658303, "learning_rate": 8.499908257391324e-06, "loss": 0.5417, "step": 1966 }, { "epoch": 1.428824370644451, "grad_norm": 0.358842675107982, "learning_rate": 8.49820299349373e-06, "loss": 0.5428, "step": 1967 }, { "epoch": 1.4295507683926179, "grad_norm": 0.40179609610989964, "learning_rate": 8.49649693216543e-06, "loss": 0.5369, "step": 1968 }, { "epoch": 1.430277166140785, "grad_norm": 0.3789589574251804, "learning_rate": 8.494790073795323e-06, "loss": 0.5423, "step": 1969 }, { "epoch": 1.431003563888952, "grad_norm": 0.3590287073648434, "learning_rate": 8.493082418772494e-06, "loss": 0.5353, "step": 1970 }, { "epoch": 1.4317299616371189, "grad_norm": 0.6134094833899469, "learning_rate": 8.491373967486212e-06, "loss": 0.5349, "step": 1971 }, { "epoch": 1.432456359385286, "grad_norm": 0.36762452749849744, "learning_rate": 8.489664720325928e-06, "loss": 0.5275, "step": 1972 }, { "epoch": 1.4331827571334528, "grad_norm": 0.3692895289988314, "learning_rate": 8.487954677681269e-06, "loss": 0.54, "step": 1973 }, { "epoch": 1.4339091548816199, "grad_norm": 0.3763720711023518, "learning_rate": 8.486243839942048e-06, "loss": 0.5268, "step": 1974 }, { "epoch": 1.434635552629787, "grad_norm": 0.3806714456242494, "learning_rate": 8.48453220749826e-06, "loss": 0.5287, "step": 1975 }, { "epoch": 1.4353619503779538, "grad_norm": 0.35038099734210776, "learning_rate": 8.482819780740076e-06, "loss": 0.5479, "step": 1976 }, { "epoch": 1.4360883481261209, "grad_norm": 0.4317281985340517, "learning_rate": 8.481106560057852e-06, "loss": 0.5417, "step": 1977 }, { "epoch": 1.4368147458742877, "grad_norm": 0.3701376689135592, "learning_rate": 8.479392545842126e-06, "loss": 0.5301, "step": 1978 }, { "epoch": 1.4375411436224548, "grad_norm": 0.3774269963014525, "learning_rate": 8.477677738483614e-06, "loss": 0.5379, "step": 1979 }, { "epoch": 1.4382675413706218, "grad_norm": 0.3857892291442041, "learning_rate": 8.475962138373212e-06, "loss": 0.5456, "step": 1980 }, { "epoch": 1.4389939391187887, "grad_norm": 0.3887872479758726, "learning_rate": 8.474245745902002e-06, "loss": 0.5261, "step": 1981 }, { "epoch": 1.4397203368669558, "grad_norm": 0.3862173859493997, "learning_rate": 8.47252856146124e-06, "loss": 0.5383, "step": 1982 }, { "epoch": 1.4404467346151226, "grad_norm": 0.4132140002809266, "learning_rate": 8.470810585442367e-06, "loss": 0.5508, "step": 1983 }, { "epoch": 1.4411731323632897, "grad_norm": 0.44883189440473237, "learning_rate": 8.469091818237004e-06, "loss": 0.5507, "step": 1984 }, { "epoch": 1.4418995301114568, "grad_norm": 0.9548703866806727, "learning_rate": 8.467372260236951e-06, "loss": 0.5275, "step": 1985 }, { "epoch": 1.4426259278596236, "grad_norm": 0.3560892250131109, "learning_rate": 8.465651911834187e-06, "loss": 0.5245, "step": 1986 }, { "epoch": 1.4433523256077907, "grad_norm": 0.3750927938822984, "learning_rate": 8.463930773420874e-06, "loss": 0.5278, "step": 1987 }, { "epoch": 1.4440787233559575, "grad_norm": 0.3861372560323569, "learning_rate": 8.462208845389356e-06, "loss": 0.5357, "step": 1988 }, { "epoch": 1.4448051211041246, "grad_norm": 0.4953428079781934, "learning_rate": 8.460486128132151e-06, "loss": 0.5322, "step": 1989 }, { "epoch": 1.4455315188522915, "grad_norm": 0.7225665784307765, "learning_rate": 8.458762622041959e-06, "loss": 0.5328, "step": 1990 }, { "epoch": 1.4462579166004585, "grad_norm": 0.3635088658724107, "learning_rate": 8.457038327511663e-06, "loss": 0.5302, "step": 1991 }, { "epoch": 1.4469843143486254, "grad_norm": 0.3951491124497268, "learning_rate": 8.455313244934324e-06, "loss": 0.5407, "step": 1992 }, { "epoch": 1.4477107120967925, "grad_norm": 0.6229750799649701, "learning_rate": 8.45358737470318e-06, "loss": 0.5444, "step": 1993 }, { "epoch": 1.4484371098449595, "grad_norm": 0.4284237318323039, "learning_rate": 8.451860717211653e-06, "loss": 0.5441, "step": 1994 }, { "epoch": 1.4491635075931264, "grad_norm": 0.5036938209369309, "learning_rate": 8.450133272853338e-06, "loss": 0.5438, "step": 1995 }, { "epoch": 1.4498899053412935, "grad_norm": 0.5598656231285419, "learning_rate": 8.448405042022018e-06, "loss": 0.5328, "step": 1996 }, { "epoch": 1.4506163030894603, "grad_norm": 0.4352745677117185, "learning_rate": 8.44667602511165e-06, "loss": 0.5353, "step": 1997 }, { "epoch": 1.4513427008376274, "grad_norm": 0.3727822813407849, "learning_rate": 8.444946222516369e-06, "loss": 0.5382, "step": 1998 }, { "epoch": 1.4520690985857945, "grad_norm": 0.3673506875698348, "learning_rate": 8.443215634630493e-06, "loss": 0.5312, "step": 1999 }, { "epoch": 1.4527954963339613, "grad_norm": 0.42066624613930265, "learning_rate": 8.441484261848514e-06, "loss": 0.5572, "step": 2000 }, { "epoch": 1.4535218940821284, "grad_norm": 0.4167581664824996, "learning_rate": 8.43975210456511e-06, "loss": 0.5439, "step": 2001 }, { "epoch": 1.4542482918302952, "grad_norm": 0.4384370655418264, "learning_rate": 8.438019163175132e-06, "loss": 0.5454, "step": 2002 }, { "epoch": 1.4549746895784623, "grad_norm": 0.41078736364713037, "learning_rate": 8.436285438073612e-06, "loss": 0.5278, "step": 2003 }, { "epoch": 1.4557010873266294, "grad_norm": 0.5068341665843389, "learning_rate": 8.43455092965576e-06, "loss": 0.532, "step": 2004 }, { "epoch": 1.4564274850747962, "grad_norm": 0.3361925311738195, "learning_rate": 8.432815638316964e-06, "loss": 0.5221, "step": 2005 }, { "epoch": 1.4571538828229633, "grad_norm": 0.3907476605116234, "learning_rate": 8.431079564452794e-06, "loss": 0.5259, "step": 2006 }, { "epoch": 1.4578802805711302, "grad_norm": 0.39243927084717206, "learning_rate": 8.429342708458991e-06, "loss": 0.5211, "step": 2007 }, { "epoch": 1.4586066783192972, "grad_norm": 0.4033731808083159, "learning_rate": 8.427605070731482e-06, "loss": 0.5342, "step": 2008 }, { "epoch": 1.4593330760674643, "grad_norm": 0.38186916648202734, "learning_rate": 8.42586665166637e-06, "loss": 0.5202, "step": 2009 }, { "epoch": 1.4600594738156312, "grad_norm": 0.3752026451214445, "learning_rate": 8.424127451659933e-06, "loss": 0.5299, "step": 2010 }, { "epoch": 1.460785871563798, "grad_norm": 0.40256241040537144, "learning_rate": 8.422387471108631e-06, "loss": 0.536, "step": 2011 }, { "epoch": 1.461512269311965, "grad_norm": 0.38801726970593625, "learning_rate": 8.420646710409099e-06, "loss": 0.5298, "step": 2012 }, { "epoch": 1.4622386670601322, "grad_norm": 0.3491406745354709, "learning_rate": 8.418905169958152e-06, "loss": 0.5358, "step": 2013 }, { "epoch": 1.462965064808299, "grad_norm": 0.3743679163458471, "learning_rate": 8.41716285015278e-06, "loss": 0.5434, "step": 2014 }, { "epoch": 1.463691462556466, "grad_norm": 0.37363032051278366, "learning_rate": 8.415419751390155e-06, "loss": 0.5306, "step": 2015 }, { "epoch": 1.464417860304633, "grad_norm": 0.3793987257982108, "learning_rate": 8.413675874067622e-06, "loss": 0.5419, "step": 2016 }, { "epoch": 1.4651442580528, "grad_norm": 0.4060396681017493, "learning_rate": 8.411931218582706e-06, "loss": 0.5306, "step": 2017 }, { "epoch": 1.465870655800967, "grad_norm": 0.7718495729878065, "learning_rate": 8.410185785333111e-06, "loss": 0.5528, "step": 2018 }, { "epoch": 1.466597053549134, "grad_norm": 0.3809931135117274, "learning_rate": 8.408439574716712e-06, "loss": 0.5151, "step": 2019 }, { "epoch": 1.467323451297301, "grad_norm": 0.3464947862227488, "learning_rate": 8.406692587131569e-06, "loss": 0.5268, "step": 2020 }, { "epoch": 1.4680498490454679, "grad_norm": 0.37560537023157703, "learning_rate": 8.404944822975914e-06, "loss": 0.5582, "step": 2021 }, { "epoch": 1.468776246793635, "grad_norm": 0.3527908030684601, "learning_rate": 8.403196282648156e-06, "loss": 0.5464, "step": 2022 }, { "epoch": 1.469502644541802, "grad_norm": 0.3744135464359761, "learning_rate": 8.401446966546885e-06, "loss": 0.5411, "step": 2023 }, { "epoch": 1.4702290422899689, "grad_norm": 0.3898440069807425, "learning_rate": 8.399696875070864e-06, "loss": 0.5505, "step": 2024 }, { "epoch": 1.470955440038136, "grad_norm": 0.44165838423074744, "learning_rate": 8.397946008619035e-06, "loss": 0.5151, "step": 2025 }, { "epoch": 1.4716818377863028, "grad_norm": 0.4152275308276423, "learning_rate": 8.396194367590515e-06, "loss": 0.5274, "step": 2026 }, { "epoch": 1.4724082355344699, "grad_norm": 0.3324160779039609, "learning_rate": 8.394441952384597e-06, "loss": 0.5406, "step": 2027 }, { "epoch": 1.473134633282637, "grad_norm": 0.41106911809184316, "learning_rate": 8.392688763400755e-06, "loss": 0.532, "step": 2028 }, { "epoch": 1.4738610310308038, "grad_norm": 0.3924212014605282, "learning_rate": 8.390934801038632e-06, "loss": 0.5233, "step": 2029 }, { "epoch": 1.4745874287789709, "grad_norm": 0.414921189006647, "learning_rate": 8.389180065698055e-06, "loss": 0.5319, "step": 2030 }, { "epoch": 1.4753138265271377, "grad_norm": 0.8311008021451181, "learning_rate": 8.387424557779022e-06, "loss": 0.551, "step": 2031 }, { "epoch": 1.4760402242753048, "grad_norm": 0.361198159956959, "learning_rate": 8.385668277681709e-06, "loss": 0.5131, "step": 2032 }, { "epoch": 1.4767666220234719, "grad_norm": 0.38745901241674185, "learning_rate": 8.383911225806468e-06, "loss": 0.5474, "step": 2033 }, { "epoch": 1.4774930197716387, "grad_norm": 0.37053434565256693, "learning_rate": 8.382153402553825e-06, "loss": 0.5409, "step": 2034 }, { "epoch": 1.4782194175198056, "grad_norm": 0.3666466711492958, "learning_rate": 8.380394808324484e-06, "loss": 0.5218, "step": 2035 }, { "epoch": 1.4789458152679726, "grad_norm": 0.43161781550679396, "learning_rate": 8.378635443519327e-06, "loss": 0.5225, "step": 2036 }, { "epoch": 1.4796722130161397, "grad_norm": 0.47557759375567127, "learning_rate": 8.376875308539406e-06, "loss": 0.5555, "step": 2037 }, { "epoch": 1.4803986107643066, "grad_norm": 0.4422804846214722, "learning_rate": 8.375114403785953e-06, "loss": 0.5229, "step": 2038 }, { "epoch": 1.4811250085124736, "grad_norm": 0.35316778327064985, "learning_rate": 8.373352729660373e-06, "loss": 0.532, "step": 2039 }, { "epoch": 1.4818514062606405, "grad_norm": 0.43635553639514263, "learning_rate": 8.371590286564247e-06, "loss": 0.5267, "step": 2040 }, { "epoch": 1.4825778040088076, "grad_norm": 0.3647953718925828, "learning_rate": 8.369827074899333e-06, "loss": 0.5172, "step": 2041 }, { "epoch": 1.4833042017569746, "grad_norm": 0.43758131127371425, "learning_rate": 8.368063095067564e-06, "loss": 0.5299, "step": 2042 }, { "epoch": 1.4840305995051415, "grad_norm": 0.3481965420729561, "learning_rate": 8.366298347471043e-06, "loss": 0.5529, "step": 2043 }, { "epoch": 1.4847569972533086, "grad_norm": 0.3809271562611856, "learning_rate": 8.364532832512055e-06, "loss": 0.5298, "step": 2044 }, { "epoch": 1.4854833950014754, "grad_norm": 0.39900438510168235, "learning_rate": 8.362766550593055e-06, "loss": 0.553, "step": 2045 }, { "epoch": 1.4862097927496425, "grad_norm": 0.4024051219002026, "learning_rate": 8.360999502116678e-06, "loss": 0.5251, "step": 2046 }, { "epoch": 1.4869361904978096, "grad_norm": 0.5664636582845303, "learning_rate": 8.359231687485724e-06, "loss": 0.5268, "step": 2047 }, { "epoch": 1.4876625882459764, "grad_norm": 0.321380891619596, "learning_rate": 8.357463107103182e-06, "loss": 0.5262, "step": 2048 }, { "epoch": 1.4883889859941435, "grad_norm": 0.3781206568900052, "learning_rate": 8.355693761372203e-06, "loss": 0.5265, "step": 2049 }, { "epoch": 1.4891153837423103, "grad_norm": 0.4901907426957913, "learning_rate": 8.353923650696119e-06, "loss": 0.5352, "step": 2050 }, { "epoch": 1.4898417814904774, "grad_norm": 0.4383754725203851, "learning_rate": 8.352152775478431e-06, "loss": 0.5351, "step": 2051 }, { "epoch": 1.4905681792386445, "grad_norm": 0.3793071731555198, "learning_rate": 8.350381136122823e-06, "loss": 0.533, "step": 2052 }, { "epoch": 1.4912945769868113, "grad_norm": 0.3936571281921193, "learning_rate": 8.348608733033143e-06, "loss": 0.5423, "step": 2053 }, { "epoch": 1.4920209747349782, "grad_norm": 0.5586778199848161, "learning_rate": 8.34683556661342e-06, "loss": 0.542, "step": 2054 }, { "epoch": 1.4927473724831453, "grad_norm": 0.4083828436352312, "learning_rate": 8.345061637267858e-06, "loss": 0.5437, "step": 2055 }, { "epoch": 1.4934737702313123, "grad_norm": 0.39828639694777723, "learning_rate": 8.343286945400827e-06, "loss": 0.5404, "step": 2056 }, { "epoch": 1.4942001679794792, "grad_norm": 0.4341716027611792, "learning_rate": 8.341511491416877e-06, "loss": 0.5419, "step": 2057 }, { "epoch": 1.4949265657276463, "grad_norm": 0.3771833000560631, "learning_rate": 8.339735275720735e-06, "loss": 0.5272, "step": 2058 }, { "epoch": 1.4956529634758131, "grad_norm": 0.4369176038701713, "learning_rate": 8.337958298717293e-06, "loss": 0.5344, "step": 2059 }, { "epoch": 1.4963793612239802, "grad_norm": 0.3844043904597473, "learning_rate": 8.336180560811619e-06, "loss": 0.5323, "step": 2060 }, { "epoch": 1.4971057589721473, "grad_norm": 0.3560037681982256, "learning_rate": 8.334402062408962e-06, "loss": 0.53, "step": 2061 }, { "epoch": 1.4978321567203141, "grad_norm": 0.3683510079298262, "learning_rate": 8.332622803914734e-06, "loss": 0.5232, "step": 2062 }, { "epoch": 1.4985585544684812, "grad_norm": 0.37159086861045465, "learning_rate": 8.330842785734525e-06, "loss": 0.5415, "step": 2063 }, { "epoch": 1.499284952216648, "grad_norm": 0.6276047408660594, "learning_rate": 8.3290620082741e-06, "loss": 0.5279, "step": 2064 }, { "epoch": 1.500011349964815, "grad_norm": 0.372171797078306, "learning_rate": 8.327280471939392e-06, "loss": 0.534, "step": 2065 }, { "epoch": 1.5007377477129822, "grad_norm": 0.36192295787407436, "learning_rate": 8.325498177136514e-06, "loss": 0.5276, "step": 2066 }, { "epoch": 1.501464145461149, "grad_norm": 0.4151167746324928, "learning_rate": 8.323715124271745e-06, "loss": 0.5465, "step": 2067 }, { "epoch": 1.5021905432093159, "grad_norm": 0.5345148655537466, "learning_rate": 8.32193131375154e-06, "loss": 0.5248, "step": 2068 }, { "epoch": 1.502916940957483, "grad_norm": 0.4010625713543608, "learning_rate": 8.320146745982528e-06, "loss": 0.5394, "step": 2069 }, { "epoch": 1.50364333870565, "grad_norm": 0.4242426717100806, "learning_rate": 8.318361421371507e-06, "loss": 0.5432, "step": 2070 }, { "epoch": 1.504369736453817, "grad_norm": 0.3769937648385446, "learning_rate": 8.31657534032545e-06, "loss": 0.5404, "step": 2071 }, { "epoch": 1.505096134201984, "grad_norm": 0.4002762206548443, "learning_rate": 8.314788503251506e-06, "loss": 0.5268, "step": 2072 }, { "epoch": 1.5058225319501508, "grad_norm": 0.49513683689221805, "learning_rate": 8.313000910556986e-06, "loss": 0.5325, "step": 2073 }, { "epoch": 1.5065489296983179, "grad_norm": 0.37293996421107484, "learning_rate": 8.311212562649383e-06, "loss": 0.5347, "step": 2074 }, { "epoch": 1.507275327446485, "grad_norm": 0.3735325900469624, "learning_rate": 8.30942345993636e-06, "loss": 0.541, "step": 2075 }, { "epoch": 1.508001725194652, "grad_norm": 0.513815208479049, "learning_rate": 8.307633602825746e-06, "loss": 0.5335, "step": 2076 }, { "epoch": 1.5087281229428189, "grad_norm": 0.42651791780626036, "learning_rate": 8.305842991725552e-06, "loss": 0.5359, "step": 2077 }, { "epoch": 1.5094545206909857, "grad_norm": 0.7149161389443535, "learning_rate": 8.304051627043952e-06, "loss": 0.5337, "step": 2078 }, { "epoch": 1.5101809184391528, "grad_norm": 0.5309869942657326, "learning_rate": 8.302259509189299e-06, "loss": 0.5139, "step": 2079 }, { "epoch": 1.5109073161873199, "grad_norm": 0.44461112251803003, "learning_rate": 8.30046663857011e-06, "loss": 0.5289, "step": 2080 }, { "epoch": 1.511633713935487, "grad_norm": 0.3676877067349289, "learning_rate": 8.298673015595083e-06, "loss": 0.5325, "step": 2081 }, { "epoch": 1.5123601116836538, "grad_norm": 0.5061828033162785, "learning_rate": 8.296878640673077e-06, "loss": 0.5153, "step": 2082 }, { "epoch": 1.5130865094318207, "grad_norm": 0.414329324752058, "learning_rate": 8.29508351421313e-06, "loss": 0.5184, "step": 2083 }, { "epoch": 1.5138129071799877, "grad_norm": 0.44121955910229965, "learning_rate": 8.293287636624447e-06, "loss": 0.5379, "step": 2084 }, { "epoch": 1.5145393049281548, "grad_norm": 0.4154035008830126, "learning_rate": 8.291491008316409e-06, "loss": 0.5287, "step": 2085 }, { "epoch": 1.5152657026763217, "grad_norm": 0.3225024595739773, "learning_rate": 8.289693629698564e-06, "loss": 0.5381, "step": 2086 }, { "epoch": 1.5159921004244887, "grad_norm": 0.5334881633732634, "learning_rate": 8.287895501180632e-06, "loss": 0.5442, "step": 2087 }, { "epoch": 1.5167184981726556, "grad_norm": 0.44290513871966686, "learning_rate": 8.286096623172506e-06, "loss": 0.5247, "step": 2088 }, { "epoch": 1.5174448959208227, "grad_norm": 0.4355594276107615, "learning_rate": 8.284296996084244e-06, "loss": 0.5331, "step": 2089 }, { "epoch": 1.5181712936689897, "grad_norm": 0.47966811639145174, "learning_rate": 8.282496620326085e-06, "loss": 0.5411, "step": 2090 }, { "epoch": 1.5188976914171566, "grad_norm": 0.43302335492933264, "learning_rate": 8.280695496308428e-06, "loss": 0.5407, "step": 2091 }, { "epoch": 1.5196240891653234, "grad_norm": 0.3857374366033102, "learning_rate": 8.278893624441849e-06, "loss": 0.5361, "step": 2092 }, { "epoch": 1.5203504869134905, "grad_norm": 0.375465296561444, "learning_rate": 8.27709100513709e-06, "loss": 0.5375, "step": 2093 }, { "epoch": 1.5210768846616576, "grad_norm": 0.3682581338047892, "learning_rate": 8.275287638805069e-06, "loss": 0.5327, "step": 2094 }, { "epoch": 1.5218032824098247, "grad_norm": 0.3535435043848988, "learning_rate": 8.273483525856871e-06, "loss": 0.5423, "step": 2095 }, { "epoch": 1.5225296801579915, "grad_norm": 0.36136545072253023, "learning_rate": 8.27167866670375e-06, "loss": 0.5315, "step": 2096 }, { "epoch": 1.5232560779061584, "grad_norm": 0.4133601831129233, "learning_rate": 8.269873061757133e-06, "loss": 0.5307, "step": 2097 }, { "epoch": 1.5239824756543254, "grad_norm": 0.5370894632116522, "learning_rate": 8.268066711428614e-06, "loss": 0.543, "step": 2098 }, { "epoch": 1.5247088734024925, "grad_norm": 0.3613559837813718, "learning_rate": 8.266259616129959e-06, "loss": 0.5283, "step": 2099 }, { "epoch": 1.5254352711506596, "grad_norm": 0.35068415153350707, "learning_rate": 8.264451776273104e-06, "loss": 0.5246, "step": 2100 }, { "epoch": 1.5261616688988264, "grad_norm": 0.4693794087880394, "learning_rate": 8.262643192270152e-06, "loss": 0.5271, "step": 2101 }, { "epoch": 1.5268880666469933, "grad_norm": 0.36685476737449635, "learning_rate": 8.26083386453338e-06, "loss": 0.5402, "step": 2102 }, { "epoch": 1.5276144643951604, "grad_norm": 0.4240709701742305, "learning_rate": 8.259023793475233e-06, "loss": 0.531, "step": 2103 }, { "epoch": 1.5283408621433274, "grad_norm": 0.3992723948801737, "learning_rate": 8.257212979508321e-06, "loss": 0.5403, "step": 2104 }, { "epoch": 1.5290672598914945, "grad_norm": 0.39217915177008994, "learning_rate": 8.25540142304543e-06, "loss": 0.5176, "step": 2105 }, { "epoch": 1.5297936576396614, "grad_norm": 0.3691939997812454, "learning_rate": 8.253589124499513e-06, "loss": 0.5234, "step": 2106 }, { "epoch": 1.5305200553878282, "grad_norm": 0.34426922828140977, "learning_rate": 8.251776084283687e-06, "loss": 0.5255, "step": 2107 }, { "epoch": 1.5312464531359953, "grad_norm": 0.4228685867855801, "learning_rate": 8.249962302811246e-06, "loss": 0.5355, "step": 2108 }, { "epoch": 1.5319728508841624, "grad_norm": 0.6702335615461068, "learning_rate": 8.248147780495648e-06, "loss": 0.53, "step": 2109 }, { "epoch": 1.5326992486323292, "grad_norm": 0.3620865144375418, "learning_rate": 8.246332517750524e-06, "loss": 0.5256, "step": 2110 }, { "epoch": 1.533425646380496, "grad_norm": 0.3508763292761052, "learning_rate": 8.244516514989667e-06, "loss": 0.5156, "step": 2111 }, { "epoch": 1.5341520441286631, "grad_norm": 0.42627555830890596, "learning_rate": 8.242699772627044e-06, "loss": 0.5355, "step": 2112 }, { "epoch": 1.5348784418768302, "grad_norm": 0.3684308599352157, "learning_rate": 8.240882291076794e-06, "loss": 0.5182, "step": 2113 }, { "epoch": 1.5356048396249973, "grad_norm": 0.43590036510920216, "learning_rate": 8.239064070753213e-06, "loss": 0.5165, "step": 2114 }, { "epoch": 1.5363312373731641, "grad_norm": 0.3774981089091263, "learning_rate": 8.237245112070775e-06, "loss": 0.5433, "step": 2115 }, { "epoch": 1.537057635121331, "grad_norm": 0.5796489601067345, "learning_rate": 8.235425415444123e-06, "loss": 0.5255, "step": 2116 }, { "epoch": 1.537784032869498, "grad_norm": 0.4040368368012006, "learning_rate": 8.233604981288059e-06, "loss": 0.5207, "step": 2117 }, { "epoch": 1.5385104306176651, "grad_norm": 0.4050182472662472, "learning_rate": 8.231783810017562e-06, "loss": 0.5281, "step": 2118 }, { "epoch": 1.5392368283658322, "grad_norm": 0.43834797443504225, "learning_rate": 8.229961902047777e-06, "loss": 0.5269, "step": 2119 }, { "epoch": 1.539963226113999, "grad_norm": 0.3867432482382013, "learning_rate": 8.228139257794012e-06, "loss": 0.5305, "step": 2120 }, { "epoch": 1.540689623862166, "grad_norm": 0.3334578775651982, "learning_rate": 8.22631587767175e-06, "loss": 0.53, "step": 2121 }, { "epoch": 1.541416021610333, "grad_norm": 0.3670398743223365, "learning_rate": 8.22449176209664e-06, "loss": 0.5321, "step": 2122 }, { "epoch": 1.5421424193585, "grad_norm": 0.4172991422978223, "learning_rate": 8.222666911484494e-06, "loss": 0.5422, "step": 2123 }, { "epoch": 1.5428688171066671, "grad_norm": 0.394926083100875, "learning_rate": 8.220841326251297e-06, "loss": 0.5323, "step": 2124 }, { "epoch": 1.543595214854834, "grad_norm": 0.3701822483507409, "learning_rate": 8.219015006813197e-06, "loss": 0.5333, "step": 2125 }, { "epoch": 1.5443216126030008, "grad_norm": 0.3830765074130067, "learning_rate": 8.217187953586512e-06, "loss": 0.533, "step": 2126 }, { "epoch": 1.545048010351168, "grad_norm": 0.3495916719722912, "learning_rate": 8.215360166987728e-06, "loss": 0.529, "step": 2127 }, { "epoch": 1.545774408099335, "grad_norm": 0.38420382108991424, "learning_rate": 8.213531647433494e-06, "loss": 0.5231, "step": 2128 }, { "epoch": 1.5465008058475018, "grad_norm": 0.3664634654071994, "learning_rate": 8.211702395340633e-06, "loss": 0.5232, "step": 2129 }, { "epoch": 1.547227203595669, "grad_norm": 0.39373304275204196, "learning_rate": 8.209872411126127e-06, "loss": 0.5404, "step": 2130 }, { "epoch": 1.5479536013438358, "grad_norm": 0.5226087869025603, "learning_rate": 8.208041695207134e-06, "loss": 0.5271, "step": 2131 }, { "epoch": 1.5486799990920028, "grad_norm": 0.37897080800573535, "learning_rate": 8.20621024800097e-06, "loss": 0.5257, "step": 2132 }, { "epoch": 1.54940639684017, "grad_norm": 0.3607192702673823, "learning_rate": 8.204378069925121e-06, "loss": 0.5278, "step": 2133 }, { "epoch": 1.5501327945883367, "grad_norm": 0.4491567190506299, "learning_rate": 8.202545161397242e-06, "loss": 0.5212, "step": 2134 }, { "epoch": 1.5508591923365036, "grad_norm": 0.4648783169043336, "learning_rate": 8.200711522835153e-06, "loss": 0.5255, "step": 2135 }, { "epoch": 1.5515855900846707, "grad_norm": 0.40825688941196847, "learning_rate": 8.198877154656838e-06, "loss": 0.5187, "step": 2136 }, { "epoch": 1.5523119878328377, "grad_norm": 0.5055858786813612, "learning_rate": 8.19704205728045e-06, "loss": 0.5214, "step": 2137 }, { "epoch": 1.5530383855810048, "grad_norm": 0.452686470398235, "learning_rate": 8.195206231124309e-06, "loss": 0.5272, "step": 2138 }, { "epoch": 1.5537647833291717, "grad_norm": 0.37942785376928945, "learning_rate": 8.193369676606896e-06, "loss": 0.5348, "step": 2139 }, { "epoch": 1.5544911810773385, "grad_norm": 0.4065149027015626, "learning_rate": 8.191532394146865e-06, "loss": 0.5292, "step": 2140 }, { "epoch": 1.5552175788255056, "grad_norm": 0.5083598648566944, "learning_rate": 8.189694384163032e-06, "loss": 0.5176, "step": 2141 }, { "epoch": 1.5559439765736727, "grad_norm": 0.3722742765668192, "learning_rate": 8.187855647074376e-06, "loss": 0.5262, "step": 2142 }, { "epoch": 1.5566703743218397, "grad_norm": 0.40226452791696343, "learning_rate": 8.186016183300052e-06, "loss": 0.5231, "step": 2143 }, { "epoch": 1.5573967720700066, "grad_norm": 0.43419760435625404, "learning_rate": 8.184175993259367e-06, "loss": 0.5258, "step": 2144 }, { "epoch": 1.5581231698181734, "grad_norm": 0.4013434470364981, "learning_rate": 8.182335077371803e-06, "loss": 0.5259, "step": 2145 }, { "epoch": 1.5588495675663405, "grad_norm": 0.5035945890529423, "learning_rate": 8.180493436057008e-06, "loss": 0.5227, "step": 2146 }, { "epoch": 1.5595759653145076, "grad_norm": 0.37150966714124883, "learning_rate": 8.178651069734787e-06, "loss": 0.5336, "step": 2147 }, { "epoch": 1.5603023630626747, "grad_norm": 0.4429586606794134, "learning_rate": 8.17680797882512e-06, "loss": 0.5453, "step": 2148 }, { "epoch": 1.5610287608108415, "grad_norm": 0.37270998378690173, "learning_rate": 8.174964163748142e-06, "loss": 0.5269, "step": 2149 }, { "epoch": 1.5617551585590084, "grad_norm": 0.4122813391997847, "learning_rate": 8.173119624924164e-06, "loss": 0.5204, "step": 2150 }, { "epoch": 1.5624815563071754, "grad_norm": 0.37256623087813406, "learning_rate": 8.171274362773657e-06, "loss": 0.5284, "step": 2151 }, { "epoch": 1.5632079540553425, "grad_norm": 0.366519633839237, "learning_rate": 8.169428377717253e-06, "loss": 0.5292, "step": 2152 }, { "epoch": 1.5639343518035094, "grad_norm": 0.39571030872429297, "learning_rate": 8.167581670175752e-06, "loss": 0.5244, "step": 2153 }, { "epoch": 1.5646607495516764, "grad_norm": 0.49398026531249895, "learning_rate": 8.165734240570124e-06, "loss": 0.5162, "step": 2154 }, { "epoch": 1.5653871472998433, "grad_norm": 0.3702564028006646, "learning_rate": 8.163886089321493e-06, "loss": 0.5471, "step": 2155 }, { "epoch": 1.5661135450480104, "grad_norm": 0.3958082047337379, "learning_rate": 8.162037216851158e-06, "loss": 0.5361, "step": 2156 }, { "epoch": 1.5668399427961774, "grad_norm": 0.38027394888118066, "learning_rate": 8.160187623580575e-06, "loss": 0.5101, "step": 2157 }, { "epoch": 1.5675663405443443, "grad_norm": 0.6188493033488481, "learning_rate": 8.158337309931365e-06, "loss": 0.5206, "step": 2158 }, { "epoch": 1.5682927382925111, "grad_norm": 0.485014339706956, "learning_rate": 8.15648627632532e-06, "loss": 0.522, "step": 2159 }, { "epoch": 1.5690191360406782, "grad_norm": 0.3372921872528581, "learning_rate": 8.154634523184389e-06, "loss": 0.5204, "step": 2160 }, { "epoch": 1.5697455337888453, "grad_norm": 0.3775246242830134, "learning_rate": 8.152782050930685e-06, "loss": 0.5139, "step": 2161 }, { "epoch": 1.5704719315370124, "grad_norm": 0.3441463438997507, "learning_rate": 8.150928859986488e-06, "loss": 0.5341, "step": 2162 }, { "epoch": 1.5711983292851792, "grad_norm": 0.3958596216052891, "learning_rate": 8.149074950774244e-06, "loss": 0.5325, "step": 2163 }, { "epoch": 1.571924727033346, "grad_norm": 0.3555124159077814, "learning_rate": 8.14722032371656e-06, "loss": 0.531, "step": 2164 }, { "epoch": 1.5726511247815131, "grad_norm": 0.4197910562498298, "learning_rate": 8.145364979236201e-06, "loss": 0.5183, "step": 2165 }, { "epoch": 1.5733775225296802, "grad_norm": 0.38377856071557065, "learning_rate": 8.143508917756105e-06, "loss": 0.539, "step": 2166 }, { "epoch": 1.5741039202778473, "grad_norm": 0.3498504206619673, "learning_rate": 8.14165213969937e-06, "loss": 0.5318, "step": 2167 }, { "epoch": 1.5748303180260141, "grad_norm": 0.35939784890857573, "learning_rate": 8.139794645489252e-06, "loss": 0.5244, "step": 2168 }, { "epoch": 1.575556715774181, "grad_norm": 0.5191106846293712, "learning_rate": 8.13793643554918e-06, "loss": 0.5236, "step": 2169 }, { "epoch": 1.576283113522348, "grad_norm": 0.3754443761805671, "learning_rate": 8.13607751030274e-06, "loss": 0.5218, "step": 2170 }, { "epoch": 1.5770095112705151, "grad_norm": 0.3765275515814297, "learning_rate": 8.13421787017368e-06, "loss": 0.525, "step": 2171 }, { "epoch": 1.577735909018682, "grad_norm": 0.3619776638617869, "learning_rate": 8.132357515585913e-06, "loss": 0.5387, "step": 2172 }, { "epoch": 1.578462306766849, "grad_norm": 0.43674111006208605, "learning_rate": 8.130496446963518e-06, "loss": 0.528, "step": 2173 }, { "epoch": 1.579188704515016, "grad_norm": 0.45352096537017633, "learning_rate": 8.128634664730734e-06, "loss": 0.5223, "step": 2174 }, { "epoch": 1.579915102263183, "grad_norm": 0.3638288881531629, "learning_rate": 8.126772169311959e-06, "loss": 0.5272, "step": 2175 }, { "epoch": 1.58064150001135, "grad_norm": 0.3678937439443033, "learning_rate": 8.124908961131759e-06, "loss": 0.5327, "step": 2176 }, { "epoch": 1.581367897759517, "grad_norm": 0.42819149568379095, "learning_rate": 8.123045040614859e-06, "loss": 0.5273, "step": 2177 }, { "epoch": 1.5820942955076838, "grad_norm": 0.451401867343068, "learning_rate": 8.121180408186151e-06, "loss": 0.52, "step": 2178 }, { "epoch": 1.5828206932558508, "grad_norm": 0.35404518347187564, "learning_rate": 8.119315064270683e-06, "loss": 0.5418, "step": 2179 }, { "epoch": 1.583547091004018, "grad_norm": 0.3874862248814074, "learning_rate": 8.117449009293668e-06, "loss": 0.515, "step": 2180 }, { "epoch": 1.584273488752185, "grad_norm": 0.35996595384110947, "learning_rate": 8.115582243680484e-06, "loss": 0.5231, "step": 2181 }, { "epoch": 1.5849998865003518, "grad_norm": 0.49032221582780916, "learning_rate": 8.113714767856668e-06, "loss": 0.533, "step": 2182 }, { "epoch": 1.5857262842485187, "grad_norm": 0.3789203895555697, "learning_rate": 8.111846582247917e-06, "loss": 0.5204, "step": 2183 }, { "epoch": 1.5864526819966858, "grad_norm": 0.44070282653119874, "learning_rate": 8.109977687280095e-06, "loss": 0.5182, "step": 2184 }, { "epoch": 1.5871790797448528, "grad_norm": 0.3734368116904144, "learning_rate": 8.108108083379224e-06, "loss": 0.5239, "step": 2185 }, { "epoch": 1.58790547749302, "grad_norm": 0.7794357692303779, "learning_rate": 8.106237770971486e-06, "loss": 0.5348, "step": 2186 }, { "epoch": 1.5886318752411868, "grad_norm": 0.40048407359634847, "learning_rate": 8.10436675048323e-06, "loss": 0.5329, "step": 2187 }, { "epoch": 1.5893582729893536, "grad_norm": 0.41996079281176507, "learning_rate": 8.102495022340962e-06, "loss": 0.5348, "step": 2188 }, { "epoch": 1.5900846707375207, "grad_norm": 0.4141606229227644, "learning_rate": 8.100622586971349e-06, "loss": 0.5231, "step": 2189 }, { "epoch": 1.5908110684856878, "grad_norm": 0.39613241249403197, "learning_rate": 8.098749444801226e-06, "loss": 0.5154, "step": 2190 }, { "epoch": 1.5915374662338548, "grad_norm": 0.40699509976783327, "learning_rate": 8.096875596257578e-06, "loss": 0.5361, "step": 2191 }, { "epoch": 1.5922638639820217, "grad_norm": 0.39720514745810015, "learning_rate": 8.095001041767561e-06, "loss": 0.5384, "step": 2192 }, { "epoch": 1.5929902617301885, "grad_norm": 0.379944597113303, "learning_rate": 8.093125781758485e-06, "loss": 0.5191, "step": 2193 }, { "epoch": 1.5937166594783556, "grad_norm": 0.40124115305103786, "learning_rate": 8.091249816657826e-06, "loss": 0.5319, "step": 2194 }, { "epoch": 1.5944430572265227, "grad_norm": 0.36199001054735935, "learning_rate": 8.089373146893216e-06, "loss": 0.53, "step": 2195 }, { "epoch": 1.5951694549746895, "grad_norm": 0.3839271041750099, "learning_rate": 8.087495772892455e-06, "loss": 0.5209, "step": 2196 }, { "epoch": 1.5958958527228566, "grad_norm": 0.3633683466203577, "learning_rate": 8.085617695083493e-06, "loss": 0.5127, "step": 2197 }, { "epoch": 1.5966222504710235, "grad_norm": 0.4480574212500137, "learning_rate": 8.08373891389445e-06, "loss": 0.5171, "step": 2198 }, { "epoch": 1.5973486482191905, "grad_norm": 0.4441680664452495, "learning_rate": 8.0818594297536e-06, "loss": 0.5235, "step": 2199 }, { "epoch": 1.5980750459673576, "grad_norm": 0.3848708364131823, "learning_rate": 8.07997924308938e-06, "loss": 0.5272, "step": 2200 }, { "epoch": 1.5988014437155245, "grad_norm": 0.46643645595697314, "learning_rate": 8.078098354330386e-06, "loss": 0.5416, "step": 2201 }, { "epoch": 1.5995278414636913, "grad_norm": 0.42482180204664727, "learning_rate": 8.076216763905379e-06, "loss": 0.5199, "step": 2202 }, { "epoch": 1.6002542392118584, "grad_norm": 0.38351767411017756, "learning_rate": 8.074334472243273e-06, "loss": 0.5385, "step": 2203 }, { "epoch": 1.6009806369600255, "grad_norm": 0.44335436503062614, "learning_rate": 8.072451479773143e-06, "loss": 0.5221, "step": 2204 }, { "epoch": 1.6017070347081925, "grad_norm": 0.33046482661721827, "learning_rate": 8.070567786924228e-06, "loss": 0.5243, "step": 2205 }, { "epoch": 1.6024334324563594, "grad_norm": 0.36748965271151585, "learning_rate": 8.068683394125923e-06, "loss": 0.531, "step": 2206 }, { "epoch": 1.6031598302045262, "grad_norm": 0.3833859585923725, "learning_rate": 8.066798301807782e-06, "loss": 0.5254, "step": 2207 }, { "epoch": 1.6038862279526933, "grad_norm": 0.3393603194185105, "learning_rate": 8.064912510399524e-06, "loss": 0.522, "step": 2208 }, { "epoch": 1.6046126257008604, "grad_norm": 0.41474782851520825, "learning_rate": 8.06302602033102e-06, "loss": 0.5344, "step": 2209 }, { "epoch": 1.6053390234490275, "grad_norm": 0.4576990738112103, "learning_rate": 8.061138832032304e-06, "loss": 0.5024, "step": 2210 }, { "epoch": 1.6060654211971943, "grad_norm": 0.349589634316685, "learning_rate": 8.05925094593357e-06, "loss": 0.5231, "step": 2211 }, { "epoch": 1.6067918189453612, "grad_norm": 0.3592881325763187, "learning_rate": 8.05736236246517e-06, "loss": 0.5243, "step": 2212 }, { "epoch": 1.6075182166935282, "grad_norm": 0.342386535794566, "learning_rate": 8.055473082057615e-06, "loss": 0.5243, "step": 2213 }, { "epoch": 1.6082446144416953, "grad_norm": 0.34698494413806036, "learning_rate": 8.053583105141573e-06, "loss": 0.5267, "step": 2214 }, { "epoch": 1.6089710121898622, "grad_norm": 0.3444169193095043, "learning_rate": 8.051692432147876e-06, "loss": 0.5124, "step": 2215 }, { "epoch": 1.6096974099380292, "grad_norm": 0.3523998685772173, "learning_rate": 8.049801063507505e-06, "loss": 0.5279, "step": 2216 }, { "epoch": 1.610423807686196, "grad_norm": 0.3600581649454914, "learning_rate": 8.047908999651613e-06, "loss": 0.5243, "step": 2217 }, { "epoch": 1.6111502054343632, "grad_norm": 0.3770696606265363, "learning_rate": 8.0460162410115e-06, "loss": 0.5323, "step": 2218 }, { "epoch": 1.6118766031825302, "grad_norm": 0.3627755782168038, "learning_rate": 8.04412278801863e-06, "loss": 0.5322, "step": 2219 }, { "epoch": 1.612603000930697, "grad_norm": 0.39535887557139776, "learning_rate": 8.042228641104622e-06, "loss": 0.5211, "step": 2220 }, { "epoch": 1.613329398678864, "grad_norm": 0.5402785851323881, "learning_rate": 8.040333800701258e-06, "loss": 0.533, "step": 2221 }, { "epoch": 1.614055796427031, "grad_norm": 0.3958450342645416, "learning_rate": 8.038438267240473e-06, "loss": 0.5338, "step": 2222 }, { "epoch": 1.614782194175198, "grad_norm": 0.3819125676640373, "learning_rate": 8.036542041154363e-06, "loss": 0.5186, "step": 2223 }, { "epoch": 1.6155085919233652, "grad_norm": 0.5474682629830405, "learning_rate": 8.034645122875183e-06, "loss": 0.5155, "step": 2224 }, { "epoch": 1.616234989671532, "grad_norm": 0.36761066631477124, "learning_rate": 8.032747512835338e-06, "loss": 0.5128, "step": 2225 }, { "epoch": 1.6169613874196989, "grad_norm": 0.3553974845209146, "learning_rate": 8.030849211467401e-06, "loss": 0.5339, "step": 2226 }, { "epoch": 1.617687785167866, "grad_norm": 0.4054275992193891, "learning_rate": 8.0289502192041e-06, "loss": 0.5192, "step": 2227 }, { "epoch": 1.618414182916033, "grad_norm": 0.5004251882280716, "learning_rate": 8.027050536478315e-06, "loss": 0.5182, "step": 2228 }, { "epoch": 1.6191405806642, "grad_norm": 0.505586412419713, "learning_rate": 8.025150163723087e-06, "loss": 0.5328, "step": 2229 }, { "epoch": 1.619866978412367, "grad_norm": 0.33396295149729055, "learning_rate": 8.023249101371616e-06, "loss": 0.5086, "step": 2230 }, { "epoch": 1.6205933761605338, "grad_norm": 0.3865239183606997, "learning_rate": 8.021347349857258e-06, "loss": 0.5302, "step": 2231 }, { "epoch": 1.6213197739087009, "grad_norm": 0.5417414484743928, "learning_rate": 8.019444909613524e-06, "loss": 0.5416, "step": 2232 }, { "epoch": 1.622046171656868, "grad_norm": 0.381620451485267, "learning_rate": 8.017541781074083e-06, "loss": 0.5274, "step": 2233 }, { "epoch": 1.622772569405035, "grad_norm": 0.4633290472028216, "learning_rate": 8.015637964672764e-06, "loss": 0.524, "step": 2234 }, { "epoch": 1.6234989671532019, "grad_norm": 0.3435908519465226, "learning_rate": 8.013733460843546e-06, "loss": 0.5147, "step": 2235 }, { "epoch": 1.6242253649013687, "grad_norm": 0.3724847066613284, "learning_rate": 8.011828270020575e-06, "loss": 0.5137, "step": 2236 }, { "epoch": 1.6249517626495358, "grad_norm": 0.41822288543551656, "learning_rate": 8.009922392638143e-06, "loss": 0.5292, "step": 2237 }, { "epoch": 1.6256781603977029, "grad_norm": 0.36389885402184613, "learning_rate": 8.008015829130704e-06, "loss": 0.5559, "step": 2238 }, { "epoch": 1.6264045581458697, "grad_norm": 0.49820298992503304, "learning_rate": 8.006108579932869e-06, "loss": 0.5115, "step": 2239 }, { "epoch": 1.6271309558940368, "grad_norm": 0.3858200969364259, "learning_rate": 8.004200645479403e-06, "loss": 0.5297, "step": 2240 }, { "epoch": 1.6278573536422036, "grad_norm": 0.36454219334100507, "learning_rate": 8.002292026205229e-06, "loss": 0.5304, "step": 2241 }, { "epoch": 1.6285837513903707, "grad_norm": 0.4297533946554223, "learning_rate": 8.000382722545423e-06, "loss": 0.5203, "step": 2242 }, { "epoch": 1.6293101491385378, "grad_norm": 0.4333838989231198, "learning_rate": 7.99847273493522e-06, "loss": 0.5165, "step": 2243 }, { "epoch": 1.6300365468867046, "grad_norm": 0.3353522195529111, "learning_rate": 7.996562063810009e-06, "loss": 0.521, "step": 2244 }, { "epoch": 1.6307629446348715, "grad_norm": 0.3506370164538408, "learning_rate": 7.994650709605338e-06, "loss": 0.5357, "step": 2245 }, { "epoch": 1.6314893423830386, "grad_norm": 0.3793671010753837, "learning_rate": 7.992738672756909e-06, "loss": 0.5138, "step": 2246 }, { "epoch": 1.6322157401312056, "grad_norm": 0.5060055389925995, "learning_rate": 7.990825953700577e-06, "loss": 0.5169, "step": 2247 }, { "epoch": 1.6329421378793727, "grad_norm": 0.3501754362435583, "learning_rate": 7.988912552872354e-06, "loss": 0.5225, "step": 2248 }, { "epoch": 1.6336685356275396, "grad_norm": 0.42524904353834486, "learning_rate": 7.98699847070841e-06, "loss": 0.5216, "step": 2249 }, { "epoch": 1.6343949333757064, "grad_norm": 0.38999245002865135, "learning_rate": 7.985083707645071e-06, "loss": 0.5222, "step": 2250 }, { "epoch": 1.6351213311238735, "grad_norm": 0.4078484544716897, "learning_rate": 7.983168264118811e-06, "loss": 0.5276, "step": 2251 }, { "epoch": 1.6358477288720406, "grad_norm": 0.36978836915791335, "learning_rate": 7.981252140566264e-06, "loss": 0.5311, "step": 2252 }, { "epoch": 1.6365741266202076, "grad_norm": 0.36068856400219423, "learning_rate": 7.979335337424222e-06, "loss": 0.5069, "step": 2253 }, { "epoch": 1.6373005243683745, "grad_norm": 0.4777656356045701, "learning_rate": 7.977417855129626e-06, "loss": 0.5218, "step": 2254 }, { "epoch": 1.6380269221165413, "grad_norm": 0.40092908368657715, "learning_rate": 7.975499694119576e-06, "loss": 0.5226, "step": 2255 }, { "epoch": 1.6387533198647084, "grad_norm": 0.4112514216593581, "learning_rate": 7.973580854831323e-06, "loss": 0.5216, "step": 2256 }, { "epoch": 1.6394797176128755, "grad_norm": 0.557912888011925, "learning_rate": 7.971661337702278e-06, "loss": 0.5367, "step": 2257 }, { "epoch": 1.6402061153610423, "grad_norm": 0.36306247064629654, "learning_rate": 7.969741143170003e-06, "loss": 0.5131, "step": 2258 }, { "epoch": 1.6409325131092094, "grad_norm": 0.4098049589502914, "learning_rate": 7.967820271672211e-06, "loss": 0.5294, "step": 2259 }, { "epoch": 1.6416589108573763, "grad_norm": 0.42971189819772837, "learning_rate": 7.965898723646777e-06, "loss": 0.5162, "step": 2260 }, { "epoch": 1.6423853086055433, "grad_norm": 0.43974465607779384, "learning_rate": 7.963976499531724e-06, "loss": 0.5198, "step": 2261 }, { "epoch": 1.6431117063537104, "grad_norm": 0.3782390833088789, "learning_rate": 7.962053599765234e-06, "loss": 0.5199, "step": 2262 }, { "epoch": 1.6438381041018773, "grad_norm": 0.3642711346135813, "learning_rate": 7.960130024785636e-06, "loss": 0.5045, "step": 2263 }, { "epoch": 1.644564501850044, "grad_norm": 0.4462608703081043, "learning_rate": 7.958205775031423e-06, "loss": 0.5165, "step": 2264 }, { "epoch": 1.6452908995982112, "grad_norm": 0.3937762431924642, "learning_rate": 7.95628085094123e-06, "loss": 0.5187, "step": 2265 }, { "epoch": 1.6460172973463782, "grad_norm": 0.3763979930994422, "learning_rate": 7.954355252953859e-06, "loss": 0.5141, "step": 2266 }, { "epoch": 1.6467436950945453, "grad_norm": 0.7372133545800805, "learning_rate": 7.952428981508254e-06, "loss": 0.532, "step": 2267 }, { "epoch": 1.6474700928427122, "grad_norm": 0.3950337311935571, "learning_rate": 7.950502037043515e-06, "loss": 0.5261, "step": 2268 }, { "epoch": 1.648196490590879, "grad_norm": 0.48949465520274943, "learning_rate": 7.948574419998903e-06, "loss": 0.5136, "step": 2269 }, { "epoch": 1.648922888339046, "grad_norm": 0.4338054113514785, "learning_rate": 7.946646130813822e-06, "loss": 0.5222, "step": 2270 }, { "epoch": 1.6496492860872132, "grad_norm": 0.4816646656555247, "learning_rate": 7.944717169927838e-06, "loss": 0.5307, "step": 2271 }, { "epoch": 1.6503756838353802, "grad_norm": 0.44931497631662126, "learning_rate": 7.942787537780663e-06, "loss": 0.518, "step": 2272 }, { "epoch": 1.651102081583547, "grad_norm": 0.5414627251303079, "learning_rate": 7.940857234812164e-06, "loss": 0.5002, "step": 2273 }, { "epoch": 1.651828479331714, "grad_norm": 0.39261828520787145, "learning_rate": 7.938926261462366e-06, "loss": 0.5141, "step": 2274 }, { "epoch": 1.652554877079881, "grad_norm": 0.4903351786500484, "learning_rate": 7.93699461817144e-06, "loss": 0.5296, "step": 2275 }, { "epoch": 1.653281274828048, "grad_norm": 0.47308665326073535, "learning_rate": 7.935062305379715e-06, "loss": 0.5141, "step": 2276 }, { "epoch": 1.6540076725762152, "grad_norm": 0.4171785888876544, "learning_rate": 7.933129323527668e-06, "loss": 0.5163, "step": 2277 }, { "epoch": 1.654734070324382, "grad_norm": 0.3629005440782251, "learning_rate": 7.931195673055932e-06, "loss": 0.5203, "step": 2278 }, { "epoch": 1.6554604680725489, "grad_norm": 0.45630944853496325, "learning_rate": 7.929261354405289e-06, "loss": 0.5286, "step": 2279 }, { "epoch": 1.656186865820716, "grad_norm": 0.39043909477075656, "learning_rate": 7.927326368016677e-06, "loss": 0.5315, "step": 2280 }, { "epoch": 1.656913263568883, "grad_norm": 0.45459187248014943, "learning_rate": 7.925390714331185e-06, "loss": 0.5211, "step": 2281 }, { "epoch": 1.6576396613170499, "grad_norm": 0.4232053463313131, "learning_rate": 7.923454393790053e-06, "loss": 0.5393, "step": 2282 }, { "epoch": 1.658366059065217, "grad_norm": 0.44803108722347296, "learning_rate": 7.921517406834675e-06, "loss": 0.528, "step": 2283 }, { "epoch": 1.6590924568133838, "grad_norm": 0.514049972418491, "learning_rate": 7.919579753906595e-06, "loss": 0.5276, "step": 2284 }, { "epoch": 1.6598188545615509, "grad_norm": 0.36688233278611193, "learning_rate": 7.917641435447508e-06, "loss": 0.5106, "step": 2285 }, { "epoch": 1.660545252309718, "grad_norm": 0.37359403943964403, "learning_rate": 7.915702451899264e-06, "loss": 0.5164, "step": 2286 }, { "epoch": 1.6612716500578848, "grad_norm": 0.41369210429929865, "learning_rate": 7.913762803703861e-06, "loss": 0.521, "step": 2287 }, { "epoch": 1.6619980478060516, "grad_norm": 0.4009306112880066, "learning_rate": 7.911822491303453e-06, "loss": 0.5239, "step": 2288 }, { "epoch": 1.6627244455542187, "grad_norm": 0.7311945919702445, "learning_rate": 7.90988151514034e-06, "loss": 0.5308, "step": 2289 }, { "epoch": 1.6634508433023858, "grad_norm": 0.43626534803107847, "learning_rate": 7.907939875656978e-06, "loss": 0.5288, "step": 2290 }, { "epoch": 1.6641772410505529, "grad_norm": 0.35224352677829485, "learning_rate": 7.90599757329597e-06, "loss": 0.5175, "step": 2291 }, { "epoch": 1.6649036387987197, "grad_norm": 0.5331153980860087, "learning_rate": 7.904054608500075e-06, "loss": 0.5116, "step": 2292 }, { "epoch": 1.6656300365468866, "grad_norm": 0.6710273266272868, "learning_rate": 7.902110981712198e-06, "loss": 0.5107, "step": 2293 }, { "epoch": 1.6663564342950536, "grad_norm": 0.3526683617151688, "learning_rate": 7.9001666933754e-06, "loss": 0.5233, "step": 2294 }, { "epoch": 1.6670828320432207, "grad_norm": 0.36532635665255275, "learning_rate": 7.898221743932887e-06, "loss": 0.5169, "step": 2295 }, { "epoch": 1.6678092297913878, "grad_norm": 0.3585418169457838, "learning_rate": 7.896276133828023e-06, "loss": 0.5082, "step": 2296 }, { "epoch": 1.6685356275395546, "grad_norm": 0.3544722750485833, "learning_rate": 7.894329863504314e-06, "loss": 0.5072, "step": 2297 }, { "epoch": 1.6692620252877215, "grad_norm": 0.3546616658809727, "learning_rate": 7.89238293340542e-06, "loss": 0.5185, "step": 2298 }, { "epoch": 1.6699884230358886, "grad_norm": 0.3625346473206743, "learning_rate": 7.890435343975158e-06, "loss": 0.5204, "step": 2299 }, { "epoch": 1.6707148207840556, "grad_norm": 1.0160843919774494, "learning_rate": 7.888487095657484e-06, "loss": 0.5225, "step": 2300 }, { "epoch": 1.6714412185322225, "grad_norm": 0.4678669118863533, "learning_rate": 7.886538188896511e-06, "loss": 0.5134, "step": 2301 }, { "epoch": 1.6721676162803896, "grad_norm": 0.4058252989421966, "learning_rate": 7.884588624136505e-06, "loss": 0.537, "step": 2302 }, { "epoch": 1.6728940140285564, "grad_norm": 0.5993490154767858, "learning_rate": 7.882638401821873e-06, "loss": 0.5204, "step": 2303 }, { "epoch": 1.6736204117767235, "grad_norm": 0.4303193883714196, "learning_rate": 7.880687522397177e-06, "loss": 0.5125, "step": 2304 }, { "epoch": 1.6743468095248906, "grad_norm": 0.35404110515752385, "learning_rate": 7.878735986307133e-06, "loss": 0.5295, "step": 2305 }, { "epoch": 1.6750732072730574, "grad_norm": 0.39844908796784556, "learning_rate": 7.876783793996597e-06, "loss": 0.5211, "step": 2306 }, { "epoch": 1.6757996050212243, "grad_norm": 0.5182957330420442, "learning_rate": 7.874830945910581e-06, "loss": 0.5266, "step": 2307 }, { "epoch": 1.6765260027693913, "grad_norm": 0.5028901218850625, "learning_rate": 7.872877442494248e-06, "loss": 0.5172, "step": 2308 }, { "epoch": 1.6772524005175584, "grad_norm": 0.3663308757833073, "learning_rate": 7.870923284192904e-06, "loss": 0.525, "step": 2309 }, { "epoch": 1.6779787982657255, "grad_norm": 0.3306688367658942, "learning_rate": 7.868968471452012e-06, "loss": 0.5097, "step": 2310 }, { "epoch": 1.6787051960138923, "grad_norm": 0.38291734663388316, "learning_rate": 7.867013004717177e-06, "loss": 0.5036, "step": 2311 }, { "epoch": 1.6794315937620592, "grad_norm": 0.37384892481056603, "learning_rate": 7.865056884434156e-06, "loss": 0.5048, "step": 2312 }, { "epoch": 1.6801579915102263, "grad_norm": 0.40607943931890755, "learning_rate": 7.863100111048858e-06, "loss": 0.541, "step": 2313 }, { "epoch": 1.6808843892583933, "grad_norm": 0.3674587638957362, "learning_rate": 7.861142685007335e-06, "loss": 0.5415, "step": 2314 }, { "epoch": 1.6816107870065604, "grad_norm": 0.3971339138125036, "learning_rate": 7.859184606755793e-06, "loss": 0.5155, "step": 2315 }, { "epoch": 1.6823371847547273, "grad_norm": 0.4463837403698997, "learning_rate": 7.857225876740585e-06, "loss": 0.5256, "step": 2316 }, { "epoch": 1.6830635825028941, "grad_norm": 0.39645405562986596, "learning_rate": 7.855266495408208e-06, "loss": 0.5257, "step": 2317 }, { "epoch": 1.6837899802510612, "grad_norm": 0.3647194703278608, "learning_rate": 7.853306463205317e-06, "loss": 0.5159, "step": 2318 }, { "epoch": 1.6845163779992283, "grad_norm": 0.3589570080440384, "learning_rate": 7.851345780578705e-06, "loss": 0.5033, "step": 2319 }, { "epoch": 1.6852427757473953, "grad_norm": 0.38129920749398744, "learning_rate": 7.849384447975322e-06, "loss": 0.5071, "step": 2320 }, { "epoch": 1.6859691734955622, "grad_norm": 0.42195602445792807, "learning_rate": 7.84742246584226e-06, "loss": 0.518, "step": 2321 }, { "epoch": 1.686695571243729, "grad_norm": 0.3687914244924957, "learning_rate": 7.845459834626763e-06, "loss": 0.5051, "step": 2322 }, { "epoch": 1.6874219689918961, "grad_norm": 0.3685612246975215, "learning_rate": 7.843496554776222e-06, "loss": 0.5182, "step": 2323 }, { "epoch": 1.6881483667400632, "grad_norm": 0.3708898771698278, "learning_rate": 7.841532626738174e-06, "loss": 0.5192, "step": 2324 }, { "epoch": 1.68887476448823, "grad_norm": 0.3794685872043508, "learning_rate": 7.839568050960302e-06, "loss": 0.5147, "step": 2325 }, { "epoch": 1.6896011622363971, "grad_norm": 0.4042012250094266, "learning_rate": 7.837602827890445e-06, "loss": 0.5179, "step": 2326 }, { "epoch": 1.690327559984564, "grad_norm": 0.46794334682587546, "learning_rate": 7.83563695797658e-06, "loss": 0.5236, "step": 2327 }, { "epoch": 1.691053957732731, "grad_norm": 0.401175275572764, "learning_rate": 7.83367044166684e-06, "loss": 0.5271, "step": 2328 }, { "epoch": 1.6917803554808981, "grad_norm": 0.34682830425628947, "learning_rate": 7.831703279409498e-06, "loss": 0.5151, "step": 2329 }, { "epoch": 1.692506753229065, "grad_norm": 0.43242771033611604, "learning_rate": 7.829735471652978e-06, "loss": 0.5253, "step": 2330 }, { "epoch": 1.6932331509772318, "grad_norm": 0.3938986346700491, "learning_rate": 7.827767018845847e-06, "loss": 0.5106, "step": 2331 }, { "epoch": 1.693959548725399, "grad_norm": 0.37655455018986816, "learning_rate": 7.825797921436829e-06, "loss": 0.5207, "step": 2332 }, { "epoch": 1.694685946473566, "grad_norm": 0.42286674943570335, "learning_rate": 7.823828179874784e-06, "loss": 0.5227, "step": 2333 }, { "epoch": 1.695412344221733, "grad_norm": 2.153158814997957, "learning_rate": 7.821857794608724e-06, "loss": 0.5097, "step": 2334 }, { "epoch": 1.6961387419698999, "grad_norm": 0.3888254323572561, "learning_rate": 7.819886766087808e-06, "loss": 0.5073, "step": 2335 }, { "epoch": 1.6968651397180667, "grad_norm": 0.4613541494505823, "learning_rate": 7.81791509476134e-06, "loss": 0.5133, "step": 2336 }, { "epoch": 1.6975915374662338, "grad_norm": 0.39905679540451405, "learning_rate": 7.815942781078772e-06, "loss": 0.4997, "step": 2337 }, { "epoch": 1.6983179352144009, "grad_norm": 0.42740584656532915, "learning_rate": 7.813969825489698e-06, "loss": 0.5125, "step": 2338 }, { "epoch": 1.699044332962568, "grad_norm": 0.39208560533952436, "learning_rate": 7.811996228443866e-06, "loss": 0.5203, "step": 2339 }, { "epoch": 1.6997707307107348, "grad_norm": 0.4894039824183243, "learning_rate": 7.810021990391163e-06, "loss": 0.5213, "step": 2340 }, { "epoch": 1.7004971284589017, "grad_norm": 0.5137340338976691, "learning_rate": 7.808047111781628e-06, "loss": 0.5201, "step": 2341 }, { "epoch": 1.7012235262070687, "grad_norm": 0.5554929278674202, "learning_rate": 7.806071593065441e-06, "loss": 0.5193, "step": 2342 }, { "epoch": 1.7019499239552358, "grad_norm": 0.6216775677657115, "learning_rate": 7.804095434692933e-06, "loss": 0.5321, "step": 2343 }, { "epoch": 1.7026763217034029, "grad_norm": 0.5283921114968795, "learning_rate": 7.802118637114575e-06, "loss": 0.5177, "step": 2344 }, { "epoch": 1.7034027194515697, "grad_norm": 0.5158106759029534, "learning_rate": 7.800141200780987e-06, "loss": 0.5214, "step": 2345 }, { "epoch": 1.7041291171997366, "grad_norm": 0.3910353597249199, "learning_rate": 7.798163126142935e-06, "loss": 0.5214, "step": 2346 }, { "epoch": 1.7048555149479037, "grad_norm": 0.357667881957513, "learning_rate": 7.796184413651331e-06, "loss": 0.5209, "step": 2347 }, { "epoch": 1.7055819126960707, "grad_norm": 0.3663707168426187, "learning_rate": 7.79420506375723e-06, "loss": 0.5095, "step": 2348 }, { "epoch": 1.7063083104442376, "grad_norm": 0.34462530618496784, "learning_rate": 7.792225076911833e-06, "loss": 0.5144, "step": 2349 }, { "epoch": 1.7070347081924044, "grad_norm": 0.43033742655038754, "learning_rate": 7.790244453566486e-06, "loss": 0.5156, "step": 2350 }, { "epoch": 1.7077611059405715, "grad_norm": 0.39115934719776607, "learning_rate": 7.788263194172684e-06, "loss": 0.5296, "step": 2351 }, { "epoch": 1.7084875036887386, "grad_norm": 0.3751449069052403, "learning_rate": 7.78628129918206e-06, "loss": 0.4988, "step": 2352 }, { "epoch": 1.7092139014369057, "grad_norm": 2.2865318787607047, "learning_rate": 7.784298769046399e-06, "loss": 0.5238, "step": 2353 }, { "epoch": 1.7099402991850725, "grad_norm": 0.3308255892497287, "learning_rate": 7.782315604217623e-06, "loss": 0.5084, "step": 2354 }, { "epoch": 1.7106666969332394, "grad_norm": 0.42055271857264337, "learning_rate": 7.780331805147809e-06, "loss": 0.5109, "step": 2355 }, { "epoch": 1.7113930946814064, "grad_norm": 0.3484879952270989, "learning_rate": 7.778347372289168e-06, "loss": 0.5209, "step": 2356 }, { "epoch": 1.7121194924295735, "grad_norm": 0.36894370842792445, "learning_rate": 7.776362306094063e-06, "loss": 0.5071, "step": 2357 }, { "epoch": 1.7128458901777406, "grad_norm": 0.4033215008387943, "learning_rate": 7.774376607014995e-06, "loss": 0.5123, "step": 2358 }, { "epoch": 1.7135722879259074, "grad_norm": 0.34254343652123953, "learning_rate": 7.772390275504618e-06, "loss": 0.5221, "step": 2359 }, { "epoch": 1.7142986856740743, "grad_norm": 0.6806500999901334, "learning_rate": 7.77040331201572e-06, "loss": 0.528, "step": 2360 }, { "epoch": 1.7150250834222414, "grad_norm": 0.3957327094391318, "learning_rate": 7.768415717001241e-06, "loss": 0.5061, "step": 2361 }, { "epoch": 1.7157514811704084, "grad_norm": 0.8887630735489043, "learning_rate": 7.766427490914261e-06, "loss": 0.5009, "step": 2362 }, { "epoch": 1.7164778789185755, "grad_norm": 0.41414804407176514, "learning_rate": 7.764438634208007e-06, "loss": 0.5091, "step": 2363 }, { "epoch": 1.7172042766667424, "grad_norm": 0.3997412655268966, "learning_rate": 7.762449147335843e-06, "loss": 0.5262, "step": 2364 }, { "epoch": 1.7179306744149092, "grad_norm": 0.41272868045457767, "learning_rate": 7.760459030751285e-06, "loss": 0.5122, "step": 2365 }, { "epoch": 1.7186570721630763, "grad_norm": 0.35094999229763957, "learning_rate": 7.758468284907988e-06, "loss": 0.5122, "step": 2366 }, { "epoch": 1.7193834699112434, "grad_norm": 0.45851342079117346, "learning_rate": 7.756476910259749e-06, "loss": 0.5184, "step": 2367 }, { "epoch": 1.7201098676594102, "grad_norm": 0.4626631942589889, "learning_rate": 7.754484907260513e-06, "loss": 0.523, "step": 2368 }, { "epoch": 1.7208362654075773, "grad_norm": 0.3868549743980831, "learning_rate": 7.752492276364368e-06, "loss": 0.5176, "step": 2369 }, { "epoch": 1.7215626631557441, "grad_norm": 0.3810781296920432, "learning_rate": 7.750499018025537e-06, "loss": 0.5064, "step": 2370 }, { "epoch": 1.7222890609039112, "grad_norm": 0.5430177338034557, "learning_rate": 7.748505132698396e-06, "loss": 0.5297, "step": 2371 }, { "epoch": 1.7230154586520783, "grad_norm": 0.3297386109193866, "learning_rate": 7.74651062083746e-06, "loss": 0.5241, "step": 2372 }, { "epoch": 1.7237418564002451, "grad_norm": 0.3606346057085869, "learning_rate": 7.744515482897386e-06, "loss": 0.539, "step": 2373 }, { "epoch": 1.724468254148412, "grad_norm": 0.34521028847501406, "learning_rate": 7.742519719332972e-06, "loss": 0.5191, "step": 2374 }, { "epoch": 1.725194651896579, "grad_norm": 0.3755384059531963, "learning_rate": 7.740523330599166e-06, "loss": 0.5433, "step": 2375 }, { "epoch": 1.7259210496447461, "grad_norm": 0.4504796185974387, "learning_rate": 7.73852631715105e-06, "loss": 0.506, "step": 2376 }, { "epoch": 1.7266474473929132, "grad_norm": 0.5107682186808763, "learning_rate": 7.736528679443853e-06, "loss": 0.5193, "step": 2377 }, { "epoch": 1.72737384514108, "grad_norm": 0.4141904772610753, "learning_rate": 7.734530417932947e-06, "loss": 0.5169, "step": 2378 }, { "epoch": 1.728100242889247, "grad_norm": 0.36227736569282754, "learning_rate": 7.73253153307384e-06, "loss": 0.5149, "step": 2379 }, { "epoch": 1.728826640637414, "grad_norm": 0.4409459557738853, "learning_rate": 7.73053202532219e-06, "loss": 0.5239, "step": 2380 }, { "epoch": 1.729553038385581, "grad_norm": 0.33578102943722477, "learning_rate": 7.728531895133792e-06, "loss": 0.5109, "step": 2381 }, { "epoch": 1.7302794361337481, "grad_norm": 0.4250354978387207, "learning_rate": 7.726531142964587e-06, "loss": 0.5155, "step": 2382 }, { "epoch": 1.731005833881915, "grad_norm": 0.4302601157882407, "learning_rate": 7.724529769270652e-06, "loss": 0.5141, "step": 2383 }, { "epoch": 1.7317322316300818, "grad_norm": 0.43407033758643465, "learning_rate": 7.722527774508212e-06, "loss": 0.5511, "step": 2384 }, { "epoch": 1.732458629378249, "grad_norm": 0.40151669451134714, "learning_rate": 7.720525159133627e-06, "loss": 0.5219, "step": 2385 }, { "epoch": 1.733185027126416, "grad_norm": 0.35884136542676387, "learning_rate": 7.718521923603404e-06, "loss": 0.5093, "step": 2386 }, { "epoch": 1.733911424874583, "grad_norm": 0.3816066124029688, "learning_rate": 7.71651806837419e-06, "loss": 0.5049, "step": 2387 }, { "epoch": 1.73463782262275, "grad_norm": 0.3676995514155556, "learning_rate": 7.714513593902771e-06, "loss": 0.521, "step": 2388 }, { "epoch": 1.7353642203709168, "grad_norm": 0.4667926009034362, "learning_rate": 7.712508500646077e-06, "loss": 0.5317, "step": 2389 }, { "epoch": 1.7360906181190838, "grad_norm": 0.4129366004482653, "learning_rate": 7.710502789061178e-06, "loss": 0.5099, "step": 2390 }, { "epoch": 1.736817015867251, "grad_norm": 0.3592680433090348, "learning_rate": 7.708496459605283e-06, "loss": 0.5212, "step": 2391 }, { "epoch": 1.7375434136154178, "grad_norm": 0.34614013237021196, "learning_rate": 7.706489512735745e-06, "loss": 0.5303, "step": 2392 }, { "epoch": 1.7382698113635848, "grad_norm": 0.3555021535589424, "learning_rate": 7.704481948910057e-06, "loss": 0.5277, "step": 2393 }, { "epoch": 1.7389962091117517, "grad_norm": 0.37639501186215735, "learning_rate": 7.702473768585847e-06, "loss": 0.525, "step": 2394 }, { "epoch": 1.7397226068599188, "grad_norm": 0.37361879877081183, "learning_rate": 7.700464972220895e-06, "loss": 0.5289, "step": 2395 }, { "epoch": 1.7404490046080858, "grad_norm": 0.39992477600936793, "learning_rate": 7.698455560273112e-06, "loss": 0.5312, "step": 2396 }, { "epoch": 1.7411754023562527, "grad_norm": 0.37406681284872073, "learning_rate": 7.696445533200553e-06, "loss": 0.5307, "step": 2397 }, { "epoch": 1.7419018001044195, "grad_norm": 0.42683497458023184, "learning_rate": 7.694434891461413e-06, "loss": 0.5001, "step": 2398 }, { "epoch": 1.7426281978525866, "grad_norm": 0.41158580911618886, "learning_rate": 7.692423635514025e-06, "loss": 0.5332, "step": 2399 }, { "epoch": 1.7433545956007537, "grad_norm": 0.36702630887820314, "learning_rate": 7.690411765816864e-06, "loss": 0.5278, "step": 2400 }, { "epoch": 1.7440809933489207, "grad_norm": 0.37401723327421216, "learning_rate": 7.688399282828544e-06, "loss": 0.5189, "step": 2401 }, { "epoch": 1.7448073910970876, "grad_norm": 0.4612737879621514, "learning_rate": 7.686386187007822e-06, "loss": 0.5219, "step": 2402 }, { "epoch": 1.7455337888452545, "grad_norm": 0.36477017291251335, "learning_rate": 7.68437247881359e-06, "loss": 0.5103, "step": 2403 }, { "epoch": 1.7462601865934215, "grad_norm": 0.3617003411712773, "learning_rate": 7.682358158704882e-06, "loss": 0.5025, "step": 2404 }, { "epoch": 1.7469865843415886, "grad_norm": 0.37483282727802664, "learning_rate": 7.68034322714087e-06, "loss": 0.5273, "step": 2405 }, { "epoch": 1.7477129820897557, "grad_norm": 0.3495224935981692, "learning_rate": 7.678327684580867e-06, "loss": 0.5176, "step": 2406 }, { "epoch": 1.7484393798379225, "grad_norm": 0.3385413622434483, "learning_rate": 7.676311531484324e-06, "loss": 0.5274, "step": 2407 }, { "epoch": 1.7491657775860894, "grad_norm": 0.3414632763254147, "learning_rate": 7.674294768310836e-06, "loss": 0.5324, "step": 2408 }, { "epoch": 1.7498921753342565, "grad_norm": 0.3305838987379325, "learning_rate": 7.672277395520127e-06, "loss": 0.5109, "step": 2409 }, { "epoch": 1.7506185730824235, "grad_norm": 0.38555130400427856, "learning_rate": 7.670259413572073e-06, "loss": 0.5024, "step": 2410 }, { "epoch": 1.7513449708305904, "grad_norm": 0.3199334271176942, "learning_rate": 7.668240822926674e-06, "loss": 0.508, "step": 2411 }, { "epoch": 1.7520713685787574, "grad_norm": 0.3747470270663056, "learning_rate": 7.66622162404408e-06, "loss": 0.5014, "step": 2412 }, { "epoch": 1.7527977663269243, "grad_norm": 0.3682403245152217, "learning_rate": 7.66420181738458e-06, "loss": 0.506, "step": 2413 }, { "epoch": 1.7535241640750914, "grad_norm": 0.38757289195608097, "learning_rate": 7.662181403408593e-06, "loss": 0.522, "step": 2414 }, { "epoch": 1.7542505618232584, "grad_norm": 0.3894174612444933, "learning_rate": 7.660160382576683e-06, "loss": 0.5237, "step": 2415 }, { "epoch": 1.7549769595714253, "grad_norm": 0.3553310739437953, "learning_rate": 7.658138755349552e-06, "loss": 0.5176, "step": 2416 }, { "epoch": 1.7557033573195922, "grad_norm": 0.412183295644057, "learning_rate": 7.656116522188034e-06, "loss": 0.5145, "step": 2417 }, { "epoch": 1.7564297550677592, "grad_norm": 0.4269731415373766, "learning_rate": 7.654093683553111e-06, "loss": 0.505, "step": 2418 }, { "epoch": 1.7571561528159263, "grad_norm": 0.40445962828489695, "learning_rate": 7.652070239905896e-06, "loss": 0.5385, "step": 2419 }, { "epoch": 1.7578825505640934, "grad_norm": 0.35438541605346585, "learning_rate": 7.650046191707641e-06, "loss": 0.5234, "step": 2420 }, { "epoch": 1.7586089483122602, "grad_norm": 0.36150585874514246, "learning_rate": 7.648021539419737e-06, "loss": 0.5071, "step": 2421 }, { "epoch": 1.759335346060427, "grad_norm": 0.43918572767685066, "learning_rate": 7.645996283503713e-06, "loss": 0.509, "step": 2422 }, { "epoch": 1.7600617438085941, "grad_norm": 0.41897424823585466, "learning_rate": 7.643970424421235e-06, "loss": 0.5289, "step": 2423 }, { "epoch": 1.7607881415567612, "grad_norm": 0.3721778531489851, "learning_rate": 7.641943962634106e-06, "loss": 0.5076, "step": 2424 }, { "epoch": 1.7615145393049283, "grad_norm": 0.4098431311637956, "learning_rate": 7.639916898604267e-06, "loss": 0.5307, "step": 2425 }, { "epoch": 1.7622409370530951, "grad_norm": 0.3773059345853294, "learning_rate": 7.637889232793795e-06, "loss": 0.5181, "step": 2426 }, { "epoch": 1.762967334801262, "grad_norm": 0.35280772424239176, "learning_rate": 7.63586096566491e-06, "loss": 0.5145, "step": 2427 }, { "epoch": 1.763693732549429, "grad_norm": 0.3423930857113253, "learning_rate": 7.633832097679959e-06, "loss": 0.53, "step": 2428 }, { "epoch": 1.7644201302975961, "grad_norm": 0.41118629604911827, "learning_rate": 7.631802629301433e-06, "loss": 0.5146, "step": 2429 }, { "epoch": 1.7651465280457632, "grad_norm": 0.3759577573061447, "learning_rate": 7.62977256099196e-06, "loss": 0.5162, "step": 2430 }, { "epoch": 1.76587292579393, "grad_norm": 0.33315814587839315, "learning_rate": 7.6277418932143e-06, "loss": 0.5309, "step": 2431 }, { "epoch": 1.766599323542097, "grad_norm": 0.37737468694841797, "learning_rate": 7.625710626431354e-06, "loss": 0.5203, "step": 2432 }, { "epoch": 1.767325721290264, "grad_norm": 0.38578068172952584, "learning_rate": 7.623678761106159e-06, "loss": 0.5209, "step": 2433 }, { "epoch": 1.768052119038431, "grad_norm": 0.3471234798763853, "learning_rate": 7.621646297701886e-06, "loss": 0.5175, "step": 2434 }, { "epoch": 1.768778516786598, "grad_norm": 0.33769756380371013, "learning_rate": 7.619613236681845e-06, "loss": 0.5155, "step": 2435 }, { "epoch": 1.769504914534765, "grad_norm": 0.3484262277106607, "learning_rate": 7.617579578509481e-06, "loss": 0.5151, "step": 2436 }, { "epoch": 1.7702313122829318, "grad_norm": 0.3472024381402114, "learning_rate": 7.6155453236483745e-06, "loss": 0.5287, "step": 2437 }, { "epoch": 1.770957710031099, "grad_norm": 0.4865050975301496, "learning_rate": 7.613510472562245e-06, "loss": 0.5106, "step": 2438 }, { "epoch": 1.771684107779266, "grad_norm": 0.43499596061429086, "learning_rate": 7.611475025714945e-06, "loss": 0.5126, "step": 2439 }, { "epoch": 1.7724105055274328, "grad_norm": 0.35984637513168016, "learning_rate": 7.609438983570461e-06, "loss": 0.5126, "step": 2440 }, { "epoch": 1.7731369032755997, "grad_norm": 0.3777402269226685, "learning_rate": 7.6074023465929206e-06, "loss": 0.5063, "step": 2441 }, { "epoch": 1.7738633010237668, "grad_norm": 0.3368264229441657, "learning_rate": 7.605365115246581e-06, "loss": 0.5253, "step": 2442 }, { "epoch": 1.7745896987719338, "grad_norm": 0.36427617602043305, "learning_rate": 7.603327289995843e-06, "loss": 0.5067, "step": 2443 }, { "epoch": 1.775316096520101, "grad_norm": 0.36829048675097775, "learning_rate": 7.601288871305235e-06, "loss": 0.4993, "step": 2444 }, { "epoch": 1.7760424942682678, "grad_norm": 0.3274746744967745, "learning_rate": 7.5992498596394215e-06, "loss": 0.5023, "step": 2445 }, { "epoch": 1.7767688920164346, "grad_norm": 0.3690904489859233, "learning_rate": 7.597210255463206e-06, "loss": 0.5089, "step": 2446 }, { "epoch": 1.7774952897646017, "grad_norm": 0.39823221371440226, "learning_rate": 7.595170059241527e-06, "loss": 0.5216, "step": 2447 }, { "epoch": 1.7782216875127688, "grad_norm": 0.4064508140087145, "learning_rate": 7.593129271439454e-06, "loss": 0.5179, "step": 2448 }, { "epoch": 1.7789480852609358, "grad_norm": 0.3365068405492714, "learning_rate": 7.591087892522193e-06, "loss": 0.5069, "step": 2449 }, { "epoch": 1.7796744830091027, "grad_norm": 0.368825203113945, "learning_rate": 7.589045922955085e-06, "loss": 0.509, "step": 2450 }, { "epoch": 1.7804008807572695, "grad_norm": 0.3702408504105955, "learning_rate": 7.587003363203609e-06, "loss": 0.5239, "step": 2451 }, { "epoch": 1.7811272785054366, "grad_norm": 0.54516242684235, "learning_rate": 7.5849602137333745e-06, "loss": 0.5017, "step": 2452 }, { "epoch": 1.7818536762536037, "grad_norm": 0.3789259608047946, "learning_rate": 7.582916475010125e-06, "loss": 0.5291, "step": 2453 }, { "epoch": 1.7825800740017705, "grad_norm": 0.3929344951040863, "learning_rate": 7.580872147499738e-06, "loss": 0.5099, "step": 2454 }, { "epoch": 1.7833064717499376, "grad_norm": 0.4521458967644181, "learning_rate": 7.578827231668231e-06, "loss": 0.5137, "step": 2455 }, { "epoch": 1.7840328694981045, "grad_norm": 0.3396557213768358, "learning_rate": 7.5767817279817505e-06, "loss": 0.5165, "step": 2456 }, { "epoch": 1.7847592672462715, "grad_norm": 0.3591207918432544, "learning_rate": 7.574735636906576e-06, "loss": 0.5031, "step": 2457 }, { "epoch": 1.7854856649944386, "grad_norm": 0.3280120304606946, "learning_rate": 7.572688958909124e-06, "loss": 0.5184, "step": 2458 }, { "epoch": 1.7862120627426055, "grad_norm": 0.3910837966006328, "learning_rate": 7.570641694455945e-06, "loss": 0.5042, "step": 2459 }, { "epoch": 1.7869384604907723, "grad_norm": 0.4156980995760029, "learning_rate": 7.5685938440137185e-06, "loss": 0.5105, "step": 2460 }, { "epoch": 1.7876648582389394, "grad_norm": 0.37191043977151966, "learning_rate": 7.566545408049264e-06, "loss": 0.5194, "step": 2461 }, { "epoch": 1.7883912559871065, "grad_norm": 0.41929543282454695, "learning_rate": 7.564496387029532e-06, "loss": 0.4994, "step": 2462 }, { "epoch": 1.7891176537352735, "grad_norm": 0.5535216928962473, "learning_rate": 7.562446781421604e-06, "loss": 0.5113, "step": 2463 }, { "epoch": 1.7898440514834404, "grad_norm": 0.4440265170284457, "learning_rate": 7.560396591692696e-06, "loss": 0.5071, "step": 2464 }, { "epoch": 1.7905704492316072, "grad_norm": 0.31691196067500516, "learning_rate": 7.558345818310159e-06, "loss": 0.5133, "step": 2465 }, { "epoch": 1.7912968469797743, "grad_norm": 0.3976398324340962, "learning_rate": 7.556294461741476e-06, "loss": 0.5091, "step": 2466 }, { "epoch": 1.7920232447279414, "grad_norm": 0.47629872897047004, "learning_rate": 7.554242522454262e-06, "loss": 0.5077, "step": 2467 }, { "epoch": 1.7927496424761085, "grad_norm": 0.3915447918725823, "learning_rate": 7.552190000916267e-06, "loss": 0.5142, "step": 2468 }, { "epoch": 1.7934760402242753, "grad_norm": 0.34586851694938503, "learning_rate": 7.55013689759537e-06, "loss": 0.5197, "step": 2469 }, { "epoch": 1.7942024379724422, "grad_norm": 0.39615903534776675, "learning_rate": 7.548083212959588e-06, "loss": 0.5102, "step": 2470 }, { "epoch": 1.7949288357206092, "grad_norm": 0.3824072375603738, "learning_rate": 7.5460289474770645e-06, "loss": 0.5051, "step": 2471 }, { "epoch": 1.7956552334687763, "grad_norm": 0.3984984887068014, "learning_rate": 7.5439741016160785e-06, "loss": 0.5264, "step": 2472 }, { "epoch": 1.7963816312169434, "grad_norm": 0.3854979324219856, "learning_rate": 7.541918675845044e-06, "loss": 0.5112, "step": 2473 }, { "epoch": 1.7971080289651102, "grad_norm": 0.4373313917377871, "learning_rate": 7.539862670632501e-06, "loss": 0.502, "step": 2474 }, { "epoch": 1.797834426713277, "grad_norm": 0.392190615648265, "learning_rate": 7.537806086447124e-06, "loss": 0.5299, "step": 2475 }, { "epoch": 1.7985608244614442, "grad_norm": 0.33446120395684537, "learning_rate": 7.5357489237577246e-06, "loss": 0.4955, "step": 2476 }, { "epoch": 1.7992872222096112, "grad_norm": 0.3924100537430557, "learning_rate": 7.53369118303324e-06, "loss": 0.5157, "step": 2477 }, { "epoch": 1.800013619957778, "grad_norm": 0.3947888140852665, "learning_rate": 7.531632864742742e-06, "loss": 0.5108, "step": 2478 }, { "epoch": 1.8007400177059452, "grad_norm": 0.41413216803259373, "learning_rate": 7.52957396935543e-06, "loss": 0.5258, "step": 2479 }, { "epoch": 1.801466415454112, "grad_norm": 0.3430398095552677, "learning_rate": 7.527514497340642e-06, "loss": 0.5216, "step": 2480 }, { "epoch": 1.802192813202279, "grad_norm": 0.3803404160910686, "learning_rate": 7.525454449167844e-06, "loss": 0.5112, "step": 2481 }, { "epoch": 1.8029192109504462, "grad_norm": 0.362708206016911, "learning_rate": 7.52339382530663e-06, "loss": 0.5031, "step": 2482 }, { "epoch": 1.803645608698613, "grad_norm": 0.3874709070824654, "learning_rate": 7.52133262622673e-06, "loss": 0.5206, "step": 2483 }, { "epoch": 1.8043720064467799, "grad_norm": 0.35370761771305603, "learning_rate": 7.519270852398002e-06, "loss": 0.5128, "step": 2484 }, { "epoch": 1.805098404194947, "grad_norm": 0.3378750964273353, "learning_rate": 7.517208504290438e-06, "loss": 0.5139, "step": 2485 }, { "epoch": 1.805824801943114, "grad_norm": 0.47739310413310937, "learning_rate": 7.5151455823741605e-06, "loss": 0.5325, "step": 2486 }, { "epoch": 1.806551199691281, "grad_norm": 1.0220584924467446, "learning_rate": 7.513082087119419e-06, "loss": 0.5134, "step": 2487 }, { "epoch": 1.807277597439448, "grad_norm": 0.34733949051067253, "learning_rate": 7.511018018996597e-06, "loss": 0.5258, "step": 2488 }, { "epoch": 1.8080039951876148, "grad_norm": 0.43738246407528614, "learning_rate": 7.508953378476207e-06, "loss": 0.5223, "step": 2489 }, { "epoch": 1.8087303929357819, "grad_norm": 0.3884686553974104, "learning_rate": 7.506888166028893e-06, "loss": 0.524, "step": 2490 }, { "epoch": 1.809456790683949, "grad_norm": 0.3664437825331931, "learning_rate": 7.504822382125432e-06, "loss": 0.5057, "step": 2491 }, { "epoch": 1.810183188432116, "grad_norm": 0.4718259906344069, "learning_rate": 7.502756027236725e-06, "loss": 0.5269, "step": 2492 }, { "epoch": 1.8109095861802829, "grad_norm": 0.3578874000507274, "learning_rate": 7.500689101833809e-06, "loss": 0.4972, "step": 2493 }, { "epoch": 1.8116359839284497, "grad_norm": 0.4239146637573705, "learning_rate": 7.498621606387848e-06, "loss": 0.5037, "step": 2494 }, { "epoch": 1.8123623816766168, "grad_norm": 0.4527212658614285, "learning_rate": 7.496553541370136e-06, "loss": 0.5138, "step": 2495 }, { "epoch": 1.8130887794247839, "grad_norm": 0.38027801188865673, "learning_rate": 7.4944849072520994e-06, "loss": 0.5002, "step": 2496 }, { "epoch": 1.8138151771729507, "grad_norm": 0.4033291849795312, "learning_rate": 7.492415704505291e-06, "loss": 0.5226, "step": 2497 }, { "epoch": 1.8145415749211178, "grad_norm": 0.3807979142898502, "learning_rate": 7.490345933601395e-06, "loss": 0.5204, "step": 2498 }, { "epoch": 1.8152679726692846, "grad_norm": 0.36024441295502735, "learning_rate": 7.488275595012222e-06, "loss": 0.5123, "step": 2499 }, { "epoch": 1.8159943704174517, "grad_norm": 0.34085085330306947, "learning_rate": 7.486204689209719e-06, "loss": 0.5172, "step": 2500 }, { "epoch": 1.8167207681656188, "grad_norm": 0.384830670861468, "learning_rate": 7.484133216665956e-06, "loss": 0.4996, "step": 2501 }, { "epoch": 1.8174471659137856, "grad_norm": 0.32600758570920274, "learning_rate": 7.482061177853135e-06, "loss": 0.5017, "step": 2502 }, { "epoch": 1.8181735636619525, "grad_norm": 0.451413015847686, "learning_rate": 7.479988573243586e-06, "loss": 0.4986, "step": 2503 }, { "epoch": 1.8188999614101196, "grad_norm": 0.36176694758470074, "learning_rate": 7.477915403309768e-06, "loss": 0.5233, "step": 2504 }, { "epoch": 1.8196263591582866, "grad_norm": 0.4201765718524903, "learning_rate": 7.475841668524268e-06, "loss": 0.5158, "step": 2505 }, { "epoch": 1.8203527569064537, "grad_norm": 0.349747935999797, "learning_rate": 7.473767369359805e-06, "loss": 0.5084, "step": 2506 }, { "epoch": 1.8210791546546206, "grad_norm": 0.37715608223832664, "learning_rate": 7.471692506289225e-06, "loss": 0.5133, "step": 2507 }, { "epoch": 1.8218055524027874, "grad_norm": 0.3781868222530327, "learning_rate": 7.4696170797855005e-06, "loss": 0.5018, "step": 2508 }, { "epoch": 1.8225319501509545, "grad_norm": 0.3251394185869421, "learning_rate": 7.467541090321735e-06, "loss": 0.5001, "step": 2509 }, { "epoch": 1.8232583478991216, "grad_norm": 0.3696746423083357, "learning_rate": 7.465464538371159e-06, "loss": 0.5067, "step": 2510 }, { "epoch": 1.8239847456472886, "grad_norm": 0.359042070785887, "learning_rate": 7.463387424407131e-06, "loss": 0.5146, "step": 2511 }, { "epoch": 1.8247111433954555, "grad_norm": 0.467326002019918, "learning_rate": 7.461309748903138e-06, "loss": 0.5228, "step": 2512 }, { "epoch": 1.8254375411436223, "grad_norm": 0.43379456229117774, "learning_rate": 7.459231512332799e-06, "loss": 0.5165, "step": 2513 }, { "epoch": 1.8261639388917894, "grad_norm": 0.39953052494671776, "learning_rate": 7.4571527151698505e-06, "loss": 0.5054, "step": 2514 }, { "epoch": 1.8268903366399565, "grad_norm": 0.4293974894737732, "learning_rate": 7.45507335788817e-06, "loss": 0.5028, "step": 2515 }, { "epoch": 1.8276167343881236, "grad_norm": 0.3691821182232711, "learning_rate": 7.452993440961751e-06, "loss": 0.5014, "step": 2516 }, { "epoch": 1.8283431321362904, "grad_norm": 0.3315988208836827, "learning_rate": 7.450912964864724e-06, "loss": 0.5079, "step": 2517 }, { "epoch": 1.8290695298844573, "grad_norm": 0.38090799522614277, "learning_rate": 7.44883193007134e-06, "loss": 0.5174, "step": 2518 }, { "epoch": 1.8297959276326243, "grad_norm": 0.37861845120198107, "learning_rate": 7.4467503370559806e-06, "loss": 0.5203, "step": 2519 }, { "epoch": 1.8305223253807914, "grad_norm": 0.3700730085426923, "learning_rate": 7.444668186293153e-06, "loss": 0.5108, "step": 2520 }, { "epoch": 1.8312487231289583, "grad_norm": 0.541136540489763, "learning_rate": 7.4425854782574935e-06, "loss": 0.5103, "step": 2521 }, { "epoch": 1.8319751208771253, "grad_norm": 0.3980367064113061, "learning_rate": 7.440502213423766e-06, "loss": 0.5072, "step": 2522 }, { "epoch": 1.8327015186252922, "grad_norm": 0.3341368904221697, "learning_rate": 7.43841839226686e-06, "loss": 0.5087, "step": 2523 }, { "epoch": 1.8334279163734593, "grad_norm": 0.6917196441784205, "learning_rate": 7.436334015261787e-06, "loss": 0.522, "step": 2524 }, { "epoch": 1.8341543141216263, "grad_norm": 0.3987386461839978, "learning_rate": 7.434249082883694e-06, "loss": 0.5026, "step": 2525 }, { "epoch": 1.8348807118697932, "grad_norm": 0.39046462611388316, "learning_rate": 7.432163595607851e-06, "loss": 0.5129, "step": 2526 }, { "epoch": 1.83560710961796, "grad_norm": 0.3474126740060321, "learning_rate": 7.430077553909651e-06, "loss": 0.5111, "step": 2527 }, { "epoch": 1.836333507366127, "grad_norm": 0.4391027195312627, "learning_rate": 7.427990958264619e-06, "loss": 0.5074, "step": 2528 }, { "epoch": 1.8370599051142942, "grad_norm": 0.3674104817530332, "learning_rate": 7.425903809148401e-06, "loss": 0.5142, "step": 2529 }, { "epoch": 1.8377863028624613, "grad_norm": 0.4269078954287343, "learning_rate": 7.423816107036774e-06, "loss": 0.5234, "step": 2530 }, { "epoch": 1.838512700610628, "grad_norm": 0.3256909453235737, "learning_rate": 7.421727852405639e-06, "loss": 0.5067, "step": 2531 }, { "epoch": 1.839239098358795, "grad_norm": 0.35553526649015293, "learning_rate": 7.419639045731022e-06, "loss": 0.5097, "step": 2532 }, { "epoch": 1.839965496106962, "grad_norm": 0.370387646589311, "learning_rate": 7.417549687489074e-06, "loss": 0.5112, "step": 2533 }, { "epoch": 1.840691893855129, "grad_norm": 0.4062361187580708, "learning_rate": 7.415459778156075e-06, "loss": 0.5266, "step": 2534 }, { "epoch": 1.8414182916032962, "grad_norm": 0.38909458471907526, "learning_rate": 7.413369318208431e-06, "loss": 0.52, "step": 2535 }, { "epoch": 1.842144689351463, "grad_norm": 0.3450664136417991, "learning_rate": 7.411278308122669e-06, "loss": 0.5115, "step": 2536 }, { "epoch": 1.8428710870996299, "grad_norm": 0.3481338815009868, "learning_rate": 7.409186748375443e-06, "loss": 0.4961, "step": 2537 }, { "epoch": 1.843597484847797, "grad_norm": 0.3794933531601418, "learning_rate": 7.4070946394435364e-06, "loss": 0.5213, "step": 2538 }, { "epoch": 1.844323882595964, "grad_norm": 0.34680653454325094, "learning_rate": 7.405001981803851e-06, "loss": 0.4881, "step": 2539 }, { "epoch": 1.845050280344131, "grad_norm": 0.38012119673509104, "learning_rate": 7.402908775933419e-06, "loss": 0.5223, "step": 2540 }, { "epoch": 1.845776678092298, "grad_norm": 0.391126869682353, "learning_rate": 7.4008150223093974e-06, "loss": 0.5082, "step": 2541 }, { "epoch": 1.8465030758404648, "grad_norm": 0.3950671051221416, "learning_rate": 7.398720721409065e-06, "loss": 0.5038, "step": 2542 }, { "epoch": 1.8472294735886319, "grad_norm": 0.7518357674558912, "learning_rate": 7.396625873709826e-06, "loss": 0.524, "step": 2543 }, { "epoch": 1.847955871336799, "grad_norm": 0.440563224216234, "learning_rate": 7.394530479689211e-06, "loss": 0.5075, "step": 2544 }, { "epoch": 1.8486822690849658, "grad_norm": 0.37825420804775123, "learning_rate": 7.392434539824874e-06, "loss": 0.5067, "step": 2545 }, { "epoch": 1.8494086668331327, "grad_norm": 0.4329732625380953, "learning_rate": 7.390338054594595e-06, "loss": 0.5144, "step": 2546 }, { "epoch": 1.8501350645812997, "grad_norm": 0.37240168722065525, "learning_rate": 7.388241024476276e-06, "loss": 0.5066, "step": 2547 }, { "epoch": 1.8508614623294668, "grad_norm": 0.39353747779853976, "learning_rate": 7.386143449947945e-06, "loss": 0.4925, "step": 2548 }, { "epoch": 1.8515878600776339, "grad_norm": 0.3359778131925848, "learning_rate": 7.3840453314877505e-06, "loss": 0.505, "step": 2549 }, { "epoch": 1.8523142578258007, "grad_norm": 0.4354222874826661, "learning_rate": 7.381946669573971e-06, "loss": 0.5012, "step": 2550 }, { "epoch": 1.8530406555739676, "grad_norm": 0.36701161656457704, "learning_rate": 7.3798474646850034e-06, "loss": 0.5013, "step": 2551 }, { "epoch": 1.8537670533221347, "grad_norm": 0.33308540835740597, "learning_rate": 7.377747717299373e-06, "loss": 0.5023, "step": 2552 }, { "epoch": 1.8544934510703017, "grad_norm": 0.3908552653407151, "learning_rate": 7.375647427895725e-06, "loss": 0.5154, "step": 2553 }, { "epoch": 1.8552198488184688, "grad_norm": 0.3887867886469977, "learning_rate": 7.373546596952829e-06, "loss": 0.5071, "step": 2554 }, { "epoch": 1.8559462465666356, "grad_norm": 0.4647679307155514, "learning_rate": 7.371445224949581e-06, "loss": 0.5118, "step": 2555 }, { "epoch": 1.8566726443148025, "grad_norm": 0.44954855385912307, "learning_rate": 7.369343312364994e-06, "loss": 0.5154, "step": 2556 }, { "epoch": 1.8573990420629696, "grad_norm": 0.38040083522847523, "learning_rate": 7.367240859678212e-06, "loss": 0.5199, "step": 2557 }, { "epoch": 1.8581254398111366, "grad_norm": 0.4046416379097866, "learning_rate": 7.365137867368497e-06, "loss": 0.5123, "step": 2558 }, { "epoch": 1.8588518375593037, "grad_norm": 0.3872560720187017, "learning_rate": 7.363034335915234e-06, "loss": 0.5152, "step": 2559 }, { "epoch": 1.8595782353074706, "grad_norm": 0.36183881336332757, "learning_rate": 7.360930265797934e-06, "loss": 0.505, "step": 2560 }, { "epoch": 1.8603046330556374, "grad_norm": 0.4649911594672085, "learning_rate": 7.358825657496228e-06, "loss": 0.5054, "step": 2561 }, { "epoch": 1.8610310308038045, "grad_norm": 0.3458025268789646, "learning_rate": 7.356720511489873e-06, "loss": 0.4952, "step": 2562 }, { "epoch": 1.8617574285519716, "grad_norm": 0.45459208605712137, "learning_rate": 7.354614828258741e-06, "loss": 0.529, "step": 2563 }, { "epoch": 1.8624838263001384, "grad_norm": 0.33773416586056954, "learning_rate": 7.352508608282838e-06, "loss": 0.5128, "step": 2564 }, { "epoch": 1.8632102240483055, "grad_norm": 0.3808978498416382, "learning_rate": 7.3504018520422825e-06, "loss": 0.4986, "step": 2565 }, { "epoch": 1.8639366217964723, "grad_norm": 0.37701174485289957, "learning_rate": 7.3482945600173195e-06, "loss": 0.512, "step": 2566 }, { "epoch": 1.8646630195446394, "grad_norm": 0.38208771506041556, "learning_rate": 7.346186732688314e-06, "loss": 0.5125, "step": 2567 }, { "epoch": 1.8653894172928065, "grad_norm": 0.3494274116064339, "learning_rate": 7.344078370535757e-06, "loss": 0.517, "step": 2568 }, { "epoch": 1.8661158150409733, "grad_norm": 0.3780497325193841, "learning_rate": 7.341969474040257e-06, "loss": 0.4967, "step": 2569 }, { "epoch": 1.8668422127891402, "grad_norm": 0.4486675122297359, "learning_rate": 7.339860043682548e-06, "loss": 0.5147, "step": 2570 }, { "epoch": 1.8675686105373073, "grad_norm": 0.4138925018957802, "learning_rate": 7.337750079943483e-06, "loss": 0.4959, "step": 2571 }, { "epoch": 1.8682950082854743, "grad_norm": 0.44735718137758135, "learning_rate": 7.335639583304037e-06, "loss": 0.5104, "step": 2572 }, { "epoch": 1.8690214060336414, "grad_norm": 0.3372381434600118, "learning_rate": 7.333528554245307e-06, "loss": 0.5245, "step": 2573 }, { "epoch": 1.8697478037818083, "grad_norm": 0.4449369599629364, "learning_rate": 7.331416993248511e-06, "loss": 0.5205, "step": 2574 }, { "epoch": 1.8704742015299751, "grad_norm": 0.36217733835115423, "learning_rate": 7.329304900794991e-06, "loss": 0.499, "step": 2575 }, { "epoch": 1.8712005992781422, "grad_norm": 0.3573225113196394, "learning_rate": 7.327192277366206e-06, "loss": 0.5075, "step": 2576 }, { "epoch": 1.8719269970263093, "grad_norm": 0.3530999968916237, "learning_rate": 7.325079123443737e-06, "loss": 0.5111, "step": 2577 }, { "epoch": 1.8726533947744763, "grad_norm": 0.381411425903793, "learning_rate": 7.322965439509287e-06, "loss": 0.5145, "step": 2578 }, { "epoch": 1.8733797925226432, "grad_norm": 0.43926138695135614, "learning_rate": 7.3208512260446805e-06, "loss": 0.5094, "step": 2579 }, { "epoch": 1.87410619027081, "grad_norm": 0.48230232367488945, "learning_rate": 7.318736483531861e-06, "loss": 0.4854, "step": 2580 }, { "epoch": 1.8748325880189771, "grad_norm": 0.41277915585386155, "learning_rate": 7.316621212452895e-06, "loss": 0.4943, "step": 2581 }, { "epoch": 1.8755589857671442, "grad_norm": 0.3723827383864031, "learning_rate": 7.314505413289964e-06, "loss": 0.5323, "step": 2582 }, { "epoch": 1.8762853835153113, "grad_norm": 0.3684546635367408, "learning_rate": 7.3123890865253765e-06, "loss": 0.5198, "step": 2583 }, { "epoch": 1.8770117812634781, "grad_norm": 0.4422550701185108, "learning_rate": 7.310272232641559e-06, "loss": 0.4991, "step": 2584 }, { "epoch": 1.877738179011645, "grad_norm": 0.35634373073083137, "learning_rate": 7.308154852121057e-06, "loss": 0.5054, "step": 2585 }, { "epoch": 1.878464576759812, "grad_norm": 0.3807604321924993, "learning_rate": 7.306036945446535e-06, "loss": 0.5041, "step": 2586 }, { "epoch": 1.8791909745079791, "grad_norm": 0.43867415636051965, "learning_rate": 7.30391851310078e-06, "loss": 0.4928, "step": 2587 }, { "epoch": 1.879917372256146, "grad_norm": 0.563585109354283, "learning_rate": 7.301799555566701e-06, "loss": 0.5268, "step": 2588 }, { "epoch": 1.8806437700043128, "grad_norm": 0.45682805335053467, "learning_rate": 7.2996800733273196e-06, "loss": 0.5193, "step": 2589 }, { "epoch": 1.88137016775248, "grad_norm": 0.4653425530708123, "learning_rate": 7.297560066865782e-06, "loss": 0.5199, "step": 2590 }, { "epoch": 1.882096565500647, "grad_norm": 0.46759940311509823, "learning_rate": 7.2954395366653545e-06, "loss": 0.4955, "step": 2591 }, { "epoch": 1.882822963248814, "grad_norm": 0.4788522864372003, "learning_rate": 7.293318483209419e-06, "loss": 0.52, "step": 2592 }, { "epoch": 1.883549360996981, "grad_norm": 0.3746450129791861, "learning_rate": 7.29119690698148e-06, "loss": 0.5057, "step": 2593 }, { "epoch": 1.8842757587451477, "grad_norm": 0.4550524021827555, "learning_rate": 7.289074808465162e-06, "loss": 0.507, "step": 2594 }, { "epoch": 1.8850021564933148, "grad_norm": 0.36358004633730967, "learning_rate": 7.286952188144204e-06, "loss": 0.509, "step": 2595 }, { "epoch": 1.885728554241482, "grad_norm": 0.3871569601109488, "learning_rate": 7.284829046502467e-06, "loss": 0.5017, "step": 2596 }, { "epoch": 1.886454951989649, "grad_norm": 0.3555485582102289, "learning_rate": 7.282705384023933e-06, "loss": 0.5198, "step": 2597 }, { "epoch": 1.8871813497378158, "grad_norm": 0.3741738055108657, "learning_rate": 7.280581201192696e-06, "loss": 0.501, "step": 2598 }, { "epoch": 1.8879077474859827, "grad_norm": 0.3219865204438388, "learning_rate": 7.278456498492975e-06, "loss": 0.5235, "step": 2599 }, { "epoch": 1.8886341452341497, "grad_norm": 0.40093648121874215, "learning_rate": 7.2763312764091055e-06, "loss": 0.4906, "step": 2600 }, { "epoch": 1.8893605429823168, "grad_norm": 0.3942335836495865, "learning_rate": 7.274205535425543e-06, "loss": 0.5226, "step": 2601 }, { "epoch": 1.8900869407304839, "grad_norm": 0.3743931565246306, "learning_rate": 7.272079276026855e-06, "loss": 0.5086, "step": 2602 }, { "epoch": 1.8908133384786507, "grad_norm": 0.3921609659047992, "learning_rate": 7.269952498697734e-06, "loss": 0.4963, "step": 2603 }, { "epoch": 1.8915397362268176, "grad_norm": 0.4079084279428097, "learning_rate": 7.26782520392299e-06, "loss": 0.4983, "step": 2604 }, { "epoch": 1.8922661339749847, "grad_norm": 0.4573292211751414, "learning_rate": 7.265697392187547e-06, "loss": 0.5138, "step": 2605 }, { "epoch": 1.8929925317231517, "grad_norm": 0.39025123088916785, "learning_rate": 7.263569063976449e-06, "loss": 0.5126, "step": 2606 }, { "epoch": 1.8937189294713186, "grad_norm": 0.527907757792281, "learning_rate": 7.261440219774858e-06, "loss": 0.5029, "step": 2607 }, { "epoch": 1.8944453272194857, "grad_norm": 0.3581702843772708, "learning_rate": 7.2593108600680524e-06, "loss": 0.5176, "step": 2608 }, { "epoch": 1.8951717249676525, "grad_norm": 0.39048666471502463, "learning_rate": 7.257180985341432e-06, "loss": 0.5152, "step": 2609 }, { "epoch": 1.8958981227158196, "grad_norm": 0.42110401723381574, "learning_rate": 7.25505059608051e-06, "loss": 0.5083, "step": 2610 }, { "epoch": 1.8966245204639867, "grad_norm": 0.4571340129019425, "learning_rate": 7.252919692770916e-06, "loss": 0.5125, "step": 2611 }, { "epoch": 1.8973509182121535, "grad_norm": 0.36420915076613447, "learning_rate": 7.250788275898401e-06, "loss": 0.5118, "step": 2612 }, { "epoch": 1.8980773159603204, "grad_norm": 0.37701735727524127, "learning_rate": 7.248656345948828e-06, "loss": 0.5109, "step": 2613 }, { "epoch": 1.8988037137084874, "grad_norm": 0.4371017222186262, "learning_rate": 7.2465239034081835e-06, "loss": 0.4916, "step": 2614 }, { "epoch": 1.8995301114566545, "grad_norm": 0.3654714865890906, "learning_rate": 7.244390948762566e-06, "loss": 0.5172, "step": 2615 }, { "epoch": 1.9002565092048216, "grad_norm": 0.47754877856771066, "learning_rate": 7.242257482498191e-06, "loss": 0.5287, "step": 2616 }, { "epoch": 1.9009829069529884, "grad_norm": 0.36891648606279626, "learning_rate": 7.2401235051013885e-06, "loss": 0.5142, "step": 2617 }, { "epoch": 1.9017093047011553, "grad_norm": 0.4223943980005817, "learning_rate": 7.237989017058614e-06, "loss": 0.4889, "step": 2618 }, { "epoch": 1.9024357024493224, "grad_norm": 0.35807149966277446, "learning_rate": 7.235854018856429e-06, "loss": 0.509, "step": 2619 }, { "epoch": 1.9031621001974894, "grad_norm": 0.4103889588551492, "learning_rate": 7.23371851098152e-06, "loss": 0.5074, "step": 2620 }, { "epoch": 1.9038884979456565, "grad_norm": 0.47569057216314453, "learning_rate": 7.23158249392068e-06, "loss": 0.5167, "step": 2621 }, { "epoch": 1.9046148956938234, "grad_norm": 0.7088526151457004, "learning_rate": 7.2294459681608275e-06, "loss": 0.5204, "step": 2622 }, { "epoch": 1.9053412934419902, "grad_norm": 0.4343984003997984, "learning_rate": 7.22730893418899e-06, "loss": 0.5124, "step": 2623 }, { "epoch": 1.9060676911901573, "grad_norm": 0.3905184752532141, "learning_rate": 7.225171392492316e-06, "loss": 0.5247, "step": 2624 }, { "epoch": 1.9067940889383244, "grad_norm": 0.35839885716842484, "learning_rate": 7.223033343558068e-06, "loss": 0.52, "step": 2625 }, { "epoch": 1.9075204866864914, "grad_norm": 0.40567822205512094, "learning_rate": 7.220894787873621e-06, "loss": 0.5013, "step": 2626 }, { "epoch": 1.9082468844346583, "grad_norm": 0.49459869136202017, "learning_rate": 7.218755725926471e-06, "loss": 0.5093, "step": 2627 }, { "epoch": 1.9089732821828251, "grad_norm": 0.3794644738069637, "learning_rate": 7.216616158204223e-06, "loss": 0.4999, "step": 2628 }, { "epoch": 1.9096996799309922, "grad_norm": 0.4855621032827998, "learning_rate": 7.214476085194605e-06, "loss": 0.5087, "step": 2629 }, { "epoch": 1.9104260776791593, "grad_norm": 0.36297925771622874, "learning_rate": 7.212335507385453e-06, "loss": 0.5063, "step": 2630 }, { "epoch": 1.9111524754273261, "grad_norm": 0.3944990999511598, "learning_rate": 7.210194425264723e-06, "loss": 0.5068, "step": 2631 }, { "epoch": 1.9118788731754932, "grad_norm": 0.4122143059291724, "learning_rate": 7.208052839320481e-06, "loss": 0.5162, "step": 2632 }, { "epoch": 1.91260527092366, "grad_norm": 0.41692514963900895, "learning_rate": 7.205910750040914e-06, "loss": 0.4828, "step": 2633 }, { "epoch": 1.9133316686718271, "grad_norm": 0.3663441755784621, "learning_rate": 7.203768157914321e-06, "loss": 0.494, "step": 2634 }, { "epoch": 1.9140580664199942, "grad_norm": 0.4070037156583176, "learning_rate": 7.201625063429113e-06, "loss": 0.5121, "step": 2635 }, { "epoch": 1.914784464168161, "grad_norm": 0.4380005820938112, "learning_rate": 7.199481467073819e-06, "loss": 0.5157, "step": 2636 }, { "epoch": 1.915510861916328, "grad_norm": 0.39134139767667614, "learning_rate": 7.197337369337081e-06, "loss": 0.5115, "step": 2637 }, { "epoch": 1.916237259664495, "grad_norm": 0.3780955623983413, "learning_rate": 7.1951927707076545e-06, "loss": 0.5138, "step": 2638 }, { "epoch": 1.916963657412662, "grad_norm": 0.37420155415019496, "learning_rate": 7.193047671674411e-06, "loss": 0.5109, "step": 2639 }, { "epoch": 1.9176900551608291, "grad_norm": 0.3975633927338851, "learning_rate": 7.190902072726336e-06, "loss": 0.5175, "step": 2640 }, { "epoch": 1.918416452908996, "grad_norm": 0.41788467337021007, "learning_rate": 7.188755974352528e-06, "loss": 0.5034, "step": 2641 }, { "epoch": 1.9191428506571628, "grad_norm": 0.36881038549681205, "learning_rate": 7.186609377042199e-06, "loss": 0.4932, "step": 2642 }, { "epoch": 1.91986924840533, "grad_norm": 0.4923105516525616, "learning_rate": 7.1844622812846745e-06, "loss": 0.5074, "step": 2643 }, { "epoch": 1.920595646153497, "grad_norm": 0.4675468904655994, "learning_rate": 7.182314687569395e-06, "loss": 0.4971, "step": 2644 }, { "epoch": 1.921322043901664, "grad_norm": 0.33391764389643824, "learning_rate": 7.180166596385915e-06, "loss": 0.5141, "step": 2645 }, { "epoch": 1.922048441649831, "grad_norm": 0.37264094779898216, "learning_rate": 7.1780180082238994e-06, "loss": 0.5082, "step": 2646 }, { "epoch": 1.9227748393979978, "grad_norm": 0.33950669946292117, "learning_rate": 7.175868923573129e-06, "loss": 0.519, "step": 2647 }, { "epoch": 1.9235012371461648, "grad_norm": 0.3907280044051392, "learning_rate": 7.1737193429234985e-06, "loss": 0.5033, "step": 2648 }, { "epoch": 1.924227634894332, "grad_norm": 0.3981209785053024, "learning_rate": 7.171569266765012e-06, "loss": 0.5113, "step": 2649 }, { "epoch": 1.9249540326424988, "grad_norm": 0.3552940618648031, "learning_rate": 7.169418695587791e-06, "loss": 0.5262, "step": 2650 }, { "epoch": 1.9256804303906658, "grad_norm": 0.45671174402732945, "learning_rate": 7.167267629882066e-06, "loss": 0.5087, "step": 2651 }, { "epoch": 1.9264068281388327, "grad_norm": 0.36036782434775116, "learning_rate": 7.165116070138183e-06, "loss": 0.5093, "step": 2652 }, { "epoch": 1.9271332258869998, "grad_norm": 0.36770341112039956, "learning_rate": 7.162964016846597e-06, "loss": 0.5015, "step": 2653 }, { "epoch": 1.9278596236351668, "grad_norm": 0.4171887047487715, "learning_rate": 7.160811470497881e-06, "loss": 0.4875, "step": 2654 }, { "epoch": 1.9285860213833337, "grad_norm": 0.7588013368151402, "learning_rate": 7.158658431582718e-06, "loss": 0.4812, "step": 2655 }, { "epoch": 1.9293124191315005, "grad_norm": 0.4838546627793747, "learning_rate": 7.156504900591899e-06, "loss": 0.5061, "step": 2656 }, { "epoch": 1.9300388168796676, "grad_norm": 0.3893073278129443, "learning_rate": 7.154350878016332e-06, "loss": 0.4997, "step": 2657 }, { "epoch": 1.9307652146278347, "grad_norm": 0.35598578094946026, "learning_rate": 7.152196364347037e-06, "loss": 0.4818, "step": 2658 }, { "epoch": 1.9314916123760018, "grad_norm": 0.7028633945623345, "learning_rate": 7.1500413600751465e-06, "loss": 0.5064, "step": 2659 }, { "epoch": 1.9322180101241686, "grad_norm": 0.5021000366038473, "learning_rate": 7.147885865691899e-06, "loss": 0.4953, "step": 2660 }, { "epoch": 1.9329444078723355, "grad_norm": 0.4521851195168597, "learning_rate": 7.145729881688651e-06, "loss": 0.508, "step": 2661 }, { "epoch": 1.9336708056205025, "grad_norm": 0.32339465479764373, "learning_rate": 7.143573408556867e-06, "loss": 0.4954, "step": 2662 }, { "epoch": 1.9343972033686696, "grad_norm": 0.401677364787166, "learning_rate": 7.141416446788128e-06, "loss": 0.4913, "step": 2663 }, { "epoch": 1.9351236011168367, "grad_norm": 0.41714933742992666, "learning_rate": 7.139258996874122e-06, "loss": 0.5232, "step": 2664 }, { "epoch": 1.9358499988650035, "grad_norm": 0.4588630688440973, "learning_rate": 7.137101059306647e-06, "loss": 0.5167, "step": 2665 }, { "epoch": 1.9365763966131704, "grad_norm": 0.5128126005709326, "learning_rate": 7.134942634577615e-06, "loss": 0.5073, "step": 2666 }, { "epoch": 1.9373027943613375, "grad_norm": 0.3220816956148639, "learning_rate": 7.1327837231790484e-06, "loss": 0.4918, "step": 2667 }, { "epoch": 1.9380291921095045, "grad_norm": 0.3864453720966872, "learning_rate": 7.1306243256030815e-06, "loss": 0.5051, "step": 2668 }, { "epoch": 1.9387555898576716, "grad_norm": 0.3417311969733972, "learning_rate": 7.128464442341958e-06, "loss": 0.4992, "step": 2669 }, { "epoch": 1.9394819876058385, "grad_norm": 0.3666997641598726, "learning_rate": 7.126304073888033e-06, "loss": 0.4915, "step": 2670 }, { "epoch": 1.9402083853540053, "grad_norm": 0.32912302124120496, "learning_rate": 7.124143220733772e-06, "loss": 0.5132, "step": 2671 }, { "epoch": 1.9409347831021724, "grad_norm": 0.5894784767394627, "learning_rate": 7.121981883371748e-06, "loss": 0.5106, "step": 2672 }, { "epoch": 1.9416611808503395, "grad_norm": 0.36134194753601506, "learning_rate": 7.1198200622946516e-06, "loss": 0.5322, "step": 2673 }, { "epoch": 1.9423875785985063, "grad_norm": 0.5233500856029822, "learning_rate": 7.117657757995277e-06, "loss": 0.5006, "step": 2674 }, { "epoch": 1.9431139763466734, "grad_norm": 0.45441916828563017, "learning_rate": 7.115494970966532e-06, "loss": 0.5015, "step": 2675 }, { "epoch": 1.9438403740948402, "grad_norm": 0.42922725575066273, "learning_rate": 7.113331701701433e-06, "loss": 0.5118, "step": 2676 }, { "epoch": 1.9445667718430073, "grad_norm": 0.37288209324065047, "learning_rate": 7.111167950693106e-06, "loss": 0.5133, "step": 2677 }, { "epoch": 1.9452931695911744, "grad_norm": 0.5428767194900757, "learning_rate": 7.109003718434788e-06, "loss": 0.5109, "step": 2678 }, { "epoch": 1.9460195673393412, "grad_norm": 0.48476276527501894, "learning_rate": 7.106839005419825e-06, "loss": 0.5195, "step": 2679 }, { "epoch": 1.946745965087508, "grad_norm": 0.42779549179479115, "learning_rate": 7.104673812141676e-06, "loss": 0.5278, "step": 2680 }, { "epoch": 1.9474723628356752, "grad_norm": 0.4172577512072656, "learning_rate": 7.102508139093902e-06, "loss": 0.5225, "step": 2681 }, { "epoch": 1.9481987605838422, "grad_norm": 0.6445236653546356, "learning_rate": 7.100341986770178e-06, "loss": 0.4956, "step": 2682 }, { "epoch": 1.9489251583320093, "grad_norm": 0.36704041066233434, "learning_rate": 7.09817535566429e-06, "loss": 0.5056, "step": 2683 }, { "epoch": 1.9496515560801762, "grad_norm": 0.42608468891834045, "learning_rate": 7.0960082462701315e-06, "loss": 0.4976, "step": 2684 }, { "epoch": 1.950377953828343, "grad_norm": 0.3725547949696026, "learning_rate": 7.093840659081702e-06, "loss": 0.5209, "step": 2685 }, { "epoch": 1.95110435157651, "grad_norm": 0.347960151425129, "learning_rate": 7.091672594593114e-06, "loss": 0.4956, "step": 2686 }, { "epoch": 1.9518307493246772, "grad_norm": 0.3919601507999021, "learning_rate": 7.089504053298587e-06, "loss": 0.499, "step": 2687 }, { "epoch": 1.9525571470728442, "grad_norm": 0.43156269797326463, "learning_rate": 7.0873350356924495e-06, "loss": 0.5023, "step": 2688 }, { "epoch": 1.953283544821011, "grad_norm": 0.34776606928147463, "learning_rate": 7.085165542269139e-06, "loss": 0.4955, "step": 2689 }, { "epoch": 1.954009942569178, "grad_norm": 0.7017977923984215, "learning_rate": 7.082995573523202e-06, "loss": 0.4999, "step": 2690 }, { "epoch": 1.954736340317345, "grad_norm": 0.4815505166236814, "learning_rate": 7.080825129949289e-06, "loss": 0.5146, "step": 2691 }, { "epoch": 1.955462738065512, "grad_norm": 0.43130480748007916, "learning_rate": 7.0786542120421645e-06, "loss": 0.5215, "step": 2692 }, { "epoch": 1.956189135813679, "grad_norm": 0.38558580874476744, "learning_rate": 7.0764828202966986e-06, "loss": 0.5011, "step": 2693 }, { "epoch": 1.956915533561846, "grad_norm": 0.4036358786260398, "learning_rate": 7.074310955207869e-06, "loss": 0.5061, "step": 2694 }, { "epoch": 1.9576419313100129, "grad_norm": 0.33067727483602, "learning_rate": 7.072138617270765e-06, "loss": 0.4895, "step": 2695 }, { "epoch": 1.95836832905818, "grad_norm": 0.5605426386928977, "learning_rate": 7.069965806980574e-06, "loss": 0.5062, "step": 2696 }, { "epoch": 1.959094726806347, "grad_norm": 0.35218801878956085, "learning_rate": 7.067792524832604e-06, "loss": 0.5043, "step": 2697 }, { "epoch": 1.9598211245545138, "grad_norm": 0.3887738969745063, "learning_rate": 7.0656187713222615e-06, "loss": 0.5165, "step": 2698 }, { "epoch": 1.9605475223026807, "grad_norm": 0.3868917511130432, "learning_rate": 7.063444546945063e-06, "loss": 0.5117, "step": 2699 }, { "epoch": 1.9612739200508478, "grad_norm": 0.3402687842463978, "learning_rate": 7.061269852196633e-06, "loss": 0.4885, "step": 2700 }, { "epoch": 1.9620003177990148, "grad_norm": 0.36804469030096626, "learning_rate": 7.059094687572701e-06, "loss": 0.509, "step": 2701 }, { "epoch": 1.962726715547182, "grad_norm": 0.39457259575266557, "learning_rate": 7.056919053569109e-06, "loss": 0.4954, "step": 2702 }, { "epoch": 1.9634531132953488, "grad_norm": 0.3204056385239754, "learning_rate": 7.0547429506817986e-06, "loss": 0.512, "step": 2703 }, { "epoch": 1.9641795110435156, "grad_norm": 0.3629638300335219, "learning_rate": 7.052566379406824e-06, "loss": 0.4915, "step": 2704 }, { "epoch": 1.9649059087916827, "grad_norm": 0.4589761089027009, "learning_rate": 7.050389340240345e-06, "loss": 0.4938, "step": 2705 }, { "epoch": 1.9656323065398498, "grad_norm": 0.5304412242646968, "learning_rate": 7.048211833678624e-06, "loss": 0.5124, "step": 2706 }, { "epoch": 1.9663587042880168, "grad_norm": 0.36321087701659993, "learning_rate": 7.046033860218036e-06, "loss": 0.4953, "step": 2707 }, { "epoch": 1.9670851020361837, "grad_norm": 0.4130645277458678, "learning_rate": 7.04385542035506e-06, "loss": 0.4892, "step": 2708 }, { "epoch": 1.9678114997843505, "grad_norm": 0.35499258033837994, "learning_rate": 7.041676514586279e-06, "loss": 0.4795, "step": 2709 }, { "epoch": 1.9685378975325176, "grad_norm": 0.3760642593459764, "learning_rate": 7.039497143408384e-06, "loss": 0.5112, "step": 2710 }, { "epoch": 1.9692642952806847, "grad_norm": 0.39847541288606975, "learning_rate": 7.037317307318172e-06, "loss": 0.5131, "step": 2711 }, { "epoch": 1.9699906930288518, "grad_norm": 0.3325102212417593, "learning_rate": 7.035137006812548e-06, "loss": 0.5095, "step": 2712 }, { "epoch": 1.9707170907770186, "grad_norm": 0.41951767852434335, "learning_rate": 7.03295624238852e-06, "loss": 0.5029, "step": 2713 }, { "epoch": 1.9714434885251855, "grad_norm": 0.3882568800021323, "learning_rate": 7.030775014543204e-06, "loss": 0.5161, "step": 2714 }, { "epoch": 1.9721698862733525, "grad_norm": 0.3246175477769834, "learning_rate": 7.028593323773819e-06, "loss": 0.5173, "step": 2715 }, { "epoch": 1.9728962840215196, "grad_norm": 0.4320032858507335, "learning_rate": 7.026411170577691e-06, "loss": 0.4991, "step": 2716 }, { "epoch": 1.9736226817696865, "grad_norm": 0.3582516058922341, "learning_rate": 7.024228555452253e-06, "loss": 0.4979, "step": 2717 }, { "epoch": 1.9743490795178535, "grad_norm": 0.3865649105180352, "learning_rate": 7.022045478895038e-06, "loss": 0.5052, "step": 2718 }, { "epoch": 1.9750754772660204, "grad_norm": 0.3865623049303795, "learning_rate": 7.019861941403693e-06, "loss": 0.5102, "step": 2719 }, { "epoch": 1.9758018750141875, "grad_norm": 0.3744366218022015, "learning_rate": 7.017677943475962e-06, "loss": 0.5037, "step": 2720 }, { "epoch": 1.9765282727623545, "grad_norm": 0.4049135491697213, "learning_rate": 7.015493485609697e-06, "loss": 0.5103, "step": 2721 }, { "epoch": 1.9772546705105214, "grad_norm": 0.41669353889570854, "learning_rate": 7.013308568302855e-06, "loss": 0.496, "step": 2722 }, { "epoch": 1.9779810682586882, "grad_norm": 1.55599304714908, "learning_rate": 7.011123192053497e-06, "loss": 0.4885, "step": 2723 }, { "epoch": 1.9787074660068553, "grad_norm": 0.3766075391392106, "learning_rate": 7.00893735735979e-06, "loss": 0.5091, "step": 2724 }, { "epoch": 1.9794338637550224, "grad_norm": 0.39432289032043133, "learning_rate": 7.006751064720003e-06, "loss": 0.5043, "step": 2725 }, { "epoch": 1.9801602615031895, "grad_norm": 0.42252648320613373, "learning_rate": 7.004564314632514e-06, "loss": 0.4971, "step": 2726 }, { "epoch": 1.9808866592513563, "grad_norm": 0.5220318083615987, "learning_rate": 7.002377107595799e-06, "loss": 0.5131, "step": 2727 }, { "epoch": 1.9816130569995232, "grad_norm": 0.439124427096527, "learning_rate": 7.000189444108443e-06, "loss": 0.4987, "step": 2728 }, { "epoch": 1.9823394547476902, "grad_norm": 0.6087596878574801, "learning_rate": 6.998001324669135e-06, "loss": 0.5007, "step": 2729 }, { "epoch": 1.9830658524958573, "grad_norm": 0.4689995890970593, "learning_rate": 6.995812749776663e-06, "loss": 0.4928, "step": 2730 }, { "epoch": 1.9837922502440244, "grad_norm": 0.4303674417050366, "learning_rate": 6.993623719929924e-06, "loss": 0.4972, "step": 2731 }, { "epoch": 1.9845186479921912, "grad_norm": 1.7806687410524744, "learning_rate": 6.991434235627918e-06, "loss": 0.5007, "step": 2732 }, { "epoch": 1.985245045740358, "grad_norm": 0.45617373447370785, "learning_rate": 6.989244297369746e-06, "loss": 0.5038, "step": 2733 }, { "epoch": 1.9859714434885252, "grad_norm": 0.39328968595405445, "learning_rate": 6.9870539056546145e-06, "loss": 0.5095, "step": 2734 }, { "epoch": 1.9866978412366922, "grad_norm": 0.43161678194415737, "learning_rate": 6.984863060981835e-06, "loss": 0.5066, "step": 2735 }, { "epoch": 1.987424238984859, "grad_norm": 0.3628627971741911, "learning_rate": 6.982671763850814e-06, "loss": 0.4919, "step": 2736 }, { "epoch": 1.9881506367330262, "grad_norm": 0.5086084610925337, "learning_rate": 6.980480014761074e-06, "loss": 0.51, "step": 2737 }, { "epoch": 1.988877034481193, "grad_norm": 0.3982110835234876, "learning_rate": 6.9782878142122315e-06, "loss": 0.5163, "step": 2738 }, { "epoch": 1.98960343222936, "grad_norm": 0.3966667256287206, "learning_rate": 6.976095162704006e-06, "loss": 0.4951, "step": 2739 }, { "epoch": 1.9903298299775272, "grad_norm": 0.5304184543224457, "learning_rate": 6.973902060736226e-06, "loss": 0.5118, "step": 2740 }, { "epoch": 1.991056227725694, "grad_norm": 0.3981730445998376, "learning_rate": 6.971708508808815e-06, "loss": 0.5081, "step": 2741 }, { "epoch": 1.9917826254738609, "grad_norm": 0.5015706091927886, "learning_rate": 6.969514507421805e-06, "loss": 0.5093, "step": 2742 }, { "epoch": 1.992509023222028, "grad_norm": 0.37103098758948716, "learning_rate": 6.967320057075329e-06, "loss": 0.4965, "step": 2743 }, { "epoch": 1.993235420970195, "grad_norm": 0.5781372172121526, "learning_rate": 6.965125158269619e-06, "loss": 0.5011, "step": 2744 }, { "epoch": 1.993961818718362, "grad_norm": 0.4157387952447532, "learning_rate": 6.962929811505013e-06, "loss": 0.5058, "step": 2745 }, { "epoch": 1.994688216466529, "grad_norm": 0.4072625761207944, "learning_rate": 6.9607340172819495e-06, "loss": 0.5087, "step": 2746 }, { "epoch": 1.9954146142146958, "grad_norm": 0.4575323537660217, "learning_rate": 6.958537776100971e-06, "loss": 0.4713, "step": 2747 }, { "epoch": 1.9961410119628629, "grad_norm": 0.4236304013107574, "learning_rate": 6.9563410884627195e-06, "loss": 0.4951, "step": 2748 }, { "epoch": 1.99686740971103, "grad_norm": 0.3395318180298695, "learning_rate": 6.9541439548679394e-06, "loss": 0.5169, "step": 2749 }, { "epoch": 1.997593807459197, "grad_norm": 0.40012977930638305, "learning_rate": 6.9519463758174745e-06, "loss": 0.4898, "step": 2750 }, { "epoch": 1.9983202052073639, "grad_norm": 0.6044468828057461, "learning_rate": 6.949748351812277e-06, "loss": 0.4968, "step": 2751 }, { "epoch": 1.9990466029555307, "grad_norm": 0.3290317646035132, "learning_rate": 6.947549883353393e-06, "loss": 0.4896, "step": 2752 }, { "epoch": 1.9997730007036978, "grad_norm": 0.4191772501374024, "learning_rate": 6.9453509709419754e-06, "loss": 0.5034, "step": 2753 }, { "epoch": 2.000499398451865, "grad_norm": 0.4383592732805349, "learning_rate": 6.943151615079273e-06, "loss": 0.4997, "step": 2754 }, { "epoch": 2.001225796200032, "grad_norm": 0.4310380742661681, "learning_rate": 6.9409518162666416e-06, "loss": 0.5013, "step": 2755 }, { "epoch": 2.0019521939481986, "grad_norm": 0.6634042092614508, "learning_rate": 6.938751575005531e-06, "loss": 0.5085, "step": 2756 }, { "epoch": 2.0026785916963656, "grad_norm": 0.43186399669748915, "learning_rate": 6.9365508917975e-06, "loss": 0.5018, "step": 2757 }, { "epoch": 2.0034049894445327, "grad_norm": 0.34231458896554495, "learning_rate": 6.934349767144203e-06, "loss": 0.4969, "step": 2758 }, { "epoch": 2.0041313871927, "grad_norm": 0.3852057248556303, "learning_rate": 6.932148201547395e-06, "loss": 0.5099, "step": 2759 }, { "epoch": 2.004857784940867, "grad_norm": 0.5008104603895283, "learning_rate": 6.929946195508933e-06, "loss": 0.5165, "step": 2760 }, { "epoch": 2.0055841826890335, "grad_norm": 0.3317056532033946, "learning_rate": 6.9277437495307745e-06, "loss": 0.5002, "step": 2761 }, { "epoch": 2.0063105804372006, "grad_norm": 0.5412828602249329, "learning_rate": 6.925540864114975e-06, "loss": 0.4962, "step": 2762 }, { "epoch": 2.0070369781853676, "grad_norm": 0.3430388566201603, "learning_rate": 6.923337539763693e-06, "loss": 0.4937, "step": 2763 }, { "epoch": 2.0077633759335347, "grad_norm": 0.3647393285999755, "learning_rate": 6.921133776979186e-06, "loss": 0.4975, "step": 2764 }, { "epoch": 2.008489773681702, "grad_norm": 0.40829618718724847, "learning_rate": 6.918929576263811e-06, "loss": 0.4984, "step": 2765 }, { "epoch": 2.0092161714298684, "grad_norm": 0.40555403400281487, "learning_rate": 6.916724938120026e-06, "loss": 0.5066, "step": 2766 }, { "epoch": 2.0099425691780355, "grad_norm": 0.34064395872559766, "learning_rate": 6.914519863050388e-06, "loss": 0.5029, "step": 2767 }, { "epoch": 2.0106689669262026, "grad_norm": 0.38967717514091643, "learning_rate": 6.912314351557552e-06, "loss": 0.5132, "step": 2768 }, { "epoch": 2.0113953646743696, "grad_norm": 0.3360598328932662, "learning_rate": 6.910108404144276e-06, "loss": 0.4864, "step": 2769 }, { "epoch": 2.0121217624225367, "grad_norm": 0.36912864206432744, "learning_rate": 6.907902021313414e-06, "loss": 0.4959, "step": 2770 }, { "epoch": 2.0128481601707033, "grad_norm": 0.35828330181110757, "learning_rate": 6.905695203567919e-06, "loss": 0.4931, "step": 2771 }, { "epoch": 2.0135745579188704, "grad_norm": 0.3555368715816114, "learning_rate": 6.90348795141085e-06, "loss": 0.5029, "step": 2772 }, { "epoch": 2.0143009556670375, "grad_norm": 0.33659315686379043, "learning_rate": 6.901280265345355e-06, "loss": 0.4989, "step": 2773 }, { "epoch": 2.0150273534152046, "grad_norm": 0.37550584227442796, "learning_rate": 6.8990721458746875e-06, "loss": 0.5201, "step": 2774 }, { "epoch": 2.015753751163371, "grad_norm": 0.3783837186018702, "learning_rate": 6.896863593502198e-06, "loss": 0.5071, "step": 2775 }, { "epoch": 2.0164801489115383, "grad_norm": 0.3258935438932034, "learning_rate": 6.894654608731335e-06, "loss": 0.5182, "step": 2776 }, { "epoch": 2.0172065466597053, "grad_norm": 0.39315885025393604, "learning_rate": 6.8924451920656475e-06, "loss": 0.4923, "step": 2777 }, { "epoch": 2.0179329444078724, "grad_norm": 0.3632809798562676, "learning_rate": 6.890235344008781e-06, "loss": 0.4815, "step": 2778 }, { "epoch": 2.0186593421560395, "grad_norm": 0.36527444592888336, "learning_rate": 6.88802506506448e-06, "loss": 0.4956, "step": 2779 }, { "epoch": 2.019385739904206, "grad_norm": 0.3295945817112341, "learning_rate": 6.8858143557365865e-06, "loss": 0.5091, "step": 2780 }, { "epoch": 2.020112137652373, "grad_norm": 0.38931789216868123, "learning_rate": 6.883603216529043e-06, "loss": 0.5059, "step": 2781 }, { "epoch": 2.0208385354005403, "grad_norm": 0.3801269142307661, "learning_rate": 6.881391647945887e-06, "loss": 0.4952, "step": 2782 }, { "epoch": 2.0215649331487073, "grad_norm": 0.3763092423880549, "learning_rate": 6.879179650491257e-06, "loss": 0.4924, "step": 2783 }, { "epoch": 2.0222913308968744, "grad_norm": 0.36364396731621496, "learning_rate": 6.8769672246693865e-06, "loss": 0.487, "step": 2784 }, { "epoch": 2.023017728645041, "grad_norm": 0.48214570723774847, "learning_rate": 6.8747543709846064e-06, "loss": 0.4799, "step": 2785 }, { "epoch": 2.023744126393208, "grad_norm": 0.3373087430212078, "learning_rate": 6.872541089941347e-06, "loss": 0.5032, "step": 2786 }, { "epoch": 2.024470524141375, "grad_norm": 0.4246028661887048, "learning_rate": 6.870327382044138e-06, "loss": 0.514, "step": 2787 }, { "epoch": 2.0251969218895423, "grad_norm": 0.3637838866454337, "learning_rate": 6.868113247797601e-06, "loss": 0.4958, "step": 2788 }, { "epoch": 2.0259233196377093, "grad_norm": 0.3624459628075546, "learning_rate": 6.865898687706458e-06, "loss": 0.5183, "step": 2789 }, { "epoch": 2.026649717385876, "grad_norm": 0.3497999660081715, "learning_rate": 6.8636837022755275e-06, "loss": 0.4969, "step": 2790 }, { "epoch": 2.027376115134043, "grad_norm": 0.30925439334485977, "learning_rate": 6.8614682920097265e-06, "loss": 0.4905, "step": 2791 }, { "epoch": 2.02810251288221, "grad_norm": 0.4779644594533042, "learning_rate": 6.859252457414067e-06, "loss": 0.4983, "step": 2792 }, { "epoch": 2.028828910630377, "grad_norm": 0.4156555631857716, "learning_rate": 6.857036198993658e-06, "loss": 0.5111, "step": 2793 }, { "epoch": 2.029555308378544, "grad_norm": 0.4231134051622082, "learning_rate": 6.8548195172537045e-06, "loss": 0.5015, "step": 2794 }, { "epoch": 2.030281706126711, "grad_norm": 0.35883752557089954, "learning_rate": 6.8526024126995096e-06, "loss": 0.4901, "step": 2795 }, { "epoch": 2.031008103874878, "grad_norm": 0.5176921934711561, "learning_rate": 6.850384885836472e-06, "loss": 0.4762, "step": 2796 }, { "epoch": 2.031734501623045, "grad_norm": 0.47898011289447745, "learning_rate": 6.8481669371700865e-06, "loss": 0.5091, "step": 2797 }, { "epoch": 2.032460899371212, "grad_norm": 0.38298304746184586, "learning_rate": 6.845948567205945e-06, "loss": 0.4839, "step": 2798 }, { "epoch": 2.0331872971193787, "grad_norm": 0.49166223619894195, "learning_rate": 6.843729776449734e-06, "loss": 0.4936, "step": 2799 }, { "epoch": 2.033913694867546, "grad_norm": 0.36732722633298054, "learning_rate": 6.841510565407235e-06, "loss": 0.5045, "step": 2800 }, { "epoch": 2.034640092615713, "grad_norm": 0.5769337316874685, "learning_rate": 6.839290934584332e-06, "loss": 0.5023, "step": 2801 }, { "epoch": 2.03536649036388, "grad_norm": 0.34290008976429126, "learning_rate": 6.837070884486994e-06, "loss": 0.504, "step": 2802 }, { "epoch": 2.036092888112047, "grad_norm": 0.36473122595802115, "learning_rate": 6.8348504156212925e-06, "loss": 0.5153, "step": 2803 }, { "epoch": 2.0368192858602137, "grad_norm": 0.4304895612429296, "learning_rate": 6.832629528493395e-06, "loss": 0.4997, "step": 2804 }, { "epoch": 2.0375456836083807, "grad_norm": 0.4387726012724692, "learning_rate": 6.830408223609559e-06, "loss": 0.5105, "step": 2805 }, { "epoch": 2.038272081356548, "grad_norm": 0.9160945864401482, "learning_rate": 6.828186501476145e-06, "loss": 0.4922, "step": 2806 }, { "epoch": 2.038998479104715, "grad_norm": 0.36725867499766507, "learning_rate": 6.8259643625996016e-06, "loss": 0.5068, "step": 2807 }, { "epoch": 2.039724876852882, "grad_norm": 0.36862502375795786, "learning_rate": 6.8237418074864766e-06, "loss": 0.5122, "step": 2808 }, { "epoch": 2.0404512746010486, "grad_norm": 0.33923787542671696, "learning_rate": 6.8215188366434104e-06, "loss": 0.5058, "step": 2809 }, { "epoch": 2.0411776723492157, "grad_norm": 0.44498404380627354, "learning_rate": 6.819295450577138e-06, "loss": 0.4926, "step": 2810 }, { "epoch": 2.0419040700973827, "grad_norm": 0.33216610091469256, "learning_rate": 6.817071649794491e-06, "loss": 0.4973, "step": 2811 }, { "epoch": 2.04263046784555, "grad_norm": 0.3335571997556061, "learning_rate": 6.8148474348023954e-06, "loss": 0.4831, "step": 2812 }, { "epoch": 2.043356865593717, "grad_norm": 0.360857049482772, "learning_rate": 6.812622806107869e-06, "loss": 0.5019, "step": 2813 }, { "epoch": 2.0440832633418835, "grad_norm": 0.4074737961264872, "learning_rate": 6.810397764218027e-06, "loss": 0.5036, "step": 2814 }, { "epoch": 2.0448096610900506, "grad_norm": 0.334689546487221, "learning_rate": 6.808172309640078e-06, "loss": 0.5003, "step": 2815 }, { "epoch": 2.0455360588382177, "grad_norm": 0.37500374203543435, "learning_rate": 6.805946442881322e-06, "loss": 0.5213, "step": 2816 }, { "epoch": 2.0462624565863847, "grad_norm": 0.4206563647325095, "learning_rate": 6.803720164449155e-06, "loss": 0.4977, "step": 2817 }, { "epoch": 2.0469888543345514, "grad_norm": 0.370548084274352, "learning_rate": 6.801493474851069e-06, "loss": 0.5225, "step": 2818 }, { "epoch": 2.0477152520827184, "grad_norm": 0.354149466875892, "learning_rate": 6.799266374594646e-06, "loss": 0.4976, "step": 2819 }, { "epoch": 2.0484416498308855, "grad_norm": 0.3426096343449712, "learning_rate": 6.797038864187564e-06, "loss": 0.5028, "step": 2820 }, { "epoch": 2.0491680475790526, "grad_norm": 0.3460308554783054, "learning_rate": 6.794810944137595e-06, "loss": 0.5117, "step": 2821 }, { "epoch": 2.0498944453272196, "grad_norm": 0.3678978614040748, "learning_rate": 6.792582614952602e-06, "loss": 0.4935, "step": 2822 }, { "epoch": 2.0506208430753863, "grad_norm": 0.3933345821668368, "learning_rate": 6.790353877140542e-06, "loss": 0.5033, "step": 2823 }, { "epoch": 2.0513472408235534, "grad_norm": 0.3994192333516289, "learning_rate": 6.788124731209466e-06, "loss": 0.4953, "step": 2824 }, { "epoch": 2.0520736385717204, "grad_norm": 0.36607993612051454, "learning_rate": 6.785895177667516e-06, "loss": 0.4963, "step": 2825 }, { "epoch": 2.0528000363198875, "grad_norm": 0.36687689120176453, "learning_rate": 6.78366521702293e-06, "loss": 0.5003, "step": 2826 }, { "epoch": 2.0535264340680546, "grad_norm": 0.42363155301268685, "learning_rate": 6.781434849784039e-06, "loss": 0.51, "step": 2827 }, { "epoch": 2.054252831816221, "grad_norm": 0.3378823422646072, "learning_rate": 6.779204076459264e-06, "loss": 0.4923, "step": 2828 }, { "epoch": 2.0549792295643883, "grad_norm": 0.41958106038730947, "learning_rate": 6.776972897557117e-06, "loss": 0.4943, "step": 2829 }, { "epoch": 2.0557056273125554, "grad_norm": 0.3637963294712784, "learning_rate": 6.774741313586206e-06, "loss": 0.5042, "step": 2830 }, { "epoch": 2.0564320250607224, "grad_norm": 0.35444308760596194, "learning_rate": 6.772509325055233e-06, "loss": 0.497, "step": 2831 }, { "epoch": 2.0571584228088895, "grad_norm": 0.30668522235604445, "learning_rate": 6.770276932472987e-06, "loss": 0.5104, "step": 2832 }, { "epoch": 2.057884820557056, "grad_norm": 0.5739622435217333, "learning_rate": 6.768044136348353e-06, "loss": 0.4828, "step": 2833 }, { "epoch": 2.058611218305223, "grad_norm": 0.4356973061131128, "learning_rate": 6.765810937190307e-06, "loss": 0.5088, "step": 2834 }, { "epoch": 2.0593376160533903, "grad_norm": 0.39513686409727394, "learning_rate": 6.763577335507913e-06, "loss": 0.4884, "step": 2835 }, { "epoch": 2.0600640138015573, "grad_norm": 0.4080599817351704, "learning_rate": 6.761343331810334e-06, "loss": 0.4983, "step": 2836 }, { "epoch": 2.060790411549724, "grad_norm": 0.4641360221646442, "learning_rate": 6.759108926606821e-06, "loss": 0.5074, "step": 2837 }, { "epoch": 2.061516809297891, "grad_norm": 0.48283517031623097, "learning_rate": 6.7568741204067145e-06, "loss": 0.4941, "step": 2838 }, { "epoch": 2.062243207046058, "grad_norm": 0.37677270766893173, "learning_rate": 6.754638913719449e-06, "loss": 0.495, "step": 2839 }, { "epoch": 2.062969604794225, "grad_norm": 0.3897940565546679, "learning_rate": 6.752403307054549e-06, "loss": 0.4832, "step": 2840 }, { "epoch": 2.0636960025423923, "grad_norm": 0.3706329128849379, "learning_rate": 6.750167300921635e-06, "loss": 0.4951, "step": 2841 }, { "epoch": 2.064422400290559, "grad_norm": 0.3677737911519454, "learning_rate": 6.747930895830409e-06, "loss": 0.4971, "step": 2842 }, { "epoch": 2.065148798038726, "grad_norm": 0.4806697321398137, "learning_rate": 6.745694092290671e-06, "loss": 0.5163, "step": 2843 }, { "epoch": 2.065875195786893, "grad_norm": 0.3258014555766835, "learning_rate": 6.74345689081231e-06, "loss": 0.4869, "step": 2844 }, { "epoch": 2.06660159353506, "grad_norm": 0.35430162341459304, "learning_rate": 6.741219291905308e-06, "loss": 0.5137, "step": 2845 }, { "epoch": 2.067327991283227, "grad_norm": 0.3735414728864755, "learning_rate": 6.738981296079734e-06, "loss": 0.5199, "step": 2846 }, { "epoch": 2.068054389031394, "grad_norm": 0.4005930008235301, "learning_rate": 6.7367429038457485e-06, "loss": 0.5184, "step": 2847 }, { "epoch": 2.068780786779561, "grad_norm": 0.46490959194042725, "learning_rate": 6.7345041157136035e-06, "loss": 0.5035, "step": 2848 }, { "epoch": 2.069507184527728, "grad_norm": 0.336522204114012, "learning_rate": 6.7322649321936395e-06, "loss": 0.4975, "step": 2849 }, { "epoch": 2.070233582275895, "grad_norm": 0.3625662873299568, "learning_rate": 6.7300253537962905e-06, "loss": 0.5191, "step": 2850 }, { "epoch": 2.070959980024062, "grad_norm": 0.33860160399452294, "learning_rate": 6.727785381032076e-06, "loss": 0.5, "step": 2851 }, { "epoch": 2.0716863777722287, "grad_norm": 0.363185881586708, "learning_rate": 6.725545014411608e-06, "loss": 0.4773, "step": 2852 }, { "epoch": 2.072412775520396, "grad_norm": 0.32039147732115986, "learning_rate": 6.723304254445589e-06, "loss": 0.5072, "step": 2853 }, { "epoch": 2.073139173268563, "grad_norm": 0.37782007575870213, "learning_rate": 6.72106310164481e-06, "loss": 0.4984, "step": 2854 }, { "epoch": 2.07386557101673, "grad_norm": 0.33014550752157024, "learning_rate": 6.718821556520151e-06, "loss": 0.4903, "step": 2855 }, { "epoch": 2.074591968764897, "grad_norm": 0.4828649833387322, "learning_rate": 6.716579619582581e-06, "loss": 0.502, "step": 2856 }, { "epoch": 2.0753183665130637, "grad_norm": 0.40367185786911486, "learning_rate": 6.71433729134316e-06, "loss": 0.506, "step": 2857 }, { "epoch": 2.0760447642612307, "grad_norm": 0.3460938036307214, "learning_rate": 6.712094572313038e-06, "loss": 0.496, "step": 2858 }, { "epoch": 2.076771162009398, "grad_norm": 0.3581135934064617, "learning_rate": 6.70985146300345e-06, "loss": 0.4855, "step": 2859 }, { "epoch": 2.077497559757565, "grad_norm": 0.3518723354378461, "learning_rate": 6.707607963925725e-06, "loss": 0.5071, "step": 2860 }, { "epoch": 2.0782239575057315, "grad_norm": 0.41437491459633446, "learning_rate": 6.7053640755912786e-06, "loss": 0.4892, "step": 2861 }, { "epoch": 2.0789503552538986, "grad_norm": 0.40016810758491533, "learning_rate": 6.703119798511612e-06, "loss": 0.5168, "step": 2862 }, { "epoch": 2.0796767530020657, "grad_norm": 0.4107704547854394, "learning_rate": 6.700875133198321e-06, "loss": 0.5094, "step": 2863 }, { "epoch": 2.0804031507502327, "grad_norm": 0.3885733785737631, "learning_rate": 6.698630080163086e-06, "loss": 0.5, "step": 2864 }, { "epoch": 2.0811295484984, "grad_norm": 0.5226559711677936, "learning_rate": 6.696384639917677e-06, "loss": 0.5024, "step": 2865 }, { "epoch": 2.0818559462465664, "grad_norm": 0.3797436835719642, "learning_rate": 6.6941388129739504e-06, "loss": 0.4978, "step": 2866 }, { "epoch": 2.0825823439947335, "grad_norm": 0.3646627564573875, "learning_rate": 6.691892599843856e-06, "loss": 0.5022, "step": 2867 }, { "epoch": 2.0833087417429006, "grad_norm": 0.36932403717003065, "learning_rate": 6.689646001039422e-06, "loss": 0.4874, "step": 2868 }, { "epoch": 2.0840351394910677, "grad_norm": 0.3689439163666255, "learning_rate": 6.687399017072775e-06, "loss": 0.4938, "step": 2869 }, { "epoch": 2.0847615372392347, "grad_norm": 0.3554966085123132, "learning_rate": 6.685151648456124e-06, "loss": 0.4857, "step": 2870 }, { "epoch": 2.0854879349874014, "grad_norm": 0.6683235515857348, "learning_rate": 6.682903895701767e-06, "loss": 0.4834, "step": 2871 }, { "epoch": 2.0862143327355684, "grad_norm": 0.37256500006151105, "learning_rate": 6.680655759322086e-06, "loss": 0.5091, "step": 2872 }, { "epoch": 2.0869407304837355, "grad_norm": 0.3597428273584394, "learning_rate": 6.678407239829558e-06, "loss": 0.5078, "step": 2873 }, { "epoch": 2.0876671282319026, "grad_norm": 0.4260727973510339, "learning_rate": 6.676158337736738e-06, "loss": 0.5046, "step": 2874 }, { "epoch": 2.0883935259800697, "grad_norm": 0.41474895800784195, "learning_rate": 6.673909053556278e-06, "loss": 0.489, "step": 2875 }, { "epoch": 2.0891199237282363, "grad_norm": 0.40947917474288614, "learning_rate": 6.67165938780091e-06, "loss": 0.5022, "step": 2876 }, { "epoch": 2.0898463214764034, "grad_norm": 0.39584157189048297, "learning_rate": 6.669409340983455e-06, "loss": 0.4945, "step": 2877 }, { "epoch": 2.0905727192245704, "grad_norm": 0.3825280975330202, "learning_rate": 6.66715891361682e-06, "loss": 0.492, "step": 2878 }, { "epoch": 2.0912991169727375, "grad_norm": 0.37703028844024256, "learning_rate": 6.664908106214001e-06, "loss": 0.4941, "step": 2879 }, { "epoch": 2.0920255147209046, "grad_norm": 0.364638637636816, "learning_rate": 6.66265691928808e-06, "loss": 0.5158, "step": 2880 }, { "epoch": 2.092751912469071, "grad_norm": 0.33601793792720597, "learning_rate": 6.660405353352226e-06, "loss": 0.5131, "step": 2881 }, { "epoch": 2.0934783102172383, "grad_norm": 0.592614366219023, "learning_rate": 6.658153408919689e-06, "loss": 0.4827, "step": 2882 }, { "epoch": 2.0942047079654054, "grad_norm": 0.3619686249243115, "learning_rate": 6.6559010865038135e-06, "loss": 0.4959, "step": 2883 }, { "epoch": 2.0949311057135724, "grad_norm": 0.35243439705723756, "learning_rate": 6.653648386618025e-06, "loss": 0.4988, "step": 2884 }, { "epoch": 2.095657503461739, "grad_norm": 0.3651217663787908, "learning_rate": 6.651395309775837e-06, "loss": 0.496, "step": 2885 }, { "epoch": 2.096383901209906, "grad_norm": 0.3842440109628493, "learning_rate": 6.649141856490846e-06, "loss": 0.4979, "step": 2886 }, { "epoch": 2.097110298958073, "grad_norm": 0.4581904771466546, "learning_rate": 6.646888027276739e-06, "loss": 0.5091, "step": 2887 }, { "epoch": 2.0978366967062403, "grad_norm": 0.5193582269979015, "learning_rate": 6.644633822647285e-06, "loss": 0.5116, "step": 2888 }, { "epoch": 2.0985630944544074, "grad_norm": 0.3359619709738642, "learning_rate": 6.6423792431163395e-06, "loss": 0.4923, "step": 2889 }, { "epoch": 2.099289492202574, "grad_norm": 0.3597705367645605, "learning_rate": 6.640124289197845e-06, "loss": 0.4991, "step": 2890 }, { "epoch": 2.100015889950741, "grad_norm": 0.407415043370364, "learning_rate": 6.637868961405829e-06, "loss": 0.487, "step": 2891 }, { "epoch": 2.100742287698908, "grad_norm": 0.37796274922777096, "learning_rate": 6.635613260254401e-06, "loss": 0.4914, "step": 2892 }, { "epoch": 2.101468685447075, "grad_norm": 0.31201147100352206, "learning_rate": 6.633357186257759e-06, "loss": 0.5067, "step": 2893 }, { "epoch": 2.1021950831952423, "grad_norm": 0.3265575415617553, "learning_rate": 6.6311007399301855e-06, "loss": 0.4925, "step": 2894 }, { "epoch": 2.102921480943409, "grad_norm": 0.38724063508599177, "learning_rate": 6.628843921786045e-06, "loss": 0.4985, "step": 2895 }, { "epoch": 2.103647878691576, "grad_norm": 0.6051025075106355, "learning_rate": 6.626586732339794e-06, "loss": 0.5002, "step": 2896 }, { "epoch": 2.104374276439743, "grad_norm": 0.38902900482916525, "learning_rate": 6.624329172105964e-06, "loss": 0.5102, "step": 2897 }, { "epoch": 2.10510067418791, "grad_norm": 0.3551678176261896, "learning_rate": 6.622071241599175e-06, "loss": 0.5054, "step": 2898 }, { "epoch": 2.105827071936077, "grad_norm": 0.3393281263547164, "learning_rate": 6.619812941334136e-06, "loss": 0.5078, "step": 2899 }, { "epoch": 2.106553469684244, "grad_norm": 1.5832168634036479, "learning_rate": 6.617554271825636e-06, "loss": 0.4975, "step": 2900 }, { "epoch": 2.107279867432411, "grad_norm": 0.3944324081013014, "learning_rate": 6.615295233588546e-06, "loss": 0.5181, "step": 2901 }, { "epoch": 2.108006265180578, "grad_norm": 0.3668693933031106, "learning_rate": 6.613035827137827e-06, "loss": 0.5025, "step": 2902 }, { "epoch": 2.108732662928745, "grad_norm": 0.35678377216491264, "learning_rate": 6.610776052988519e-06, "loss": 0.4916, "step": 2903 }, { "epoch": 2.1094590606769117, "grad_norm": 0.3724762793133281, "learning_rate": 6.608515911655744e-06, "loss": 0.4914, "step": 2904 }, { "epoch": 2.1101854584250788, "grad_norm": 0.29830214021032636, "learning_rate": 6.606255403654717e-06, "loss": 0.4919, "step": 2905 }, { "epoch": 2.110911856173246, "grad_norm": 0.4018527850111275, "learning_rate": 6.603994529500728e-06, "loss": 0.5077, "step": 2906 }, { "epoch": 2.111638253921413, "grad_norm": 0.4001560282776422, "learning_rate": 6.601733289709154e-06, "loss": 0.4812, "step": 2907 }, { "epoch": 2.11236465166958, "grad_norm": 0.4123126231877299, "learning_rate": 6.599471684795452e-06, "loss": 0.4916, "step": 2908 }, { "epoch": 2.1130910494177466, "grad_norm": 0.31888312898973853, "learning_rate": 6.597209715275168e-06, "loss": 0.4819, "step": 2909 }, { "epoch": 2.1138174471659137, "grad_norm": 0.37376059043712934, "learning_rate": 6.594947381663926e-06, "loss": 0.4869, "step": 2910 }, { "epoch": 2.1145438449140808, "grad_norm": 0.3313260091877316, "learning_rate": 6.592684684477435e-06, "loss": 0.5016, "step": 2911 }, { "epoch": 2.115270242662248, "grad_norm": 0.3587303872117973, "learning_rate": 6.590421624231487e-06, "loss": 0.4815, "step": 2912 }, { "epoch": 2.115996640410415, "grad_norm": 0.37481813006623715, "learning_rate": 6.588158201441956e-06, "loss": 0.4888, "step": 2913 }, { "epoch": 2.1167230381585815, "grad_norm": 0.32510590971086595, "learning_rate": 6.5858944166247994e-06, "loss": 0.5049, "step": 2914 }, { "epoch": 2.1174494359067486, "grad_norm": 0.4162742377307629, "learning_rate": 6.583630270296057e-06, "loss": 0.4937, "step": 2915 }, { "epoch": 2.1181758336549157, "grad_norm": 0.4144903712969387, "learning_rate": 6.581365762971853e-06, "loss": 0.5029, "step": 2916 }, { "epoch": 2.1189022314030828, "grad_norm": 0.3926017570855505, "learning_rate": 6.579100895168389e-06, "loss": 0.5063, "step": 2917 }, { "epoch": 2.11962862915125, "grad_norm": 0.37089262361926517, "learning_rate": 6.576835667401953e-06, "loss": 0.494, "step": 2918 }, { "epoch": 2.1203550268994165, "grad_norm": 0.414632796429618, "learning_rate": 6.574570080188911e-06, "loss": 0.4996, "step": 2919 }, { "epoch": 2.1210814246475835, "grad_norm": 0.3823046012390911, "learning_rate": 6.5723041340457175e-06, "loss": 0.506, "step": 2920 }, { "epoch": 2.1218078223957506, "grad_norm": 0.34052878156073463, "learning_rate": 6.570037829488902e-06, "loss": 0.4983, "step": 2921 }, { "epoch": 2.1225342201439177, "grad_norm": 0.3739161848070694, "learning_rate": 6.56777116703508e-06, "loss": 0.4977, "step": 2922 }, { "epoch": 2.1232606178920843, "grad_norm": 0.3433004278765078, "learning_rate": 6.565504147200945e-06, "loss": 0.5015, "step": 2923 }, { "epoch": 2.1239870156402514, "grad_norm": 0.3546076096060906, "learning_rate": 6.563236770503276e-06, "loss": 0.4928, "step": 2924 }, { "epoch": 2.1247134133884185, "grad_norm": 0.3522187399759698, "learning_rate": 6.560969037458933e-06, "loss": 0.5002, "step": 2925 }, { "epoch": 2.1254398111365855, "grad_norm": 0.43583768519910443, "learning_rate": 6.558700948584852e-06, "loss": 0.4951, "step": 2926 }, { "epoch": 2.1261662088847526, "grad_norm": 0.39854393558885376, "learning_rate": 6.556432504398056e-06, "loss": 0.5109, "step": 2927 }, { "epoch": 2.1268926066329192, "grad_norm": 0.4879786970248183, "learning_rate": 6.554163705415646e-06, "loss": 0.4779, "step": 2928 }, { "epoch": 2.1276190043810863, "grad_norm": 0.3796776631929523, "learning_rate": 6.551894552154806e-06, "loss": 0.5165, "step": 2929 }, { "epoch": 2.1283454021292534, "grad_norm": 0.34666624815506336, "learning_rate": 6.5496250451327996e-06, "loss": 0.4924, "step": 2930 }, { "epoch": 2.1290717998774205, "grad_norm": 0.41599288142856355, "learning_rate": 6.547355184866968e-06, "loss": 0.5063, "step": 2931 }, { "epoch": 2.1297981976255875, "grad_norm": 0.3738382621240727, "learning_rate": 6.545084971874738e-06, "loss": 0.4959, "step": 2932 }, { "epoch": 2.130524595373754, "grad_norm": 0.5175550892040155, "learning_rate": 6.542814406673613e-06, "loss": 0.5029, "step": 2933 }, { "epoch": 2.1312509931219212, "grad_norm": 0.4203991259923369, "learning_rate": 6.540543489781183e-06, "loss": 0.5143, "step": 2934 }, { "epoch": 2.1319773908700883, "grad_norm": 0.4217207006128095, "learning_rate": 6.538272221715107e-06, "loss": 0.5133, "step": 2935 }, { "epoch": 2.1327037886182554, "grad_norm": 0.42445433984426745, "learning_rate": 6.536000602993134e-06, "loss": 0.5033, "step": 2936 }, { "epoch": 2.1334301863664225, "grad_norm": 0.3376368181992335, "learning_rate": 6.533728634133089e-06, "loss": 0.5009, "step": 2937 }, { "epoch": 2.134156584114589, "grad_norm": 0.5977879995318446, "learning_rate": 6.531456315652878e-06, "loss": 0.5002, "step": 2938 }, { "epoch": 2.134882981862756, "grad_norm": 0.4754900561503473, "learning_rate": 6.529183648070484e-06, "loss": 0.5076, "step": 2939 }, { "epoch": 2.1356093796109232, "grad_norm": 0.3782456325532489, "learning_rate": 6.526910631903973e-06, "loss": 0.5007, "step": 2940 }, { "epoch": 2.1363357773590903, "grad_norm": 0.37297735671201765, "learning_rate": 6.5246372676714895e-06, "loss": 0.5082, "step": 2941 }, { "epoch": 2.137062175107257, "grad_norm": 0.3580637523533214, "learning_rate": 6.522363555891255e-06, "loss": 0.5074, "step": 2942 }, { "epoch": 2.137788572855424, "grad_norm": 0.46572760943088304, "learning_rate": 6.520089497081574e-06, "loss": 0.4881, "step": 2943 }, { "epoch": 2.138514970603591, "grad_norm": 0.4387344136375126, "learning_rate": 6.5178150917608265e-06, "loss": 0.4914, "step": 2944 }, { "epoch": 2.139241368351758, "grad_norm": 0.3956585802771223, "learning_rate": 6.515540340447475e-06, "loss": 0.5024, "step": 2945 }, { "epoch": 2.1399677660999252, "grad_norm": 0.3692454181128716, "learning_rate": 6.513265243660057e-06, "loss": 0.4931, "step": 2946 }, { "epoch": 2.1406941638480923, "grad_norm": 0.3357775537505742, "learning_rate": 6.5109898019171924e-06, "loss": 0.5035, "step": 2947 }, { "epoch": 2.141420561596259, "grad_norm": 0.3150057320366553, "learning_rate": 6.508714015737577e-06, "loss": 0.5124, "step": 2948 }, { "epoch": 2.142146959344426, "grad_norm": 0.3200772493002218, "learning_rate": 6.506437885639988e-06, "loss": 0.5045, "step": 2949 }, { "epoch": 2.142873357092593, "grad_norm": 0.5384146504997512, "learning_rate": 6.504161412143277e-06, "loss": 0.5008, "step": 2950 }, { "epoch": 2.14359975484076, "grad_norm": 0.34588162160118074, "learning_rate": 6.5018845957663764e-06, "loss": 0.5021, "step": 2951 }, { "epoch": 2.144326152588927, "grad_norm": 0.38778620028106675, "learning_rate": 6.499607437028298e-06, "loss": 0.4979, "step": 2952 }, { "epoch": 2.145052550337094, "grad_norm": 0.7015904343487487, "learning_rate": 6.497329936448129e-06, "loss": 0.4771, "step": 2953 }, { "epoch": 2.145778948085261, "grad_norm": 0.32149055233561774, "learning_rate": 6.495052094545036e-06, "loss": 0.4944, "step": 2954 }, { "epoch": 2.146505345833428, "grad_norm": 0.45082281024800525, "learning_rate": 6.492773911838263e-06, "loss": 0.4984, "step": 2955 }, { "epoch": 2.147231743581595, "grad_norm": 0.4953862513462056, "learning_rate": 6.490495388847131e-06, "loss": 0.4928, "step": 2956 }, { "epoch": 2.1479581413297617, "grad_norm": 0.3827423100083045, "learning_rate": 6.488216526091042e-06, "loss": 0.4853, "step": 2957 }, { "epoch": 2.148684539077929, "grad_norm": 0.3300300050257459, "learning_rate": 6.485937324089468e-06, "loss": 0.4915, "step": 2958 }, { "epoch": 2.149410936826096, "grad_norm": 0.3433551945936361, "learning_rate": 6.483657783361968e-06, "loss": 0.4973, "step": 2959 }, { "epoch": 2.150137334574263, "grad_norm": 0.3816368521232514, "learning_rate": 6.481377904428171e-06, "loss": 0.4866, "step": 2960 }, { "epoch": 2.15086373232243, "grad_norm": 0.4339166761827746, "learning_rate": 6.479097687807785e-06, "loss": 0.5095, "step": 2961 }, { "epoch": 2.1515901300705966, "grad_norm": 0.34956120380265815, "learning_rate": 6.476817134020596e-06, "loss": 0.4938, "step": 2962 }, { "epoch": 2.1523165278187637, "grad_norm": 0.4460892120279158, "learning_rate": 6.474536243586466e-06, "loss": 0.505, "step": 2963 }, { "epoch": 2.1530429255669308, "grad_norm": 0.3513400937290252, "learning_rate": 6.472255017025334e-06, "loss": 0.4939, "step": 2964 }, { "epoch": 2.153769323315098, "grad_norm": 0.36376467899952253, "learning_rate": 6.469973454857217e-06, "loss": 0.4992, "step": 2965 }, { "epoch": 2.154495721063265, "grad_norm": 0.36673117211425776, "learning_rate": 6.4676915576022045e-06, "loss": 0.5045, "step": 2966 }, { "epoch": 2.1552221188114316, "grad_norm": 0.3762124077379351, "learning_rate": 6.465409325780467e-06, "loss": 0.5015, "step": 2967 }, { "epoch": 2.1559485165595986, "grad_norm": 0.35115455230934106, "learning_rate": 6.463126759912248e-06, "loss": 0.5098, "step": 2968 }, { "epoch": 2.1566749143077657, "grad_norm": 0.35298907038203053, "learning_rate": 6.4608438605178695e-06, "loss": 0.485, "step": 2969 }, { "epoch": 2.1574013120559328, "grad_norm": 0.3472801797067342, "learning_rate": 6.458560628117727e-06, "loss": 0.5066, "step": 2970 }, { "epoch": 2.1581277098040994, "grad_norm": 0.36895758673806917, "learning_rate": 6.456277063232296e-06, "loss": 0.5113, "step": 2971 }, { "epoch": 2.1588541075522665, "grad_norm": 0.6875475378019651, "learning_rate": 6.453993166382122e-06, "loss": 0.5131, "step": 2972 }, { "epoch": 2.1595805053004336, "grad_norm": 0.3214418264033814, "learning_rate": 6.451708938087831e-06, "loss": 0.4929, "step": 2973 }, { "epoch": 2.1603069030486006, "grad_norm": 0.34283067423716496, "learning_rate": 6.449424378870123e-06, "loss": 0.4839, "step": 2974 }, { "epoch": 2.1610333007967677, "grad_norm": 0.30824117611470303, "learning_rate": 6.4471394892497714e-06, "loss": 0.4937, "step": 2975 }, { "epoch": 2.1617596985449343, "grad_norm": 0.36744675557677187, "learning_rate": 6.44485426974763e-06, "loss": 0.484, "step": 2976 }, { "epoch": 2.1624860962931014, "grad_norm": 0.36198374511775755, "learning_rate": 6.442568720884621e-06, "loss": 0.51, "step": 2977 }, { "epoch": 2.1632124940412685, "grad_norm": 0.3395204712387428, "learning_rate": 6.440282843181748e-06, "loss": 0.5048, "step": 2978 }, { "epoch": 2.1639388917894355, "grad_norm": 0.3586355283964876, "learning_rate": 6.437996637160086e-06, "loss": 0.4987, "step": 2979 }, { "epoch": 2.1646652895376026, "grad_norm": 0.6935755788835902, "learning_rate": 6.435710103340787e-06, "loss": 0.5091, "step": 2980 }, { "epoch": 2.1653916872857693, "grad_norm": 0.4025325375633823, "learning_rate": 6.433423242245074e-06, "loss": 0.4987, "step": 2981 }, { "epoch": 2.1661180850339363, "grad_norm": 0.3298638497588873, "learning_rate": 6.431136054394247e-06, "loss": 0.5024, "step": 2982 }, { "epoch": 2.1668444827821034, "grad_norm": 0.4156589971885642, "learning_rate": 6.4288485403096825e-06, "loss": 0.507, "step": 2983 }, { "epoch": 2.1675708805302705, "grad_norm": 0.30768317468521594, "learning_rate": 6.426560700512828e-06, "loss": 0.4919, "step": 2984 }, { "epoch": 2.1682972782784375, "grad_norm": 0.3163321608467066, "learning_rate": 6.4242725355252075e-06, "loss": 0.5029, "step": 2985 }, { "epoch": 2.169023676026604, "grad_norm": 0.3998333975752197, "learning_rate": 6.421984045868418e-06, "loss": 0.5134, "step": 2986 }, { "epoch": 2.1697500737747712, "grad_norm": 0.3610186546760319, "learning_rate": 6.419695232064131e-06, "loss": 0.4949, "step": 2987 }, { "epoch": 2.1704764715229383, "grad_norm": 0.33557243978359264, "learning_rate": 6.41740609463409e-06, "loss": 0.4966, "step": 2988 }, { "epoch": 2.1712028692711054, "grad_norm": 0.30979741916167625, "learning_rate": 6.415116634100116e-06, "loss": 0.4861, "step": 2989 }, { "epoch": 2.171929267019272, "grad_norm": 0.36967468828561945, "learning_rate": 6.412826850984099e-06, "loss": 0.5139, "step": 2990 }, { "epoch": 2.172655664767439, "grad_norm": 0.510837030995537, "learning_rate": 6.410536745808009e-06, "loss": 0.5146, "step": 2991 }, { "epoch": 2.173382062515606, "grad_norm": 0.3706820249330839, "learning_rate": 6.408246319093882e-06, "loss": 0.5086, "step": 2992 }, { "epoch": 2.1741084602637732, "grad_norm": 0.38337344616742824, "learning_rate": 6.405955571363832e-06, "loss": 0.5003, "step": 2993 }, { "epoch": 2.1748348580119403, "grad_norm": 0.33710792016006064, "learning_rate": 6.403664503140046e-06, "loss": 0.4857, "step": 2994 }, { "epoch": 2.175561255760107, "grad_norm": 0.3325991143206928, "learning_rate": 6.401373114944781e-06, "loss": 0.504, "step": 2995 }, { "epoch": 2.176287653508274, "grad_norm": 0.3747987822017251, "learning_rate": 6.399081407300372e-06, "loss": 0.4865, "step": 2996 }, { "epoch": 2.177014051256441, "grad_norm": 0.8042188756656197, "learning_rate": 6.396789380729218e-06, "loss": 0.5041, "step": 2997 }, { "epoch": 2.177740449004608, "grad_norm": 0.32635741396114964, "learning_rate": 6.394497035753804e-06, "loss": 0.4877, "step": 2998 }, { "epoch": 2.1784668467527752, "grad_norm": 0.5737248436672333, "learning_rate": 6.392204372896676e-06, "loss": 0.4965, "step": 2999 }, { "epoch": 2.179193244500942, "grad_norm": 0.3958470679136034, "learning_rate": 6.3899113926804565e-06, "loss": 0.4748, "step": 3000 }, { "epoch": 2.179919642249109, "grad_norm": 0.4448283621367889, "learning_rate": 6.387618095627841e-06, "loss": 0.4839, "step": 3001 }, { "epoch": 2.180646039997276, "grad_norm": 0.5670955549609107, "learning_rate": 6.385324482261597e-06, "loss": 0.4843, "step": 3002 }, { "epoch": 2.181372437745443, "grad_norm": 0.3817161520952691, "learning_rate": 6.383030553104562e-06, "loss": 0.4961, "step": 3003 }, { "epoch": 2.18209883549361, "grad_norm": 0.3568277844701838, "learning_rate": 6.380736308679649e-06, "loss": 0.4869, "step": 3004 }, { "epoch": 2.182825233241777, "grad_norm": 0.35591197709871963, "learning_rate": 6.378441749509841e-06, "loss": 0.4872, "step": 3005 }, { "epoch": 2.183551630989944, "grad_norm": 0.45168722406676537, "learning_rate": 6.376146876118193e-06, "loss": 0.4842, "step": 3006 }, { "epoch": 2.184278028738111, "grad_norm": 0.36618818991265534, "learning_rate": 6.37385168902783e-06, "loss": 0.5104, "step": 3007 }, { "epoch": 2.185004426486278, "grad_norm": 0.4164922266504299, "learning_rate": 6.37155618876195e-06, "loss": 0.4961, "step": 3008 }, { "epoch": 2.1857308242344446, "grad_norm": 0.33660435418460627, "learning_rate": 6.369260375843825e-06, "loss": 0.494, "step": 3009 }, { "epoch": 2.1864572219826117, "grad_norm": 0.3661615173054258, "learning_rate": 6.366964250796794e-06, "loss": 0.5024, "step": 3010 }, { "epoch": 2.187183619730779, "grad_norm": 0.3621135555183418, "learning_rate": 6.364667814144267e-06, "loss": 0.494, "step": 3011 }, { "epoch": 2.187910017478946, "grad_norm": 0.39242459700593657, "learning_rate": 6.362371066409727e-06, "loss": 0.4949, "step": 3012 }, { "epoch": 2.188636415227113, "grad_norm": 0.35575069031596895, "learning_rate": 6.360074008116732e-06, "loss": 0.5088, "step": 3013 }, { "epoch": 2.1893628129752796, "grad_norm": 0.36556209399790124, "learning_rate": 6.357776639788904e-06, "loss": 0.5023, "step": 3014 }, { "epoch": 2.1900892107234466, "grad_norm": 0.34006098095149784, "learning_rate": 6.355478961949935e-06, "loss": 0.5036, "step": 3015 }, { "epoch": 2.1908156084716137, "grad_norm": 0.3649755041154573, "learning_rate": 6.353180975123595e-06, "loss": 0.5236, "step": 3016 }, { "epoch": 2.191542006219781, "grad_norm": 0.5093498310975254, "learning_rate": 6.3508826798337184e-06, "loss": 0.4942, "step": 3017 }, { "epoch": 2.192268403967948, "grad_norm": 0.3997941447173825, "learning_rate": 6.348584076604213e-06, "loss": 0.4917, "step": 3018 }, { "epoch": 2.1929948017161145, "grad_norm": 0.3734740659212164, "learning_rate": 6.346285165959053e-06, "loss": 0.4966, "step": 3019 }, { "epoch": 2.1937211994642816, "grad_norm": 0.5631358961409713, "learning_rate": 6.3439859484222874e-06, "loss": 0.4836, "step": 3020 }, { "epoch": 2.1944475972124486, "grad_norm": 0.3562949020025143, "learning_rate": 6.341686424518032e-06, "loss": 0.4878, "step": 3021 }, { "epoch": 2.1951739949606157, "grad_norm": 0.44992840420045366, "learning_rate": 6.339386594770472e-06, "loss": 0.5014, "step": 3022 }, { "epoch": 2.195900392708783, "grad_norm": 0.34442915629672294, "learning_rate": 6.337086459703867e-06, "loss": 0.4986, "step": 3023 }, { "epoch": 2.1966267904569494, "grad_norm": 0.39024905830114803, "learning_rate": 6.33478601984254e-06, "loss": 0.5024, "step": 3024 }, { "epoch": 2.1973531882051165, "grad_norm": 0.3998500751389736, "learning_rate": 6.332485275710889e-06, "loss": 0.4949, "step": 3025 }, { "epoch": 2.1980795859532836, "grad_norm": 0.3429294829411977, "learning_rate": 6.330184227833376e-06, "loss": 0.4807, "step": 3026 }, { "epoch": 2.1988059837014506, "grad_norm": 0.690099298899355, "learning_rate": 6.327882876734537e-06, "loss": 0.4909, "step": 3027 }, { "epoch": 2.1995323814496177, "grad_norm": 0.35195712782936517, "learning_rate": 6.325581222938972e-06, "loss": 0.48, "step": 3028 }, { "epoch": 2.2002587791977843, "grad_norm": 0.328965359456782, "learning_rate": 6.323279266971357e-06, "loss": 0.4956, "step": 3029 }, { "epoch": 2.2009851769459514, "grad_norm": 0.4625848240891544, "learning_rate": 6.3209770093564315e-06, "loss": 0.5042, "step": 3030 }, { "epoch": 2.2017115746941185, "grad_norm": 0.4053912419568821, "learning_rate": 6.318674450619002e-06, "loss": 0.494, "step": 3031 }, { "epoch": 2.2024379724422856, "grad_norm": 0.3512279925789144, "learning_rate": 6.316371591283953e-06, "loss": 0.4903, "step": 3032 }, { "epoch": 2.2031643701904526, "grad_norm": 0.3636383991578603, "learning_rate": 6.3140684318762265e-06, "loss": 0.4762, "step": 3033 }, { "epoch": 2.2038907679386193, "grad_norm": 0.3852602096351379, "learning_rate": 6.31176497292084e-06, "loss": 0.4977, "step": 3034 }, { "epoch": 2.2046171656867863, "grad_norm": 0.3947804255450504, "learning_rate": 6.309461214942877e-06, "loss": 0.4976, "step": 3035 }, { "epoch": 2.2053435634349534, "grad_norm": 0.3790065839004977, "learning_rate": 6.307157158467487e-06, "loss": 0.4863, "step": 3036 }, { "epoch": 2.2060699611831205, "grad_norm": 0.41911269447716715, "learning_rate": 6.304852804019892e-06, "loss": 0.4962, "step": 3037 }, { "epoch": 2.206796358931287, "grad_norm": 0.35734766993699474, "learning_rate": 6.30254815212538e-06, "loss": 0.5068, "step": 3038 }, { "epoch": 2.207522756679454, "grad_norm": 0.6636967409043618, "learning_rate": 6.3002432033093055e-06, "loss": 0.4963, "step": 3039 }, { "epoch": 2.2082491544276213, "grad_norm": 0.32487300685748505, "learning_rate": 6.297937958097094e-06, "loss": 0.4934, "step": 3040 }, { "epoch": 2.2089755521757883, "grad_norm": 0.31860100445543343, "learning_rate": 6.29563241701423e-06, "loss": 0.5128, "step": 3041 }, { "epoch": 2.2097019499239554, "grad_norm": 0.644224408832893, "learning_rate": 6.293326580586278e-06, "loss": 0.495, "step": 3042 }, { "epoch": 2.210428347672122, "grad_norm": 0.3863384310540497, "learning_rate": 6.29102044933886e-06, "loss": 0.5091, "step": 3043 }, { "epoch": 2.211154745420289, "grad_norm": 0.3246376834758291, "learning_rate": 6.2887140237976714e-06, "loss": 0.5011, "step": 3044 }, { "epoch": 2.211881143168456, "grad_norm": 0.34220816049628433, "learning_rate": 6.286407304488471e-06, "loss": 0.4955, "step": 3045 }, { "epoch": 2.2126075409166233, "grad_norm": 0.32757096583048706, "learning_rate": 6.284100291937083e-06, "loss": 0.5024, "step": 3046 }, { "epoch": 2.2133339386647903, "grad_norm": 0.37712871027281575, "learning_rate": 6.281792986669406e-06, "loss": 0.4955, "step": 3047 }, { "epoch": 2.214060336412957, "grad_norm": 0.4039253546929864, "learning_rate": 6.279485389211396e-06, "loss": 0.4844, "step": 3048 }, { "epoch": 2.214786734161124, "grad_norm": 0.3442024462559989, "learning_rate": 6.277177500089082e-06, "loss": 0.5045, "step": 3049 }, { "epoch": 2.215513131909291, "grad_norm": 0.36987896779171475, "learning_rate": 6.274869319828558e-06, "loss": 0.4955, "step": 3050 }, { "epoch": 2.216239529657458, "grad_norm": 0.33664971222500834, "learning_rate": 6.272560848955982e-06, "loss": 0.4976, "step": 3051 }, { "epoch": 2.2169659274056253, "grad_norm": 0.38221122818636305, "learning_rate": 6.270252087997581e-06, "loss": 0.4949, "step": 3052 }, { "epoch": 2.217692325153792, "grad_norm": 0.3254009518810701, "learning_rate": 6.267943037479647e-06, "loss": 0.504, "step": 3053 }, { "epoch": 2.218418722901959, "grad_norm": 0.3533296250544195, "learning_rate": 6.265633697928539e-06, "loss": 0.495, "step": 3054 }, { "epoch": 2.219145120650126, "grad_norm": 0.4377959364901721, "learning_rate": 6.26332406987068e-06, "loss": 0.4848, "step": 3055 }, { "epoch": 2.219871518398293, "grad_norm": 0.44974371152679193, "learning_rate": 6.261014153832559e-06, "loss": 0.5034, "step": 3056 }, { "epoch": 2.2205979161464597, "grad_norm": 0.4524681508883102, "learning_rate": 6.258703950340734e-06, "loss": 0.5038, "step": 3057 }, { "epoch": 2.221324313894627, "grad_norm": 0.45769261482601487, "learning_rate": 6.256393459921824e-06, "loss": 0.5083, "step": 3058 }, { "epoch": 2.222050711642794, "grad_norm": 0.37051014434450674, "learning_rate": 6.254082683102517e-06, "loss": 0.4945, "step": 3059 }, { "epoch": 2.222777109390961, "grad_norm": 0.3394404946299637, "learning_rate": 6.251771620409563e-06, "loss": 0.4961, "step": 3060 }, { "epoch": 2.223503507139128, "grad_norm": 0.33392609888135194, "learning_rate": 6.24946027236978e-06, "loss": 0.4922, "step": 3061 }, { "epoch": 2.2242299048872947, "grad_norm": 0.39803836210224014, "learning_rate": 6.24714863951005e-06, "loss": 0.5061, "step": 3062 }, { "epoch": 2.2249563026354617, "grad_norm": 0.5053484367967763, "learning_rate": 6.244836722357319e-06, "loss": 0.4951, "step": 3063 }, { "epoch": 2.225682700383629, "grad_norm": 0.3402645327719718, "learning_rate": 6.2425245214386e-06, "loss": 0.488, "step": 3064 }, { "epoch": 2.226409098131796, "grad_norm": 0.363552088042168, "learning_rate": 6.240212037280967e-06, "loss": 0.487, "step": 3065 }, { "epoch": 2.227135495879963, "grad_norm": 0.5809668434482768, "learning_rate": 6.237899270411564e-06, "loss": 0.4819, "step": 3066 }, { "epoch": 2.2278618936281296, "grad_norm": 0.36027206956518293, "learning_rate": 6.2355862213575935e-06, "loss": 0.5039, "step": 3067 }, { "epoch": 2.2285882913762967, "grad_norm": 0.3955705798934775, "learning_rate": 6.233272890646327e-06, "loss": 0.4914, "step": 3068 }, { "epoch": 2.2293146891244637, "grad_norm": 0.36772804787972874, "learning_rate": 6.230959278805098e-06, "loss": 0.494, "step": 3069 }, { "epoch": 2.230041086872631, "grad_norm": 0.3811689830122236, "learning_rate": 6.228645386361304e-06, "loss": 0.5019, "step": 3070 }, { "epoch": 2.230767484620798, "grad_norm": 0.3383061085045838, "learning_rate": 6.226331213842406e-06, "loss": 0.4885, "step": 3071 }, { "epoch": 2.2314938823689645, "grad_norm": 0.36887931278841335, "learning_rate": 6.224016761775933e-06, "loss": 0.493, "step": 3072 }, { "epoch": 2.2322202801171316, "grad_norm": 0.47523629811023393, "learning_rate": 6.2217020306894705e-06, "loss": 0.4951, "step": 3073 }, { "epoch": 2.2329466778652987, "grad_norm": 0.3919649192352115, "learning_rate": 6.2193870211106745e-06, "loss": 0.4818, "step": 3074 }, { "epoch": 2.2336730756134657, "grad_norm": 0.33973722288683916, "learning_rate": 6.217071733567261e-06, "loss": 0.4911, "step": 3075 }, { "epoch": 2.2343994733616324, "grad_norm": 0.39854275169930475, "learning_rate": 6.214756168587009e-06, "loss": 0.5012, "step": 3076 }, { "epoch": 2.2351258711097994, "grad_norm": 0.44043656008659554, "learning_rate": 6.212440326697762e-06, "loss": 0.5017, "step": 3077 }, { "epoch": 2.2358522688579665, "grad_norm": 0.3722796430104987, "learning_rate": 6.210124208427427e-06, "loss": 0.4996, "step": 3078 }, { "epoch": 2.2365786666061336, "grad_norm": 0.3412876211388963, "learning_rate": 6.207807814303973e-06, "loss": 0.5017, "step": 3079 }, { "epoch": 2.2373050643543007, "grad_norm": 0.32142078513383754, "learning_rate": 6.205491144855432e-06, "loss": 0.4852, "step": 3080 }, { "epoch": 2.2380314621024673, "grad_norm": 0.4026214624360047, "learning_rate": 6.203174200609899e-06, "loss": 0.4937, "step": 3081 }, { "epoch": 2.2387578598506344, "grad_norm": 0.35889235187183816, "learning_rate": 6.200856982095532e-06, "loss": 0.495, "step": 3082 }, { "epoch": 2.2394842575988014, "grad_norm": 0.3788649730714019, "learning_rate": 6.19853948984055e-06, "loss": 0.4982, "step": 3083 }, { "epoch": 2.2402106553469685, "grad_norm": 0.36923423941185074, "learning_rate": 6.196221724373237e-06, "loss": 0.4852, "step": 3084 }, { "epoch": 2.2409370530951356, "grad_norm": 0.3499125392928531, "learning_rate": 6.193903686221937e-06, "loss": 0.4993, "step": 3085 }, { "epoch": 2.241663450843302, "grad_norm": 0.41064362066578464, "learning_rate": 6.191585375915056e-06, "loss": 0.4944, "step": 3086 }, { "epoch": 2.2423898485914693, "grad_norm": 0.38299816267520065, "learning_rate": 6.189266793981064e-06, "loss": 0.4726, "step": 3087 }, { "epoch": 2.2431162463396364, "grad_norm": 0.3238281351522023, "learning_rate": 6.186947940948494e-06, "loss": 0.4907, "step": 3088 }, { "epoch": 2.2438426440878034, "grad_norm": 0.38073181834522724, "learning_rate": 6.184628817345936e-06, "loss": 0.5025, "step": 3089 }, { "epoch": 2.2445690418359705, "grad_norm": 0.31508364422589685, "learning_rate": 6.182309423702044e-06, "loss": 0.4906, "step": 3090 }, { "epoch": 2.245295439584137, "grad_norm": 0.47437382669171824, "learning_rate": 6.179989760545535e-06, "loss": 0.4859, "step": 3091 }, { "epoch": 2.246021837332304, "grad_norm": 0.3875849029709213, "learning_rate": 6.177669828405186e-06, "loss": 0.5086, "step": 3092 }, { "epoch": 2.2467482350804713, "grad_norm": 0.3891730372212856, "learning_rate": 6.175349627809839e-06, "loss": 0.5044, "step": 3093 }, { "epoch": 2.2474746328286384, "grad_norm": 0.43368525656394613, "learning_rate": 6.173029159288388e-06, "loss": 0.5012, "step": 3094 }, { "epoch": 2.248201030576805, "grad_norm": 0.6038511756961967, "learning_rate": 6.1707084233697974e-06, "loss": 0.4894, "step": 3095 }, { "epoch": 2.248927428324972, "grad_norm": 0.3066072645157146, "learning_rate": 6.1683874205830905e-06, "loss": 0.498, "step": 3096 }, { "epoch": 2.249653826073139, "grad_norm": 0.3188905488105647, "learning_rate": 6.166066151457347e-06, "loss": 0.4794, "step": 3097 }, { "epoch": 2.250380223821306, "grad_norm": 0.43514160901236826, "learning_rate": 6.163744616521712e-06, "loss": 0.4904, "step": 3098 }, { "epoch": 2.2511066215694733, "grad_norm": 0.36486654258921625, "learning_rate": 6.161422816305389e-06, "loss": 0.4897, "step": 3099 }, { "epoch": 2.2518330193176403, "grad_norm": 0.3480152596351159, "learning_rate": 6.1591007513376425e-06, "loss": 0.4867, "step": 3100 }, { "epoch": 2.252559417065807, "grad_norm": 0.6364902568308674, "learning_rate": 6.156778422147797e-06, "loss": 0.5002, "step": 3101 }, { "epoch": 2.253285814813974, "grad_norm": 0.3973215322038487, "learning_rate": 6.154455829265239e-06, "loss": 0.4754, "step": 3102 }, { "epoch": 2.254012212562141, "grad_norm": 0.4008644063048734, "learning_rate": 6.152132973219414e-06, "loss": 0.5, "step": 3103 }, { "epoch": 2.254738610310308, "grad_norm": 0.770372781416335, "learning_rate": 6.1498098545398255e-06, "loss": 0.4892, "step": 3104 }, { "epoch": 2.255465008058475, "grad_norm": 0.3680821695745185, "learning_rate": 6.147486473756039e-06, "loss": 0.4938, "step": 3105 }, { "epoch": 2.256191405806642, "grad_norm": 0.4193679580025291, "learning_rate": 6.14516283139768e-06, "loss": 0.4937, "step": 3106 }, { "epoch": 2.256917803554809, "grad_norm": 0.35600056467080976, "learning_rate": 6.142838927994432e-06, "loss": 0.491, "step": 3107 }, { "epoch": 2.257644201302976, "grad_norm": 0.3740725509927286, "learning_rate": 6.1405147640760395e-06, "loss": 0.4931, "step": 3108 }, { "epoch": 2.258370599051143, "grad_norm": 0.34855260413863015, "learning_rate": 6.138190340172308e-06, "loss": 0.5016, "step": 3109 }, { "epoch": 2.2590969967993098, "grad_norm": 0.5207683008045049, "learning_rate": 6.135865656813095e-06, "loss": 0.5182, "step": 3110 }, { "epoch": 2.259823394547477, "grad_norm": 0.40710281188709585, "learning_rate": 6.1335407145283265e-06, "loss": 0.4872, "step": 3111 }, { "epoch": 2.260549792295644, "grad_norm": 0.38524703767888147, "learning_rate": 6.131215513847982e-06, "loss": 0.4888, "step": 3112 }, { "epoch": 2.261276190043811, "grad_norm": 0.3611442300937129, "learning_rate": 6.128890055302103e-06, "loss": 0.4971, "step": 3113 }, { "epoch": 2.2620025877919776, "grad_norm": 0.3726264631189811, "learning_rate": 6.126564339420784e-06, "loss": 0.4982, "step": 3114 }, { "epoch": 2.2627289855401447, "grad_norm": 0.35003659748054033, "learning_rate": 6.124238366734185e-06, "loss": 0.4857, "step": 3115 }, { "epoch": 2.2634553832883118, "grad_norm": 0.44795422664526696, "learning_rate": 6.121912137772521e-06, "loss": 0.4933, "step": 3116 }, { "epoch": 2.264181781036479, "grad_norm": 0.38836552057562496, "learning_rate": 6.119585653066067e-06, "loss": 0.4868, "step": 3117 }, { "epoch": 2.264908178784646, "grad_norm": 0.3874958318464041, "learning_rate": 6.117258913145153e-06, "loss": 0.4896, "step": 3118 }, { "epoch": 2.265634576532813, "grad_norm": 0.4184151509447441, "learning_rate": 6.114931918540172e-06, "loss": 0.4927, "step": 3119 }, { "epoch": 2.2663609742809796, "grad_norm": 0.5489700776940928, "learning_rate": 6.112604669781572e-06, "loss": 0.4811, "step": 3120 }, { "epoch": 2.2670873720291467, "grad_norm": 0.37173040857538353, "learning_rate": 6.110277167399861e-06, "loss": 0.4978, "step": 3121 }, { "epoch": 2.2678137697773137, "grad_norm": 0.3215371108487471, "learning_rate": 6.107949411925599e-06, "loss": 0.4808, "step": 3122 }, { "epoch": 2.268540167525481, "grad_norm": 0.4337388157888445, "learning_rate": 6.105621403889411e-06, "loss": 0.501, "step": 3123 }, { "epoch": 2.2692665652736475, "grad_norm": 0.33974619025502883, "learning_rate": 6.103293143821978e-06, "loss": 0.4955, "step": 3124 }, { "epoch": 2.2699929630218145, "grad_norm": 0.4190678472874704, "learning_rate": 6.100964632254033e-06, "loss": 0.489, "step": 3125 }, { "epoch": 2.2707193607699816, "grad_norm": 0.4983709975295048, "learning_rate": 6.098635869716375e-06, "loss": 0.4962, "step": 3126 }, { "epoch": 2.2714457585181487, "grad_norm": 0.4360393265527959, "learning_rate": 6.0963068567398535e-06, "loss": 0.49, "step": 3127 }, { "epoch": 2.2721721562663157, "grad_norm": 0.347103768192224, "learning_rate": 6.093977593855376e-06, "loss": 0.4925, "step": 3128 }, { "epoch": 2.2728985540144824, "grad_norm": 0.4549595372358685, "learning_rate": 6.0916480815939095e-06, "loss": 0.4792, "step": 3129 }, { "epoch": 2.2736249517626494, "grad_norm": 0.42519053661063105, "learning_rate": 6.089318320486477e-06, "loss": 0.5004, "step": 3130 }, { "epoch": 2.2743513495108165, "grad_norm": 0.34401001903742934, "learning_rate": 6.086988311064157e-06, "loss": 0.4918, "step": 3131 }, { "epoch": 2.2750777472589836, "grad_norm": 0.3675422068036801, "learning_rate": 6.084658053858086e-06, "loss": 0.4946, "step": 3132 }, { "epoch": 2.2758041450071507, "grad_norm": 0.6373015019480114, "learning_rate": 6.082327549399456e-06, "loss": 0.4968, "step": 3133 }, { "epoch": 2.2765305427553173, "grad_norm": 0.9060836261092253, "learning_rate": 6.079996798219516e-06, "loss": 0.5049, "step": 3134 }, { "epoch": 2.2772569405034844, "grad_norm": 0.4086724137891712, "learning_rate": 6.077665800849568e-06, "loss": 0.4936, "step": 3135 }, { "epoch": 2.2779833382516514, "grad_norm": 0.42824112282521215, "learning_rate": 6.075334557820977e-06, "loss": 0.5069, "step": 3136 }, { "epoch": 2.2787097359998185, "grad_norm": 0.4370864620437589, "learning_rate": 6.073003069665161e-06, "loss": 0.4845, "step": 3137 }, { "epoch": 2.2794361337479856, "grad_norm": 0.34589256703550214, "learning_rate": 6.070671336913588e-06, "loss": 0.4836, "step": 3138 }, { "epoch": 2.2801625314961522, "grad_norm": 0.44081977847163717, "learning_rate": 6.068339360097791e-06, "loss": 0.499, "step": 3139 }, { "epoch": 2.2808889292443193, "grad_norm": 0.3693827842084408, "learning_rate": 6.066007139749351e-06, "loss": 0.4819, "step": 3140 }, { "epoch": 2.2816153269924864, "grad_norm": 0.4114801998919762, "learning_rate": 6.063674676399911e-06, "loss": 0.4971, "step": 3141 }, { "epoch": 2.2823417247406534, "grad_norm": 0.5174468598265398, "learning_rate": 6.061341970581165e-06, "loss": 0.5008, "step": 3142 }, { "epoch": 2.28306812248882, "grad_norm": 0.7341098293975072, "learning_rate": 6.0590090228248625e-06, "loss": 0.4929, "step": 3143 }, { "epoch": 2.283794520236987, "grad_norm": 0.3505216954061479, "learning_rate": 6.056675833662811e-06, "loss": 0.5036, "step": 3144 }, { "epoch": 2.284520917985154, "grad_norm": 0.3540500347732656, "learning_rate": 6.0543424036268675e-06, "loss": 0.4915, "step": 3145 }, { "epoch": 2.2852473157333213, "grad_norm": 0.44413442338960246, "learning_rate": 6.052008733248954e-06, "loss": 0.5071, "step": 3146 }, { "epoch": 2.2859737134814884, "grad_norm": 0.3900031444781187, "learning_rate": 6.049674823061036e-06, "loss": 0.5175, "step": 3147 }, { "epoch": 2.286700111229655, "grad_norm": 0.32819673521829124, "learning_rate": 6.047340673595139e-06, "loss": 0.4848, "step": 3148 }, { "epoch": 2.287426508977822, "grad_norm": 0.37858795931699807, "learning_rate": 6.045006285383342e-06, "loss": 0.4929, "step": 3149 }, { "epoch": 2.288152906725989, "grad_norm": 0.3666616444696444, "learning_rate": 6.042671658957783e-06, "loss": 0.5038, "step": 3150 }, { "epoch": 2.288879304474156, "grad_norm": 0.3633303205358319, "learning_rate": 6.040336794850646e-06, "loss": 0.4951, "step": 3151 }, { "epoch": 2.2896057022223233, "grad_norm": 1.1115070192327328, "learning_rate": 6.038001693594176e-06, "loss": 0.4958, "step": 3152 }, { "epoch": 2.29033209997049, "grad_norm": 0.326420594941503, "learning_rate": 6.03566635572067e-06, "loss": 0.4869, "step": 3153 }, { "epoch": 2.291058497718657, "grad_norm": 0.3963449158303792, "learning_rate": 6.033330781762476e-06, "loss": 0.4788, "step": 3154 }, { "epoch": 2.291784895466824, "grad_norm": 0.5105806805590508, "learning_rate": 6.0309949722519986e-06, "loss": 0.5032, "step": 3155 }, { "epoch": 2.292511293214991, "grad_norm": 0.4155693537256211, "learning_rate": 6.028658927721698e-06, "loss": 0.4902, "step": 3156 }, { "epoch": 2.293237690963158, "grad_norm": 0.4138282695922685, "learning_rate": 6.0263226487040845e-06, "loss": 0.4809, "step": 3157 }, { "epoch": 2.293964088711325, "grad_norm": 0.37462980188091954, "learning_rate": 6.023986135731724e-06, "loss": 0.4942, "step": 3158 }, { "epoch": 2.294690486459492, "grad_norm": 0.4289202950128778, "learning_rate": 6.021649389337234e-06, "loss": 0.498, "step": 3159 }, { "epoch": 2.295416884207659, "grad_norm": 0.5635091191246396, "learning_rate": 6.019312410053286e-06, "loss": 0.489, "step": 3160 }, { "epoch": 2.296143281955826, "grad_norm": 0.4607311853871575, "learning_rate": 6.016975198412606e-06, "loss": 0.4946, "step": 3161 }, { "epoch": 2.2968696797039927, "grad_norm": 0.6935083323300786, "learning_rate": 6.014637754947969e-06, "loss": 0.4811, "step": 3162 }, { "epoch": 2.2975960774521598, "grad_norm": 0.41357546263443523, "learning_rate": 6.012300080192208e-06, "loss": 0.5075, "step": 3163 }, { "epoch": 2.298322475200327, "grad_norm": 0.3542243619660625, "learning_rate": 6.0099621746782035e-06, "loss": 0.4971, "step": 3164 }, { "epoch": 2.299048872948494, "grad_norm": 0.44134295561503295, "learning_rate": 6.007624038938895e-06, "loss": 0.4888, "step": 3165 }, { "epoch": 2.299775270696661, "grad_norm": 0.39417830167581025, "learning_rate": 6.0052856735072685e-06, "loss": 0.4984, "step": 3166 }, { "epoch": 2.300501668444828, "grad_norm": 0.33506843695028593, "learning_rate": 6.002947078916365e-06, "loss": 0.4846, "step": 3167 }, { "epoch": 2.3012280661929947, "grad_norm": 0.394726807159148, "learning_rate": 6.000608255699277e-06, "loss": 0.5015, "step": 3168 }, { "epoch": 2.3019544639411618, "grad_norm": 0.35592888824186264, "learning_rate": 5.99826920438915e-06, "loss": 0.4928, "step": 3169 }, { "epoch": 2.302680861689329, "grad_norm": 0.3741228112311038, "learning_rate": 5.995929925519181e-06, "loss": 0.4748, "step": 3170 }, { "epoch": 2.303407259437496, "grad_norm": 0.4118521684904315, "learning_rate": 5.993590419622619e-06, "loss": 0.4916, "step": 3171 }, { "epoch": 2.3041336571856625, "grad_norm": 0.4161129936032844, "learning_rate": 5.991250687232764e-06, "loss": 0.4841, "step": 3172 }, { "epoch": 2.3048600549338296, "grad_norm": 0.42952160504666864, "learning_rate": 5.98891072888297e-06, "loss": 0.4884, "step": 3173 }, { "epoch": 2.3055864526819967, "grad_norm": 0.39432337272336604, "learning_rate": 5.986570545106638e-06, "loss": 0.4956, "step": 3174 }, { "epoch": 2.3063128504301638, "grad_norm": 0.3996450987725706, "learning_rate": 5.984230136437226e-06, "loss": 0.4968, "step": 3175 }, { "epoch": 2.307039248178331, "grad_norm": 0.3391473825126834, "learning_rate": 5.981889503408238e-06, "loss": 0.4883, "step": 3176 }, { "epoch": 2.3077656459264975, "grad_norm": 0.3267696126670061, "learning_rate": 5.9795486465532325e-06, "loss": 0.4978, "step": 3177 }, { "epoch": 2.3084920436746645, "grad_norm": 0.3922857284244964, "learning_rate": 5.977207566405818e-06, "loss": 0.4833, "step": 3178 }, { "epoch": 2.3092184414228316, "grad_norm": 0.42615109149541347, "learning_rate": 5.974866263499655e-06, "loss": 0.4861, "step": 3179 }, { "epoch": 2.3099448391709987, "grad_norm": 0.39246348259809566, "learning_rate": 5.972524738368452e-06, "loss": 0.499, "step": 3180 }, { "epoch": 2.3106712369191653, "grad_norm": 0.3811353423363143, "learning_rate": 5.970182991545972e-06, "loss": 0.4878, "step": 3181 }, { "epoch": 2.3113976346673324, "grad_norm": 0.3538069167085555, "learning_rate": 5.967841023566025e-06, "loss": 0.4999, "step": 3182 }, { "epoch": 2.3121240324154995, "grad_norm": 0.451875857319256, "learning_rate": 5.965498834962472e-06, "loss": 0.4811, "step": 3183 }, { "epoch": 2.3128504301636665, "grad_norm": 0.5972179413766493, "learning_rate": 5.963156426269228e-06, "loss": 0.4903, "step": 3184 }, { "epoch": 2.3135768279118336, "grad_norm": 0.41997194324350673, "learning_rate": 5.960813798020252e-06, "loss": 0.4753, "step": 3185 }, { "epoch": 2.3143032256600007, "grad_norm": 0.6097372572334416, "learning_rate": 5.9584709507495594e-06, "loss": 0.5037, "step": 3186 }, { "epoch": 2.3150296234081673, "grad_norm": 0.5162731967709743, "learning_rate": 5.956127884991211e-06, "loss": 0.4946, "step": 3187 }, { "epoch": 2.3157560211563344, "grad_norm": 0.41135839595878126, "learning_rate": 5.9537846012793184e-06, "loss": 0.497, "step": 3188 }, { "epoch": 2.3164824189045015, "grad_norm": 0.5225429394800519, "learning_rate": 5.9514411001480435e-06, "loss": 0.5023, "step": 3189 }, { "epoch": 2.3172088166526685, "grad_norm": 0.36058624696826763, "learning_rate": 5.949097382131599e-06, "loss": 0.4968, "step": 3190 }, { "epoch": 2.317935214400835, "grad_norm": 0.4896109590425641, "learning_rate": 5.946753447764245e-06, "loss": 0.4831, "step": 3191 }, { "epoch": 2.3186616121490022, "grad_norm": 0.37739851530243934, "learning_rate": 5.9444092975802925e-06, "loss": 0.5023, "step": 3192 }, { "epoch": 2.3193880098971693, "grad_norm": 0.3958899188666041, "learning_rate": 5.942064932114099e-06, "loss": 0.49, "step": 3193 }, { "epoch": 2.3201144076453364, "grad_norm": 0.5308468797720521, "learning_rate": 5.939720351900072e-06, "loss": 0.497, "step": 3194 }, { "epoch": 2.3208408053935035, "grad_norm": 0.3649823573866849, "learning_rate": 5.9373755574726735e-06, "loss": 0.4998, "step": 3195 }, { "epoch": 2.32156720314167, "grad_norm": 0.3904577934507057, "learning_rate": 5.935030549366405e-06, "loss": 0.485, "step": 3196 }, { "epoch": 2.322293600889837, "grad_norm": 0.34420467341162336, "learning_rate": 5.932685328115823e-06, "loss": 0.4921, "step": 3197 }, { "epoch": 2.3230199986380042, "grad_norm": 0.3332275063766719, "learning_rate": 5.930339894255533e-06, "loss": 0.495, "step": 3198 }, { "epoch": 2.3237463963861713, "grad_norm": 0.38028556409357467, "learning_rate": 5.927994248320183e-06, "loss": 0.4818, "step": 3199 }, { "epoch": 2.324472794134338, "grad_norm": 0.42536308376316817, "learning_rate": 5.925648390844476e-06, "loss": 0.4848, "step": 3200 }, { "epoch": 2.1526435235773724, "grad_norm": 0.36400339398627185, "learning_rate": 6.446353231581457e-06, "loss": 0.4911, "step": 3201 }, { "epoch": 2.1533159620072286, "grad_norm": 0.37954296733354875, "learning_rate": 6.4442456764159814e-06, "loss": 0.4976, "step": 3202 }, { "epoch": 2.153988400437085, "grad_norm": 0.45941645823821337, "learning_rate": 6.44213784125384e-06, "loss": 0.4858, "step": 3203 }, { "epoch": 2.154660838866941, "grad_norm": 0.40495850868238187, "learning_rate": 6.4400297265036795e-06, "loss": 0.4881, "step": 3204 }, { "epoch": 2.1553332772967977, "grad_norm": 0.3544217596686645, "learning_rate": 6.437921332574203e-06, "loss": 0.4921, "step": 3205 }, { "epoch": 2.156005715726654, "grad_norm": 0.35855590079318694, "learning_rate": 6.435812659874163e-06, "loss": 0.4878, "step": 3206 }, { "epoch": 2.15667815415651, "grad_norm": 0.4257310659468136, "learning_rate": 6.4337037088123734e-06, "loss": 0.4784, "step": 3207 }, { "epoch": 2.1573505925863663, "grad_norm": 0.359797194091946, "learning_rate": 6.431594479797693e-06, "loss": 0.477, "step": 3208 }, { "epoch": 2.1580230310162225, "grad_norm": 0.5311983101781117, "learning_rate": 6.429484973239041e-06, "loss": 0.4895, "step": 3209 }, { "epoch": 2.1586954694460787, "grad_norm": 0.3738818622798392, "learning_rate": 6.4273751895453894e-06, "loss": 0.4743, "step": 3210 }, { "epoch": 2.159367907875935, "grad_norm": 0.3760099340466295, "learning_rate": 6.425265129125762e-06, "loss": 0.4886, "step": 3211 }, { "epoch": 2.1600403463057916, "grad_norm": 0.39293130543136123, "learning_rate": 6.423154792389237e-06, "loss": 0.4735, "step": 3212 }, { "epoch": 2.1607127847356478, "grad_norm": 0.3819149221012835, "learning_rate": 6.421044179744946e-06, "loss": 0.4881, "step": 3213 }, { "epoch": 2.161385223165504, "grad_norm": 0.3526992849837529, "learning_rate": 6.418933291602079e-06, "loss": 0.4905, "step": 3214 }, { "epoch": 2.16205766159536, "grad_norm": 0.39242237563550325, "learning_rate": 6.4168221283698696e-06, "loss": 0.478, "step": 3215 }, { "epoch": 2.1627301000252164, "grad_norm": 0.5052242268359664, "learning_rate": 6.414710690457613e-06, "loss": 0.4813, "step": 3216 }, { "epoch": 2.1634025384550726, "grad_norm": 0.3974416216109638, "learning_rate": 6.4125989782746535e-06, "loss": 0.4613, "step": 3217 }, { "epoch": 2.164074976884929, "grad_norm": 0.3953775362044758, "learning_rate": 6.4104869922303905e-06, "loss": 0.474, "step": 3218 }, { "epoch": 2.1647474153147854, "grad_norm": 0.9970467066795999, "learning_rate": 6.408374732734275e-06, "loss": 0.4636, "step": 3219 }, { "epoch": 2.1654198537446416, "grad_norm": 0.3449898830543444, "learning_rate": 6.406262200195812e-06, "loss": 0.4636, "step": 3220 }, { "epoch": 2.166092292174498, "grad_norm": 0.3320632740312974, "learning_rate": 6.4041493950245605e-06, "loss": 0.4874, "step": 3221 }, { "epoch": 2.166764730604354, "grad_norm": 0.3599970105192158, "learning_rate": 6.40203631763013e-06, "loss": 0.452, "step": 3222 }, { "epoch": 2.1674371690342102, "grad_norm": 0.49644697700894996, "learning_rate": 6.399922968422186e-06, "loss": 0.4668, "step": 3223 }, { "epoch": 2.1681096074640664, "grad_norm": 0.42659876148794784, "learning_rate": 6.397809347810441e-06, "loss": 0.4592, "step": 3224 }, { "epoch": 2.1687820458939226, "grad_norm": 0.39713710109686995, "learning_rate": 6.395695456204667e-06, "loss": 0.4839, "step": 3225 }, { "epoch": 2.1694544843237793, "grad_norm": 0.44749879329318243, "learning_rate": 6.3935812940146835e-06, "loss": 0.4735, "step": 3226 }, { "epoch": 2.1701269227536355, "grad_norm": 0.4321525163181309, "learning_rate": 6.391466861650365e-06, "loss": 0.4775, "step": 3227 }, { "epoch": 2.1707993611834917, "grad_norm": 0.3855482525351591, "learning_rate": 6.389352159521636e-06, "loss": 0.461, "step": 3228 }, { "epoch": 2.171471799613348, "grad_norm": 0.44598796268875185, "learning_rate": 6.3872371880384776e-06, "loss": 0.4644, "step": 3229 }, { "epoch": 2.172144238043204, "grad_norm": 0.3776752834861266, "learning_rate": 6.3851219476109184e-06, "loss": 0.487, "step": 3230 }, { "epoch": 2.1728166764730603, "grad_norm": 0.3441671467053756, "learning_rate": 6.383006438649042e-06, "loss": 0.4682, "step": 3231 }, { "epoch": 2.1734891149029165, "grad_norm": 0.38288668022048156, "learning_rate": 6.380890661562984e-06, "loss": 0.4876, "step": 3232 }, { "epoch": 2.174161553332773, "grad_norm": 0.4303974490863329, "learning_rate": 6.3787746167629286e-06, "loss": 0.4628, "step": 3233 }, { "epoch": 2.1748339917626294, "grad_norm": 0.44345318856219873, "learning_rate": 6.376658304659118e-06, "loss": 0.4638, "step": 3234 }, { "epoch": 2.1755064301924856, "grad_norm": 0.4281777031529893, "learning_rate": 6.37454172566184e-06, "loss": 0.4844, "step": 3235 }, { "epoch": 2.1761788686223418, "grad_norm": 0.3941670465285334, "learning_rate": 6.372424880181437e-06, "loss": 0.4864, "step": 3236 }, { "epoch": 2.176851307052198, "grad_norm": 0.4568583459931907, "learning_rate": 6.3703077686283045e-06, "loss": 0.4735, "step": 3237 }, { "epoch": 2.177523745482054, "grad_norm": 0.5070676742381048, "learning_rate": 6.368190391412887e-06, "loss": 0.4655, "step": 3238 }, { "epoch": 2.1781961839119104, "grad_norm": 0.34743038619990396, "learning_rate": 6.3660727489456815e-06, "loss": 0.4652, "step": 3239 }, { "epoch": 2.178868622341767, "grad_norm": 0.39638695582927597, "learning_rate": 6.3639548416372385e-06, "loss": 0.4702, "step": 3240 }, { "epoch": 2.1795410607716232, "grad_norm": 0.40020178726324046, "learning_rate": 6.3618366698981545e-06, "loss": 0.4537, "step": 3241 }, { "epoch": 2.1802134992014794, "grad_norm": 0.37629287034991654, "learning_rate": 6.359718234139082e-06, "loss": 0.4581, "step": 3242 }, { "epoch": 2.1808859376313356, "grad_norm": 0.4216261561384884, "learning_rate": 6.357599534770724e-06, "loss": 0.4898, "step": 3243 }, { "epoch": 2.181558376061192, "grad_norm": 0.33402757128286764, "learning_rate": 6.355480572203834e-06, "loss": 0.4574, "step": 3244 }, { "epoch": 2.182230814491048, "grad_norm": 0.365971405847702, "learning_rate": 6.353361346849214e-06, "loss": 0.4571, "step": 3245 }, { "epoch": 2.1829032529209043, "grad_norm": 0.3427010018820965, "learning_rate": 6.35124185911772e-06, "loss": 0.48, "step": 3246 }, { "epoch": 2.183575691350761, "grad_norm": 0.36662548856695765, "learning_rate": 6.349122109420262e-06, "loss": 0.4668, "step": 3247 }, { "epoch": 2.184248129780617, "grad_norm": 0.3478504185173162, "learning_rate": 6.3470020981677915e-06, "loss": 0.4723, "step": 3248 }, { "epoch": 2.1849205682104733, "grad_norm": 0.31574374838479446, "learning_rate": 6.3448818257713205e-06, "loss": 0.4635, "step": 3249 }, { "epoch": 2.1855930066403295, "grad_norm": 0.43950264307355086, "learning_rate": 6.3427612926419056e-06, "loss": 0.4791, "step": 3250 }, { "epoch": 2.1862654450701857, "grad_norm": 0.3558974965207327, "learning_rate": 6.340640499190656e-06, "loss": 0.4614, "step": 3251 }, { "epoch": 2.186937883500042, "grad_norm": 0.6481493882892034, "learning_rate": 6.338519445828728e-06, "loss": 0.4551, "step": 3252 }, { "epoch": 2.187610321929898, "grad_norm": 0.39982021103181276, "learning_rate": 6.336398132967337e-06, "loss": 0.4529, "step": 3253 }, { "epoch": 2.1882827603597548, "grad_norm": 0.3361374722833279, "learning_rate": 6.334276561017738e-06, "loss": 0.475, "step": 3254 }, { "epoch": 2.188955198789611, "grad_norm": 0.3661649872244156, "learning_rate": 6.3321547303912445e-06, "loss": 0.4701, "step": 3255 }, { "epoch": 2.189627637219467, "grad_norm": 0.38467033502442166, "learning_rate": 6.330032641499216e-06, "loss": 0.4654, "step": 3256 }, { "epoch": 2.1903000756493234, "grad_norm": 0.33990424949533915, "learning_rate": 6.3279102947530626e-06, "loss": 0.4636, "step": 3257 }, { "epoch": 2.1909725140791796, "grad_norm": 0.5088859601290591, "learning_rate": 6.325787690564246e-06, "loss": 0.4741, "step": 3258 }, { "epoch": 2.191644952509036, "grad_norm": 0.4521884277447263, "learning_rate": 6.3236648293442735e-06, "loss": 0.4617, "step": 3259 }, { "epoch": 2.192317390938892, "grad_norm": 0.3272684919643106, "learning_rate": 6.321541711504708e-06, "loss": 0.4675, "step": 3260 }, { "epoch": 2.1929898293687486, "grad_norm": 0.3178252049326452, "learning_rate": 6.319418337457158e-06, "loss": 0.4601, "step": 3261 }, { "epoch": 2.193662267798605, "grad_norm": 0.343212213649888, "learning_rate": 6.317294707613287e-06, "loss": 0.4823, "step": 3262 }, { "epoch": 2.194334706228461, "grad_norm": 0.36111254446942886, "learning_rate": 6.315170822384797e-06, "loss": 0.4666, "step": 3263 }, { "epoch": 2.1950071446583173, "grad_norm": 0.3247937151282206, "learning_rate": 6.313046682183452e-06, "loss": 0.4479, "step": 3264 }, { "epoch": 2.1956795830881735, "grad_norm": 0.32402685278862575, "learning_rate": 6.310922287421062e-06, "loss": 0.4686, "step": 3265 }, { "epoch": 2.1963520215180297, "grad_norm": 0.3085536264866017, "learning_rate": 6.308797638509478e-06, "loss": 0.4792, "step": 3266 }, { "epoch": 2.197024459947886, "grad_norm": 0.3181533538129706, "learning_rate": 6.3066727358606126e-06, "loss": 0.4721, "step": 3267 }, { "epoch": 2.1976968983777425, "grad_norm": 0.3312265223041896, "learning_rate": 6.304547579886419e-06, "loss": 0.4547, "step": 3268 }, { "epoch": 2.1983693368075987, "grad_norm": 0.328345337965804, "learning_rate": 6.302422170998903e-06, "loss": 0.4789, "step": 3269 }, { "epoch": 2.199041775237455, "grad_norm": 0.39821230084937137, "learning_rate": 6.300296509610117e-06, "loss": 0.4763, "step": 3270 }, { "epoch": 2.199714213667311, "grad_norm": 0.45659507362333, "learning_rate": 6.298170596132168e-06, "loss": 0.4832, "step": 3271 }, { "epoch": 2.2003866520971673, "grad_norm": 0.3582687048929397, "learning_rate": 6.296044430977205e-06, "loss": 0.4933, "step": 3272 }, { "epoch": 2.2010590905270235, "grad_norm": 0.3347341994137685, "learning_rate": 6.293918014557429e-06, "loss": 0.4704, "step": 3273 }, { "epoch": 2.2017315289568797, "grad_norm": 0.3215277495026712, "learning_rate": 6.2917913472850915e-06, "loss": 0.4629, "step": 3274 }, { "epoch": 2.2024039673867364, "grad_norm": 0.3515898713741847, "learning_rate": 6.289664429572486e-06, "loss": 0.4675, "step": 3275 }, { "epoch": 2.2030764058165926, "grad_norm": 0.40158213129557646, "learning_rate": 6.287537261831965e-06, "loss": 0.4712, "step": 3276 }, { "epoch": 2.203748844246449, "grad_norm": 0.36658677429253866, "learning_rate": 6.285409844475918e-06, "loss": 0.4657, "step": 3277 }, { "epoch": 2.204421282676305, "grad_norm": 0.40253283718196536, "learning_rate": 6.283282177916794e-06, "loss": 0.4712, "step": 3278 }, { "epoch": 2.205093721106161, "grad_norm": 0.35863478731973064, "learning_rate": 6.28115426256708e-06, "loss": 0.4657, "step": 3279 }, { "epoch": 2.2057661595360174, "grad_norm": 0.3305351152359045, "learning_rate": 6.279026098839321e-06, "loss": 0.4557, "step": 3280 }, { "epoch": 2.2064385979658736, "grad_norm": 0.3318879526526031, "learning_rate": 6.2768976871460985e-06, "loss": 0.4601, "step": 3281 }, { "epoch": 2.20711103639573, "grad_norm": 0.35760047494368197, "learning_rate": 6.274769027900056e-06, "loss": 0.478, "step": 3282 }, { "epoch": 2.2077834748255865, "grad_norm": 0.34678732685220764, "learning_rate": 6.272640121513872e-06, "loss": 0.471, "step": 3283 }, { "epoch": 2.2084559132554427, "grad_norm": 0.32565899960537, "learning_rate": 6.270510968400283e-06, "loss": 0.4814, "step": 3284 }, { "epoch": 2.209128351685299, "grad_norm": 0.3208575439821756, "learning_rate": 6.268381568972065e-06, "loss": 0.4651, "step": 3285 }, { "epoch": 2.209800790115155, "grad_norm": 0.4131626036588382, "learning_rate": 6.266251923642049e-06, "loss": 0.4898, "step": 3286 }, { "epoch": 2.2104732285450113, "grad_norm": 0.3806906259045866, "learning_rate": 6.264122032823111e-06, "loss": 0.4727, "step": 3287 }, { "epoch": 2.2111456669748675, "grad_norm": 0.7017980432113152, "learning_rate": 6.26199189692817e-06, "loss": 0.4762, "step": 3288 }, { "epoch": 2.2118181054047237, "grad_norm": 0.3308519344551586, "learning_rate": 6.259861516370201e-06, "loss": 0.4555, "step": 3289 }, { "epoch": 2.2124905438345803, "grad_norm": 0.3946936336332011, "learning_rate": 6.2577308915622196e-06, "loss": 0.4517, "step": 3290 }, { "epoch": 2.2131629822644365, "grad_norm": 0.41976183541831025, "learning_rate": 6.255600022917292e-06, "loss": 0.4712, "step": 3291 }, { "epoch": 2.2138354206942927, "grad_norm": 0.3441105184459733, "learning_rate": 6.253468910848529e-06, "loss": 0.4409, "step": 3292 }, { "epoch": 2.214507859124149, "grad_norm": 0.440199416455677, "learning_rate": 6.251337555769093e-06, "loss": 0.4681, "step": 3293 }, { "epoch": 2.215180297554005, "grad_norm": 0.45599533834389516, "learning_rate": 6.2492059580921886e-06, "loss": 0.4509, "step": 3294 }, { "epoch": 2.2158527359838613, "grad_norm": 0.6261088005455595, "learning_rate": 6.2470741182310725e-06, "loss": 0.4504, "step": 3295 }, { "epoch": 2.2165251744137175, "grad_norm": 0.32376267307506235, "learning_rate": 6.244942036599042e-06, "loss": 0.4565, "step": 3296 }, { "epoch": 2.217197612843574, "grad_norm": 0.4449321605914357, "learning_rate": 6.2428097136094476e-06, "loss": 0.4837, "step": 3297 }, { "epoch": 2.2178700512734304, "grad_norm": 0.4372822395750341, "learning_rate": 6.240677149675684e-06, "loss": 0.4713, "step": 3298 }, { "epoch": 2.2185424897032866, "grad_norm": 0.7497825332115169, "learning_rate": 6.2385443452111915e-06, "loss": 0.4561, "step": 3299 }, { "epoch": 2.219214928133143, "grad_norm": 0.3561248007825388, "learning_rate": 6.236411300629458e-06, "loss": 0.4714, "step": 3300 }, { "epoch": 2.219887366562999, "grad_norm": 0.355500848926921, "learning_rate": 6.234278016344018e-06, "loss": 0.4625, "step": 3301 }, { "epoch": 2.220559804992855, "grad_norm": 0.30501535700416, "learning_rate": 6.232144492768451e-06, "loss": 0.4719, "step": 3302 }, { "epoch": 2.2212322434227114, "grad_norm": 0.42608821946659853, "learning_rate": 6.230010730316388e-06, "loss": 0.48, "step": 3303 }, { "epoch": 2.221904681852568, "grad_norm": 0.45846152556150416, "learning_rate": 6.227876729401501e-06, "loss": 0.4938, "step": 3304 }, { "epoch": 2.2225771202824243, "grad_norm": 0.4631201581405841, "learning_rate": 6.225742490437507e-06, "loss": 0.4493, "step": 3305 }, { "epoch": 2.2232495587122805, "grad_norm": 0.3319109235846109, "learning_rate": 6.223608013838177e-06, "loss": 0.4509, "step": 3306 }, { "epoch": 2.2239219971421367, "grad_norm": 0.4319123676573331, "learning_rate": 6.221473300017319e-06, "loss": 0.4613, "step": 3307 }, { "epoch": 2.224594435571993, "grad_norm": 0.5502446668762675, "learning_rate": 6.219338349388792e-06, "loss": 0.4665, "step": 3308 }, { "epoch": 2.225266874001849, "grad_norm": 0.3383954086793452, "learning_rate": 6.217203162366502e-06, "loss": 0.4773, "step": 3309 }, { "epoch": 2.2259393124317053, "grad_norm": 0.3253169022175458, "learning_rate": 6.215067739364397e-06, "loss": 0.4457, "step": 3310 }, { "epoch": 2.226611750861562, "grad_norm": 0.38283019608837743, "learning_rate": 6.212932080796473e-06, "loss": 0.4683, "step": 3311 }, { "epoch": 2.227284189291418, "grad_norm": 0.3627285206977748, "learning_rate": 6.2107961870767706e-06, "loss": 0.4643, "step": 3312 }, { "epoch": 2.2279566277212743, "grad_norm": 0.37400335315235345, "learning_rate": 6.20866005861938e-06, "loss": 0.4774, "step": 3313 }, { "epoch": 2.2286290661511305, "grad_norm": 0.4257019090989514, "learning_rate": 6.206523695838428e-06, "loss": 0.4554, "step": 3314 }, { "epoch": 2.2293015045809867, "grad_norm": 0.4793611486308495, "learning_rate": 6.204387099148097e-06, "loss": 0.4524, "step": 3315 }, { "epoch": 2.229973943010843, "grad_norm": 0.38225534048231125, "learning_rate": 6.2022502689626075e-06, "loss": 0.4689, "step": 3316 }, { "epoch": 2.230646381440699, "grad_norm": 0.32799250015071885, "learning_rate": 6.200113205696228e-06, "loss": 0.4682, "step": 3317 }, { "epoch": 2.231318819870556, "grad_norm": 0.47480492674234276, "learning_rate": 6.197975909763273e-06, "loss": 0.4702, "step": 3318 }, { "epoch": 2.231991258300412, "grad_norm": 0.3494408626924014, "learning_rate": 6.195838381578101e-06, "loss": 0.4562, "step": 3319 }, { "epoch": 2.232663696730268, "grad_norm": 0.37782094497467594, "learning_rate": 6.1937006215551176e-06, "loss": 0.4707, "step": 3320 }, { "epoch": 2.2333361351601244, "grad_norm": 0.37275458351555435, "learning_rate": 6.191562630108767e-06, "loss": 0.4572, "step": 3321 }, { "epoch": 2.2340085735899806, "grad_norm": 0.3533869889329662, "learning_rate": 6.189424407653548e-06, "loss": 0.4544, "step": 3322 }, { "epoch": 2.234681012019837, "grad_norm": 0.30999442867104227, "learning_rate": 6.187285954603994e-06, "loss": 0.472, "step": 3323 }, { "epoch": 2.235353450449693, "grad_norm": 0.33558462774882347, "learning_rate": 6.185147271374692e-06, "loss": 0.4644, "step": 3324 }, { "epoch": 2.2360258888795497, "grad_norm": 0.328979183614441, "learning_rate": 6.183008358380266e-06, "loss": 0.4646, "step": 3325 }, { "epoch": 2.236698327309406, "grad_norm": 0.33931223367496843, "learning_rate": 6.1808692160353904e-06, "loss": 0.4729, "step": 3326 }, { "epoch": 2.237370765739262, "grad_norm": 0.36880270279008426, "learning_rate": 6.178729844754782e-06, "loss": 0.4598, "step": 3327 }, { "epoch": 2.2380432041691183, "grad_norm": 0.3645994812221974, "learning_rate": 6.176590244953201e-06, "loss": 0.4583, "step": 3328 }, { "epoch": 2.2387156425989745, "grad_norm": 0.4262042189460906, "learning_rate": 6.174450417045453e-06, "loss": 0.4535, "step": 3329 }, { "epoch": 2.2393880810288307, "grad_norm": 0.3778285311948193, "learning_rate": 6.172310361446387e-06, "loss": 0.451, "step": 3330 }, { "epoch": 2.240060519458687, "grad_norm": 0.31217943304610896, "learning_rate": 6.170170078570898e-06, "loss": 0.4764, "step": 3331 }, { "epoch": 2.240732957888543, "grad_norm": 0.43079020556899783, "learning_rate": 6.168029568833923e-06, "loss": 0.4607, "step": 3332 }, { "epoch": 2.2414053963183997, "grad_norm": 0.3395066927764587, "learning_rate": 6.165888832650444e-06, "loss": 0.47, "step": 3333 }, { "epoch": 2.242077834748256, "grad_norm": 0.36339912533861557, "learning_rate": 6.163747870435486e-06, "loss": 0.4749, "step": 3334 }, { "epoch": 2.242750273178112, "grad_norm": 0.32776696036895525, "learning_rate": 6.16160668260412e-06, "loss": 0.4597, "step": 3335 }, { "epoch": 2.2434227116079684, "grad_norm": 0.34177905036225076, "learning_rate": 6.159465269571456e-06, "loss": 0.4552, "step": 3336 }, { "epoch": 2.2440951500378246, "grad_norm": 0.3385665299076455, "learning_rate": 6.157323631752655e-06, "loss": 0.4684, "step": 3337 }, { "epoch": 2.2447675884676808, "grad_norm": 0.3009145657215783, "learning_rate": 6.155181769562915e-06, "loss": 0.4478, "step": 3338 }, { "epoch": 2.245440026897537, "grad_norm": 0.3583058788615868, "learning_rate": 6.15303968341748e-06, "loss": 0.4798, "step": 3339 }, { "epoch": 2.2461124653273936, "grad_norm": 0.38765323534294566, "learning_rate": 6.150897373731637e-06, "loss": 0.4591, "step": 3340 }, { "epoch": 2.24678490375725, "grad_norm": 0.32351350562173625, "learning_rate": 6.148754840920718e-06, "loss": 0.4665, "step": 3341 }, { "epoch": 2.247457342187106, "grad_norm": 0.3610813084026684, "learning_rate": 6.146612085400096e-06, "loss": 0.4749, "step": 3342 }, { "epoch": 2.2481297806169622, "grad_norm": 0.346156630363326, "learning_rate": 6.144469107585189e-06, "loss": 0.4721, "step": 3343 }, { "epoch": 2.2488022190468184, "grad_norm": 0.5827995420298924, "learning_rate": 6.142325907891458e-06, "loss": 0.4634, "step": 3344 }, { "epoch": 2.2494746574766746, "grad_norm": 0.3453503009767493, "learning_rate": 6.140182486734403e-06, "loss": 0.4577, "step": 3345 }, { "epoch": 2.250147095906531, "grad_norm": 0.39578599271575676, "learning_rate": 6.138038844529574e-06, "loss": 0.4555, "step": 3346 }, { "epoch": 2.2508195343363875, "grad_norm": 0.3610888497577412, "learning_rate": 6.1358949816925565e-06, "loss": 0.4697, "step": 3347 }, { "epoch": 2.2514919727662437, "grad_norm": 0.3602710216475961, "learning_rate": 6.133750898638985e-06, "loss": 0.4692, "step": 3348 }, { "epoch": 2.2521644111961, "grad_norm": 0.36150798187522293, "learning_rate": 6.131606595784531e-06, "loss": 0.4515, "step": 3349 }, { "epoch": 2.252836849625956, "grad_norm": 0.32433360019611446, "learning_rate": 6.129462073544916e-06, "loss": 0.4559, "step": 3350 }, { "epoch": 2.2535092880558123, "grad_norm": 0.33643243164387887, "learning_rate": 6.1273173323358955e-06, "loss": 0.4524, "step": 3351 }, { "epoch": 2.2541817264856685, "grad_norm": 0.3717075416643779, "learning_rate": 6.125172372573275e-06, "loss": 0.4674, "step": 3352 }, { "epoch": 2.2548541649155247, "grad_norm": 0.4768402626852065, "learning_rate": 6.123027194672897e-06, "loss": 0.4533, "step": 3353 }, { "epoch": 2.2555266033453814, "grad_norm": 0.4102465952984789, "learning_rate": 6.120881799050648e-06, "loss": 0.448, "step": 3354 }, { "epoch": 2.2561990417752376, "grad_norm": 0.34922910900027687, "learning_rate": 6.118736186122459e-06, "loss": 0.4596, "step": 3355 }, { "epoch": 2.2568714802050938, "grad_norm": 0.3605208529073994, "learning_rate": 6.116590356304299e-06, "loss": 0.4621, "step": 3356 }, { "epoch": 2.25754391863495, "grad_norm": 0.4704991433860005, "learning_rate": 6.114444310012181e-06, "loss": 0.4639, "step": 3357 }, { "epoch": 2.258216357064806, "grad_norm": 0.3261559815874674, "learning_rate": 6.112298047662162e-06, "loss": 0.4575, "step": 3358 }, { "epoch": 2.2588887954946624, "grad_norm": 0.41003180273468104, "learning_rate": 6.110151569670338e-06, "loss": 0.4613, "step": 3359 }, { "epoch": 2.2595612339245186, "grad_norm": 0.32332479510461204, "learning_rate": 6.108004876452847e-06, "loss": 0.4455, "step": 3360 }, { "epoch": 2.2602336723543752, "grad_norm": 0.3221377433290264, "learning_rate": 6.1058579684258715e-06, "loss": 0.458, "step": 3361 }, { "epoch": 2.2609061107842314, "grad_norm": 0.31199433964100104, "learning_rate": 6.103710846005631e-06, "loss": 0.4612, "step": 3362 }, { "epoch": 2.2615785492140876, "grad_norm": 0.36232928587137264, "learning_rate": 6.101563509608391e-06, "loss": 0.4578, "step": 3363 }, { "epoch": 2.262250987643944, "grad_norm": 0.4503992437491697, "learning_rate": 6.099415959650459e-06, "loss": 0.4629, "step": 3364 }, { "epoch": 2.2629234260738, "grad_norm": 0.3428325411929094, "learning_rate": 6.097268196548176e-06, "loss": 0.461, "step": 3365 }, { "epoch": 2.2635958645036562, "grad_norm": 0.3724685334484763, "learning_rate": 6.095120220717934e-06, "loss": 0.4531, "step": 3366 }, { "epoch": 2.2642683029335124, "grad_norm": 0.3771343863955035, "learning_rate": 6.092972032576161e-06, "loss": 0.4652, "step": 3367 }, { "epoch": 2.264940741363369, "grad_norm": 0.49327174181998573, "learning_rate": 6.09082363253933e-06, "loss": 0.4527, "step": 3368 }, { "epoch": 2.2656131797932253, "grad_norm": 0.3807007587697114, "learning_rate": 6.088675021023948e-06, "loss": 0.4629, "step": 3369 }, { "epoch": 2.2662856182230815, "grad_norm": 0.3282162633067282, "learning_rate": 6.08652619844657e-06, "loss": 0.4729, "step": 3370 }, { "epoch": 2.2669580566529377, "grad_norm": 0.3615931019391284, "learning_rate": 6.0843771652237884e-06, "loss": 0.478, "step": 3371 }, { "epoch": 2.267630495082794, "grad_norm": 0.7113957644212862, "learning_rate": 6.082227921772239e-06, "loss": 0.4586, "step": 3372 }, { "epoch": 2.26830293351265, "grad_norm": 0.3374916536736294, "learning_rate": 6.080078468508595e-06, "loss": 0.4684, "step": 3373 }, { "epoch": 2.2689753719425063, "grad_norm": 0.33119172696213184, "learning_rate": 6.077928805849571e-06, "loss": 0.4681, "step": 3374 }, { "epoch": 2.269647810372363, "grad_norm": 0.3473668156048032, "learning_rate": 6.075778934211926e-06, "loss": 0.4488, "step": 3375 }, { "epoch": 2.270320248802219, "grad_norm": 0.37623319413269796, "learning_rate": 6.073628854012457e-06, "loss": 0.4698, "step": 3376 }, { "epoch": 2.2709926872320754, "grad_norm": 0.38767667636713227, "learning_rate": 6.0714785656679984e-06, "loss": 0.456, "step": 3377 }, { "epoch": 2.2716651256619316, "grad_norm": 0.329376270538414, "learning_rate": 6.069328069595431e-06, "loss": 0.4644, "step": 3378 }, { "epoch": 2.2723375640917878, "grad_norm": 0.3312790537268965, "learning_rate": 6.06717736621167e-06, "loss": 0.4416, "step": 3379 }, { "epoch": 2.273010002521644, "grad_norm": 0.3267748945523019, "learning_rate": 6.065026455933673e-06, "loss": 0.4578, "step": 3380 }, { "epoch": 2.2736824409515, "grad_norm": 0.4400018699156518, "learning_rate": 6.06287533917844e-06, "loss": 0.4839, "step": 3381 }, { "epoch": 2.274354879381357, "grad_norm": 0.33881760895521307, "learning_rate": 6.060724016363006e-06, "loss": 0.4499, "step": 3382 }, { "epoch": 2.275027317811213, "grad_norm": 0.3591199272872496, "learning_rate": 6.058572487904454e-06, "loss": 0.4621, "step": 3383 }, { "epoch": 2.2756997562410692, "grad_norm": 0.43687174063020634, "learning_rate": 6.056420754219898e-06, "loss": 0.4558, "step": 3384 }, { "epoch": 2.2763721946709254, "grad_norm": 0.6033053826657983, "learning_rate": 6.054268815726498e-06, "loss": 0.4604, "step": 3385 }, { "epoch": 2.2770446331007816, "grad_norm": 0.3598084090904599, "learning_rate": 6.052116672841451e-06, "loss": 0.4625, "step": 3386 }, { "epoch": 2.277717071530638, "grad_norm": 0.4066941227589285, "learning_rate": 6.049964325981994e-06, "loss": 0.4718, "step": 3387 }, { "epoch": 2.278389509960494, "grad_norm": 0.37321132978250743, "learning_rate": 6.047811775565403e-06, "loss": 0.4598, "step": 3388 }, { "epoch": 2.2790619483903507, "grad_norm": 0.36319732850319875, "learning_rate": 6.045659022008993e-06, "loss": 0.4669, "step": 3389 }, { "epoch": 2.279734386820207, "grad_norm": 0.42909122433100383, "learning_rate": 6.043506065730121e-06, "loss": 0.4616, "step": 3390 }, { "epoch": 2.280406825250063, "grad_norm": 0.3331074209108524, "learning_rate": 6.041352907146182e-06, "loss": 0.4412, "step": 3391 }, { "epoch": 2.2810792636799193, "grad_norm": 0.3376033994060931, "learning_rate": 6.0391995466746105e-06, "loss": 0.4671, "step": 3392 }, { "epoch": 2.2817517021097755, "grad_norm": 0.3632299799785127, "learning_rate": 6.037045984732877e-06, "loss": 0.4602, "step": 3393 }, { "epoch": 2.2824241405396317, "grad_norm": 0.3367098095856448, "learning_rate": 6.034892221738498e-06, "loss": 0.4681, "step": 3394 }, { "epoch": 2.283096578969488, "grad_norm": 0.3349169481214063, "learning_rate": 6.032738258109019e-06, "loss": 0.4693, "step": 3395 }, { "epoch": 2.2837690173993446, "grad_norm": 0.43376510365880344, "learning_rate": 6.030584094262034e-06, "loss": 0.4651, "step": 3396 }, { "epoch": 2.2844414558292008, "grad_norm": 0.3622779425031837, "learning_rate": 6.028429730615172e-06, "loss": 0.4505, "step": 3397 }, { "epoch": 2.285113894259057, "grad_norm": 0.8038609794552343, "learning_rate": 6.026275167586099e-06, "loss": 0.455, "step": 3398 }, { "epoch": 2.285786332688913, "grad_norm": 0.31065295877150795, "learning_rate": 6.024120405592521e-06, "loss": 0.4607, "step": 3399 }, { "epoch": 2.2864587711187694, "grad_norm": 0.3118730137489182, "learning_rate": 6.021965445052182e-06, "loss": 0.4613, "step": 3400 }, { "epoch": 2.2871312095486256, "grad_norm": 0.31927555976031363, "learning_rate": 6.019810286382871e-06, "loss": 0.4642, "step": 3401 }, { "epoch": 2.287803647978482, "grad_norm": 0.3290263416853223, "learning_rate": 6.017654930002404e-06, "loss": 0.4467, "step": 3402 }, { "epoch": 2.2884760864083384, "grad_norm": 0.41316779038138807, "learning_rate": 6.015499376328642e-06, "loss": 0.467, "step": 3403 }, { "epoch": 2.2891485248381946, "grad_norm": 0.31784747734763796, "learning_rate": 6.0133436257794845e-06, "loss": 0.4578, "step": 3404 }, { "epoch": 2.289820963268051, "grad_norm": 0.3090216314364643, "learning_rate": 6.011187678772868e-06, "loss": 0.463, "step": 3405 }, { "epoch": 2.290493401697907, "grad_norm": 0.8887177266878555, "learning_rate": 6.009031535726766e-06, "loss": 0.4587, "step": 3406 }, { "epoch": 2.2911658401277633, "grad_norm": 0.32367775021751516, "learning_rate": 6.006875197059191e-06, "loss": 0.4501, "step": 3407 }, { "epoch": 2.2918382785576195, "grad_norm": 0.40011093762620686, "learning_rate": 6.004718663188196e-06, "loss": 0.4481, "step": 3408 }, { "epoch": 2.2925107169874757, "grad_norm": 0.33581182782298286, "learning_rate": 6.0025619345318665e-06, "loss": 0.4477, "step": 3409 }, { "epoch": 2.2931831554173323, "grad_norm": 0.35930830236800326, "learning_rate": 6.000405011508331e-06, "loss": 0.4642, "step": 3410 }, { "epoch": 2.2938555938471885, "grad_norm": 0.38802853702385304, "learning_rate": 5.99824789453575e-06, "loss": 0.4627, "step": 3411 }, { "epoch": 2.2945280322770447, "grad_norm": 0.3984723628092719, "learning_rate": 5.996090584032329e-06, "loss": 0.4616, "step": 3412 }, { "epoch": 2.295200470706901, "grad_norm": 0.3867437885588343, "learning_rate": 5.993933080416303e-06, "loss": 0.461, "step": 3413 }, { "epoch": 2.295872909136757, "grad_norm": 0.39906088363451553, "learning_rate": 5.9917753841059516e-06, "loss": 0.4793, "step": 3414 }, { "epoch": 2.2965453475666133, "grad_norm": 0.3425550182660963, "learning_rate": 5.989617495519586e-06, "loss": 0.457, "step": 3415 }, { "epoch": 2.2972177859964695, "grad_norm": 0.33384643461177643, "learning_rate": 5.987459415075559e-06, "loss": 0.4755, "step": 3416 }, { "epoch": 2.297890224426326, "grad_norm": 0.38703558717300707, "learning_rate": 5.985301143192258e-06, "loss": 0.4556, "step": 3417 }, { "epoch": 2.2985626628561824, "grad_norm": 0.4168459939748986, "learning_rate": 5.983142680288109e-06, "loss": 0.4587, "step": 3418 }, { "epoch": 2.2992351012860386, "grad_norm": 0.3324296279399844, "learning_rate": 5.980984026781572e-06, "loss": 0.4543, "step": 3419 }, { "epoch": 2.299907539715895, "grad_norm": 0.3950144902415733, "learning_rate": 5.978825183091148e-06, "loss": 0.4772, "step": 3420 }, { "epoch": 2.300579978145751, "grad_norm": 0.4183703742331098, "learning_rate": 5.976666149635375e-06, "loss": 0.455, "step": 3421 }, { "epoch": 2.301252416575607, "grad_norm": 0.3388616076348071, "learning_rate": 5.974506926832822e-06, "loss": 0.4559, "step": 3422 }, { "epoch": 2.3019248550054634, "grad_norm": 0.343276092749269, "learning_rate": 5.972347515102102e-06, "loss": 0.4581, "step": 3423 }, { "epoch": 2.30259729343532, "grad_norm": 0.3380498032264536, "learning_rate": 5.97018791486186e-06, "loss": 0.4748, "step": 3424 }, { "epoch": 2.3032697318651763, "grad_norm": 0.35002944027653216, "learning_rate": 5.96802812653078e-06, "loss": 0.4524, "step": 3425 }, { "epoch": 2.3039421702950325, "grad_norm": 0.34512713571252573, "learning_rate": 5.9658681505275785e-06, "loss": 0.457, "step": 3426 }, { "epoch": 2.3046146087248887, "grad_norm": 0.380751421190636, "learning_rate": 5.963707987271014e-06, "loss": 0.4629, "step": 3427 }, { "epoch": 2.305287047154745, "grad_norm": 0.44507606560359714, "learning_rate": 5.961547637179875e-06, "loss": 0.4526, "step": 3428 }, { "epoch": 2.305959485584601, "grad_norm": 0.4277887184641815, "learning_rate": 5.959387100672994e-06, "loss": 0.4551, "step": 3429 }, { "epoch": 2.3066319240144573, "grad_norm": 0.2969333291788095, "learning_rate": 5.957226378169233e-06, "loss": 0.4567, "step": 3430 }, { "epoch": 2.307304362444314, "grad_norm": 0.3232796193905546, "learning_rate": 5.955065470087491e-06, "loss": 0.4579, "step": 3431 }, { "epoch": 2.30797680087417, "grad_norm": 0.4424788971209814, "learning_rate": 5.9529043768467085e-06, "loss": 0.4559, "step": 3432 }, { "epoch": 2.3086492393040263, "grad_norm": 0.4020983687858228, "learning_rate": 5.950743098865854e-06, "loss": 0.4594, "step": 3433 }, { "epoch": 2.3093216777338825, "grad_norm": 0.33850722955430224, "learning_rate": 5.948581636563939e-06, "loss": 0.4516, "step": 3434 }, { "epoch": 2.3099941161637387, "grad_norm": 0.34400717979285295, "learning_rate": 5.946419990360004e-06, "loss": 0.4634, "step": 3435 }, { "epoch": 2.310666554593595, "grad_norm": 0.3447090933374568, "learning_rate": 5.94425816067313e-06, "loss": 0.4685, "step": 3436 }, { "epoch": 2.311338993023451, "grad_norm": 0.3135329848238476, "learning_rate": 5.9420961479224315e-06, "loss": 0.4433, "step": 3437 }, { "epoch": 2.312011431453308, "grad_norm": 0.38012311880815325, "learning_rate": 5.9399339525270595e-06, "loss": 0.4714, "step": 3438 }, { "epoch": 2.312683869883164, "grad_norm": 0.3890841552249759, "learning_rate": 5.9377715749062e-06, "loss": 0.4611, "step": 3439 }, { "epoch": 2.31335630831302, "grad_norm": 0.3188847168667241, "learning_rate": 5.935609015479075e-06, "loss": 0.4671, "step": 3440 }, { "epoch": 2.3140287467428764, "grad_norm": 0.49850677405706556, "learning_rate": 5.9334462746649375e-06, "loss": 0.4444, "step": 3441 }, { "epoch": 2.3147011851727326, "grad_norm": 0.35339646675873393, "learning_rate": 5.9312833528830835e-06, "loss": 0.4606, "step": 3442 }, { "epoch": 2.315373623602589, "grad_norm": 0.3545069415592496, "learning_rate": 5.92912025055284e-06, "loss": 0.4423, "step": 3443 }, { "epoch": 2.316046062032445, "grad_norm": 0.3661937143726214, "learning_rate": 5.926956968093565e-06, "loss": 0.46, "step": 3444 }, { "epoch": 2.3167185004623017, "grad_norm": 0.3305650067264564, "learning_rate": 5.924793505924658e-06, "loss": 0.46, "step": 3445 }, { "epoch": 2.317390938892158, "grad_norm": 0.36544706722770126, "learning_rate": 5.9226298644655485e-06, "loss": 0.4495, "step": 3446 }, { "epoch": 2.318063377322014, "grad_norm": 0.3967132330665242, "learning_rate": 5.920466044135704e-06, "loss": 0.4619, "step": 3447 }, { "epoch": 2.3187358157518703, "grad_norm": 0.47266167596193176, "learning_rate": 5.918302045354626e-06, "loss": 0.4483, "step": 3448 }, { "epoch": 2.3194082541817265, "grad_norm": 0.3989441553994751, "learning_rate": 5.91613786854185e-06, "loss": 0.4614, "step": 3449 }, { "epoch": 2.3200806926115827, "grad_norm": 0.519376934857127, "learning_rate": 5.9139735141169455e-06, "loss": 0.4338, "step": 3450 }, { "epoch": 2.320753131041439, "grad_norm": 0.416453104062567, "learning_rate": 5.911808982499519e-06, "loss": 0.4563, "step": 3451 }, { "epoch": 2.3214255694712955, "grad_norm": 0.36849376223410835, "learning_rate": 5.909644274109204e-06, "loss": 0.4497, "step": 3452 }, { "epoch": 2.3220980079011517, "grad_norm": 0.33518302342042033, "learning_rate": 5.907479389365678e-06, "loss": 0.4594, "step": 3453 }, { "epoch": 2.322770446331008, "grad_norm": 0.38748584452757723, "learning_rate": 5.905314328688649e-06, "loss": 0.4528, "step": 3454 }, { "epoch": 2.323442884760864, "grad_norm": 0.44889342511131064, "learning_rate": 5.903149092497856e-06, "loss": 0.4646, "step": 3455 }, { "epoch": 2.3241153231907203, "grad_norm": 0.39168359932896285, "learning_rate": 5.900983681213075e-06, "loss": 0.4451, "step": 3456 }, { "epoch": 2.3247877616205765, "grad_norm": 0.33063191767706884, "learning_rate": 5.898818095254116e-06, "loss": 0.4552, "step": 3457 }, { "epoch": 2.3254602000504327, "grad_norm": 0.4200611590444878, "learning_rate": 5.896652335040825e-06, "loss": 0.4658, "step": 3458 }, { "epoch": 2.3261326384802894, "grad_norm": 0.3506150399617044, "learning_rate": 5.894486400993072e-06, "loss": 0.4623, "step": 3459 }, { "epoch": 2.3268050769101456, "grad_norm": 0.4364839190318039, "learning_rate": 5.8923202935307755e-06, "loss": 0.4401, "step": 3460 }, { "epoch": 2.327477515340002, "grad_norm": 0.3506604314068755, "learning_rate": 5.890154013073875e-06, "loss": 0.4564, "step": 3461 }, { "epoch": 2.328149953769858, "grad_norm": 0.3195057224021962, "learning_rate": 5.887987560042348e-06, "loss": 0.4447, "step": 3462 }, { "epoch": 2.328822392199714, "grad_norm": 0.4081013352389473, "learning_rate": 5.88582093485621e-06, "loss": 0.4575, "step": 3463 }, { "epoch": 2.3294948306295704, "grad_norm": 0.3509868634510859, "learning_rate": 5.883654137935501e-06, "loss": 0.4437, "step": 3464 }, { "epoch": 2.3301672690594266, "grad_norm": 0.3227618446614624, "learning_rate": 5.881487169700305e-06, "loss": 0.4718, "step": 3465 }, { "epoch": 2.330839707489283, "grad_norm": 0.7520884280657425, "learning_rate": 5.879320030570728e-06, "loss": 0.4716, "step": 3466 }, { "epoch": 2.3315121459191395, "grad_norm": 0.3747673865418648, "learning_rate": 5.877152720966917e-06, "loss": 0.4656, "step": 3467 }, { "epoch": 2.3321845843489957, "grad_norm": 0.36206969058631283, "learning_rate": 5.8749852413090465e-06, "loss": 0.4562, "step": 3468 }, { "epoch": 2.332857022778852, "grad_norm": 0.4123862983586637, "learning_rate": 5.872817592017331e-06, "loss": 0.4469, "step": 3469 }, { "epoch": 2.333529461208708, "grad_norm": 0.33733669496642177, "learning_rate": 5.870649773512011e-06, "loss": 0.4565, "step": 3470 }, { "epoch": 2.3342018996385643, "grad_norm": 0.42636616448320297, "learning_rate": 5.868481786213364e-06, "loss": 0.4468, "step": 3471 }, { "epoch": 2.3348743380684205, "grad_norm": 0.384115666486269, "learning_rate": 5.866313630541698e-06, "loss": 0.4437, "step": 3472 }, { "epoch": 2.3355467764982767, "grad_norm": 0.41716083426210254, "learning_rate": 5.864145306917355e-06, "loss": 0.4298, "step": 3473 }, { "epoch": 2.3362192149281333, "grad_norm": 0.3513270477111295, "learning_rate": 5.861976815760708e-06, "loss": 0.4543, "step": 3474 }, { "epoch": 2.3368916533579895, "grad_norm": 0.3658591809502371, "learning_rate": 5.859808157492164e-06, "loss": 0.4586, "step": 3475 }, { "epoch": 2.3375640917878457, "grad_norm": 0.3776366683658066, "learning_rate": 5.857639332532163e-06, "loss": 0.4575, "step": 3476 }, { "epoch": 2.338236530217702, "grad_norm": 0.36436135486161964, "learning_rate": 5.855470341301176e-06, "loss": 0.4386, "step": 3477 }, { "epoch": 2.338908968647558, "grad_norm": 0.3240074843175133, "learning_rate": 5.853301184219706e-06, "loss": 0.4498, "step": 3478 }, { "epoch": 2.3395814070774144, "grad_norm": 0.4090670479530033, "learning_rate": 5.851131861708288e-06, "loss": 0.473, "step": 3479 }, { "epoch": 2.3402538455072706, "grad_norm": 0.388472246416491, "learning_rate": 5.84896237418749e-06, "loss": 0.4737, "step": 3480 }, { "epoch": 2.3409262839371268, "grad_norm": 0.3360497237320795, "learning_rate": 5.846792722077911e-06, "loss": 0.4552, "step": 3481 }, { "epoch": 2.3415987223669834, "grad_norm": 0.341364864082169, "learning_rate": 5.8446229058001856e-06, "loss": 0.4576, "step": 3482 }, { "epoch": 2.3422711607968396, "grad_norm": 0.32697453039951657, "learning_rate": 5.842452925774974e-06, "loss": 0.4392, "step": 3483 }, { "epoch": 2.342943599226696, "grad_norm": 0.3980001762154398, "learning_rate": 5.840282782422972e-06, "loss": 0.4551, "step": 3484 }, { "epoch": 2.343616037656552, "grad_norm": 0.31000431629498004, "learning_rate": 5.838112476164908e-06, "loss": 0.4521, "step": 3485 }, { "epoch": 2.3442884760864082, "grad_norm": 0.31545657709084923, "learning_rate": 5.835942007421538e-06, "loss": 0.4641, "step": 3486 }, { "epoch": 2.3449609145162644, "grad_norm": 0.3679552244830917, "learning_rate": 5.833771376613654e-06, "loss": 0.4593, "step": 3487 }, { "epoch": 2.3456333529461206, "grad_norm": 0.31157320097469177, "learning_rate": 5.831600584162076e-06, "loss": 0.4561, "step": 3488 }, { "epoch": 2.3463057913759773, "grad_norm": 0.468879910189735, "learning_rate": 5.829429630487659e-06, "loss": 0.4376, "step": 3489 }, { "epoch": 2.3469782298058335, "grad_norm": 0.3771545389043398, "learning_rate": 5.827258516011284e-06, "loss": 0.4609, "step": 3490 }, { "epoch": 2.3476506682356897, "grad_norm": 0.39098431423034563, "learning_rate": 5.825087241153867e-06, "loss": 0.4572, "step": 3491 }, { "epoch": 2.348323106665546, "grad_norm": 0.32933039095880423, "learning_rate": 5.822915806336355e-06, "loss": 0.4492, "step": 3492 }, { "epoch": 2.348995545095402, "grad_norm": 0.33183804809411027, "learning_rate": 5.820744211979725e-06, "loss": 0.4514, "step": 3493 }, { "epoch": 2.3496679835252583, "grad_norm": 0.33945276045807116, "learning_rate": 5.818572458504983e-06, "loss": 0.442, "step": 3494 }, { "epoch": 2.3503404219551145, "grad_norm": 0.3534888950933072, "learning_rate": 5.816400546333171e-06, "loss": 0.4535, "step": 3495 }, { "epoch": 2.351012860384971, "grad_norm": 0.3096377373774302, "learning_rate": 5.814228475885357e-06, "loss": 0.4546, "step": 3496 }, { "epoch": 2.3516852988148274, "grad_norm": 0.32904977790765544, "learning_rate": 5.812056247582643e-06, "loss": 0.458, "step": 3497 }, { "epoch": 2.3523577372446836, "grad_norm": 0.38029547051000695, "learning_rate": 5.809883861846159e-06, "loss": 0.4387, "step": 3498 }, { "epoch": 2.3530301756745398, "grad_norm": 0.33342204975446, "learning_rate": 5.8077113190970644e-06, "loss": 0.4563, "step": 3499 }, { "epoch": 2.353702614104396, "grad_norm": 0.34230708427497747, "learning_rate": 5.8055386197565564e-06, "loss": 0.4583, "step": 3500 }, { "epoch": 2.354375052534252, "grad_norm": 0.2951783161108756, "learning_rate": 5.803365764245852e-06, "loss": 0.4583, "step": 3501 }, { "epoch": 2.3550474909641084, "grad_norm": 0.3079953132771035, "learning_rate": 5.801192752986208e-06, "loss": 0.4503, "step": 3502 }, { "epoch": 2.355719929393965, "grad_norm": 0.3570046765039688, "learning_rate": 5.799019586398904e-06, "loss": 0.4578, "step": 3503 }, { "epoch": 2.356392367823821, "grad_norm": 0.3177125091323911, "learning_rate": 5.796846264905255e-06, "loss": 0.4648, "step": 3504 }, { "epoch": 2.3570648062536774, "grad_norm": 0.3363125140497039, "learning_rate": 5.794672788926602e-06, "loss": 0.4623, "step": 3505 }, { "epoch": 2.3577372446835336, "grad_norm": 0.3167853911375675, "learning_rate": 5.792499158884322e-06, "loss": 0.4667, "step": 3506 }, { "epoch": 2.35840968311339, "grad_norm": 0.6921775759842348, "learning_rate": 5.790325375199812e-06, "loss": 0.4543, "step": 3507 }, { "epoch": 2.359082121543246, "grad_norm": 0.30236339661586337, "learning_rate": 5.788151438294509e-06, "loss": 0.4433, "step": 3508 }, { "epoch": 2.3597545599731022, "grad_norm": 0.40941230596207956, "learning_rate": 5.785977348589875e-06, "loss": 0.4474, "step": 3509 }, { "epoch": 2.360426998402959, "grad_norm": 0.5196512787688972, "learning_rate": 5.783803106507399e-06, "loss": 0.4373, "step": 3510 }, { "epoch": 2.361099436832815, "grad_norm": 0.2991559649141668, "learning_rate": 5.781628712468605e-06, "loss": 0.4538, "step": 3511 }, { "epoch": 2.3617718752626713, "grad_norm": 0.3825417809422422, "learning_rate": 5.779454166895042e-06, "loss": 0.447, "step": 3512 }, { "epoch": 2.3624443136925275, "grad_norm": 0.4769351696146306, "learning_rate": 5.777279470208294e-06, "loss": 0.4713, "step": 3513 }, { "epoch": 2.3631167521223837, "grad_norm": 0.30023152393422464, "learning_rate": 5.775104622829965e-06, "loss": 0.446, "step": 3514 }, { "epoch": 2.36378919055224, "grad_norm": 0.33153694221794716, "learning_rate": 5.7729296251817e-06, "loss": 0.4776, "step": 3515 }, { "epoch": 2.364461628982096, "grad_norm": 0.4047692204296205, "learning_rate": 5.770754477685162e-06, "loss": 0.4573, "step": 3516 }, { "epoch": 2.3651340674119528, "grad_norm": 0.40042975658998164, "learning_rate": 5.7685791807620505e-06, "loss": 0.4606, "step": 3517 }, { "epoch": 2.365806505841809, "grad_norm": 0.327532117308796, "learning_rate": 5.766403734834089e-06, "loss": 0.445, "step": 3518 }, { "epoch": 2.366478944271665, "grad_norm": 0.2704078193848059, "learning_rate": 5.764228140323036e-06, "loss": 0.4386, "step": 3519 }, { "epoch": 2.3671513827015214, "grad_norm": 0.3404895554400373, "learning_rate": 5.7620523976506715e-06, "loss": 0.4511, "step": 3520 }, { "epoch": 2.3678238211313776, "grad_norm": 0.4333037773933191, "learning_rate": 5.759876507238811e-06, "loss": 0.4511, "step": 3521 }, { "epoch": 2.3684962595612338, "grad_norm": 1.309105942254137, "learning_rate": 5.7577004695092945e-06, "loss": 0.4603, "step": 3522 }, { "epoch": 2.36916869799109, "grad_norm": 0.32328978955846294, "learning_rate": 5.755524284883989e-06, "loss": 0.4632, "step": 3523 }, { "epoch": 2.3698411364209466, "grad_norm": 0.37849843215721984, "learning_rate": 5.753347953784797e-06, "loss": 0.4409, "step": 3524 }, { "epoch": 2.370513574850803, "grad_norm": 0.3476793256077204, "learning_rate": 5.751171476633641e-06, "loss": 0.4486, "step": 3525 }, { "epoch": 2.371186013280659, "grad_norm": 0.3750262990064418, "learning_rate": 5.748994853852479e-06, "loss": 0.4647, "step": 3526 }, { "epoch": 2.3718584517105152, "grad_norm": 0.34635074214628625, "learning_rate": 5.746818085863292e-06, "loss": 0.4416, "step": 3527 }, { "epoch": 2.3725308901403714, "grad_norm": 0.38434110558426926, "learning_rate": 5.74464117308809e-06, "loss": 0.4564, "step": 3528 }, { "epoch": 2.3732033285702276, "grad_norm": 0.34561027779242004, "learning_rate": 5.742464115948915e-06, "loss": 0.4424, "step": 3529 }, { "epoch": 2.373875767000084, "grad_norm": 0.33266273991326856, "learning_rate": 5.740286914867832e-06, "loss": 0.4387, "step": 3530 }, { "epoch": 2.3745482054299405, "grad_norm": 0.42008457441870206, "learning_rate": 5.738109570266939e-06, "loss": 0.4511, "step": 3531 }, { "epoch": 2.3752206438597967, "grad_norm": 0.3368066077329263, "learning_rate": 5.735932082568356e-06, "loss": 0.4416, "step": 3532 }, { "epoch": 2.375893082289653, "grad_norm": 0.30953007129649845, "learning_rate": 5.733754452194235e-06, "loss": 0.4515, "step": 3533 }, { "epoch": 2.376565520719509, "grad_norm": 0.3584541636729375, "learning_rate": 5.731576679566754e-06, "loss": 0.4529, "step": 3534 }, { "epoch": 2.3772379591493653, "grad_norm": 0.3819730806896943, "learning_rate": 5.729398765108118e-06, "loss": 0.4545, "step": 3535 }, { "epoch": 2.3779103975792215, "grad_norm": 0.3393313272383021, "learning_rate": 5.727220709240563e-06, "loss": 0.4607, "step": 3536 }, { "epoch": 2.3785828360090777, "grad_norm": 0.3407277498003918, "learning_rate": 5.725042512386347e-06, "loss": 0.4556, "step": 3537 }, { "epoch": 2.3792552744389344, "grad_norm": 0.44335690258193794, "learning_rate": 5.72286417496776e-06, "loss": 0.4492, "step": 3538 }, { "epoch": 2.3799277128687906, "grad_norm": 0.360155250476559, "learning_rate": 5.720685697407118e-06, "loss": 0.4658, "step": 3539 }, { "epoch": 2.3806001512986468, "grad_norm": 0.4081717614851412, "learning_rate": 5.718507080126761e-06, "loss": 0.453, "step": 3540 }, { "epoch": 2.381272589728503, "grad_norm": 0.32668892308941555, "learning_rate": 5.71632832354906e-06, "loss": 0.4399, "step": 3541 }, { "epoch": 2.381945028158359, "grad_norm": 0.4516247995817048, "learning_rate": 5.714149428096413e-06, "loss": 0.4543, "step": 3542 }, { "epoch": 2.3826174665882154, "grad_norm": 0.35628091371878984, "learning_rate": 5.711970394191241e-06, "loss": 0.4426, "step": 3543 }, { "epoch": 2.3832899050180716, "grad_norm": 0.3202215438454765, "learning_rate": 5.709791222255996e-06, "loss": 0.4556, "step": 3544 }, { "epoch": 2.3839623434479282, "grad_norm": 0.3217462913974043, "learning_rate": 5.7076119127131545e-06, "loss": 0.4578, "step": 3545 }, { "epoch": 2.3846347818777844, "grad_norm": 0.3800753121649306, "learning_rate": 5.705432465985224e-06, "loss": 0.4543, "step": 3546 }, { "epoch": 2.3853072203076406, "grad_norm": 0.3745986832533898, "learning_rate": 5.703252882494728e-06, "loss": 0.4564, "step": 3547 }, { "epoch": 2.385979658737497, "grad_norm": 0.39388160698750074, "learning_rate": 5.70107316266423e-06, "loss": 0.4553, "step": 3548 }, { "epoch": 2.386652097167353, "grad_norm": 0.3693568996979446, "learning_rate": 5.698893306916309e-06, "loss": 0.4545, "step": 3549 }, { "epoch": 2.3873245355972093, "grad_norm": 0.3358204292471544, "learning_rate": 5.6967133156735785e-06, "loss": 0.4654, "step": 3550 }, { "epoch": 2.3879969740270655, "grad_norm": 0.3879923145296747, "learning_rate": 5.6945331893586705e-06, "loss": 0.4497, "step": 3551 }, { "epoch": 2.388669412456922, "grad_norm": 0.4702166619666767, "learning_rate": 5.69235292839425e-06, "loss": 0.4523, "step": 3552 }, { "epoch": 2.3893418508867783, "grad_norm": 0.35151582679612603, "learning_rate": 5.690172533203005e-06, "loss": 0.4548, "step": 3553 }, { "epoch": 2.3900142893166345, "grad_norm": 0.33623006086909196, "learning_rate": 5.68799200420765e-06, "loss": 0.4395, "step": 3554 }, { "epoch": 2.3906867277464907, "grad_norm": 0.3180533268004615, "learning_rate": 5.685811341830924e-06, "loss": 0.4521, "step": 3555 }, { "epoch": 2.391359166176347, "grad_norm": 0.3120770132369895, "learning_rate": 5.6836305464955945e-06, "loss": 0.4563, "step": 3556 }, { "epoch": 2.392031604606203, "grad_norm": 0.3313207875580303, "learning_rate": 5.681449618624454e-06, "loss": 0.4555, "step": 3557 }, { "epoch": 2.3927040430360593, "grad_norm": 0.34906684447334524, "learning_rate": 5.679268558640318e-06, "loss": 0.4399, "step": 3558 }, { "epoch": 2.393376481465916, "grad_norm": 0.3359379283214766, "learning_rate": 5.677087366966031e-06, "loss": 0.4499, "step": 3559 }, { "epoch": 2.394048919895772, "grad_norm": 0.34212649051370675, "learning_rate": 5.67490604402446e-06, "loss": 0.4536, "step": 3560 }, { "epoch": 2.3947213583256284, "grad_norm": 0.3968833539019725, "learning_rate": 5.672724590238502e-06, "loss": 0.4363, "step": 3561 }, { "epoch": 2.3953937967554846, "grad_norm": 0.33982599660113577, "learning_rate": 5.670543006031075e-06, "loss": 0.4588, "step": 3562 }, { "epoch": 2.396066235185341, "grad_norm": 0.3126079592635059, "learning_rate": 5.668361291825124e-06, "loss": 0.4459, "step": 3563 }, { "epoch": 2.396738673615197, "grad_norm": 0.3645127404080134, "learning_rate": 5.666179448043621e-06, "loss": 0.4499, "step": 3564 }, { "epoch": 2.397411112045053, "grad_norm": 0.3110482341885458, "learning_rate": 5.663997475109558e-06, "loss": 0.4678, "step": 3565 }, { "epoch": 2.39808355047491, "grad_norm": 0.32199104525287026, "learning_rate": 5.661815373445959e-06, "loss": 0.4614, "step": 3566 }, { "epoch": 2.398755988904766, "grad_norm": 0.8639742510000906, "learning_rate": 5.659633143475864e-06, "loss": 0.4617, "step": 3567 }, { "epoch": 2.3994284273346222, "grad_norm": 0.40995524737792327, "learning_rate": 5.6574507856223474e-06, "loss": 0.4533, "step": 3568 }, { "epoch": 2.4001008657644785, "grad_norm": 0.36762151659653114, "learning_rate": 5.655268300308502e-06, "loss": 0.4489, "step": 3569 }, { "epoch": 2.4007733041943347, "grad_norm": 0.33318188755161604, "learning_rate": 5.653085687957449e-06, "loss": 0.4482, "step": 3570 }, { "epoch": 2.401445742624191, "grad_norm": 0.28687255302183695, "learning_rate": 5.650902948992332e-06, "loss": 0.4448, "step": 3571 }, { "epoch": 2.402118181054047, "grad_norm": 0.3976726902254766, "learning_rate": 5.648720083836319e-06, "loss": 0.4601, "step": 3572 }, { "epoch": 2.4027906194839037, "grad_norm": 0.3193218278392546, "learning_rate": 5.646537092912603e-06, "loss": 0.4585, "step": 3573 }, { "epoch": 2.40346305791376, "grad_norm": 0.36282104810944193, "learning_rate": 5.644353976644402e-06, "loss": 0.4362, "step": 3574 }, { "epoch": 2.404135496343616, "grad_norm": 0.39340217861985266, "learning_rate": 5.642170735454959e-06, "loss": 0.4593, "step": 3575 }, { "epoch": 2.4048079347734723, "grad_norm": 0.3427481067975898, "learning_rate": 5.6399873697675385e-06, "loss": 0.4485, "step": 3576 }, { "epoch": 2.4054803732033285, "grad_norm": 0.405186505645815, "learning_rate": 5.637803880005431e-06, "loss": 0.4441, "step": 3577 }, { "epoch": 2.4061528116331847, "grad_norm": 0.3167912422521965, "learning_rate": 5.635620266591953e-06, "loss": 0.4543, "step": 3578 }, { "epoch": 2.406825250063041, "grad_norm": 0.35430272710261224, "learning_rate": 5.63343652995044e-06, "loss": 0.4806, "step": 3579 }, { "epoch": 2.4074976884928976, "grad_norm": 0.39827430646088413, "learning_rate": 5.631252670504255e-06, "loss": 0.4627, "step": 3580 }, { "epoch": 2.408170126922754, "grad_norm": 0.3536213396008051, "learning_rate": 5.629068688676785e-06, "loss": 0.4375, "step": 3581 }, { "epoch": 2.40884256535261, "grad_norm": 0.5413875717151687, "learning_rate": 5.626884584891438e-06, "loss": 0.4451, "step": 3582 }, { "epoch": 2.409515003782466, "grad_norm": 0.41928945425001374, "learning_rate": 5.624700359571649e-06, "loss": 0.4569, "step": 3583 }, { "epoch": 2.4101874422123224, "grad_norm": 0.3354908193449054, "learning_rate": 5.622516013140874e-06, "loss": 0.4486, "step": 3584 }, { "epoch": 2.4108598806421786, "grad_norm": 0.3648121009986923, "learning_rate": 5.620331546022591e-06, "loss": 0.4332, "step": 3585 }, { "epoch": 2.411532319072035, "grad_norm": 0.35255253553402593, "learning_rate": 5.618146958640309e-06, "loss": 0.4381, "step": 3586 }, { "epoch": 2.4122047575018915, "grad_norm": 0.333954692659307, "learning_rate": 5.615962251417551e-06, "loss": 0.4526, "step": 3587 }, { "epoch": 2.4128771959317477, "grad_norm": 0.37607079229227774, "learning_rate": 5.613777424777871e-06, "loss": 0.4626, "step": 3588 }, { "epoch": 2.413549634361604, "grad_norm": 0.38095934546167926, "learning_rate": 5.6115924791448385e-06, "loss": 0.4528, "step": 3589 }, { "epoch": 2.41422207279146, "grad_norm": 0.3993076314402837, "learning_rate": 5.609407414942053e-06, "loss": 0.4643, "step": 3590 }, { "epoch": 2.4148945112213163, "grad_norm": 0.39223329828299025, "learning_rate": 5.607222232593131e-06, "loss": 0.4641, "step": 3591 }, { "epoch": 2.4155669496511725, "grad_norm": 0.3591949686390438, "learning_rate": 5.605036932521718e-06, "loss": 0.4494, "step": 3592 }, { "epoch": 2.4162393880810287, "grad_norm": 0.3133711489094439, "learning_rate": 5.602851515151477e-06, "loss": 0.4429, "step": 3593 }, { "epoch": 2.4169118265108853, "grad_norm": 0.33802182001683545, "learning_rate": 5.600665980906099e-06, "loss": 0.4481, "step": 3594 }, { "epoch": 2.4175842649407415, "grad_norm": 0.35171603486105313, "learning_rate": 5.59848033020929e-06, "loss": 0.4791, "step": 3595 }, { "epoch": 2.4182567033705977, "grad_norm": 0.3591522629231598, "learning_rate": 5.596294563484787e-06, "loss": 0.4724, "step": 3596 }, { "epoch": 2.418929141800454, "grad_norm": 0.447799486858325, "learning_rate": 5.594108681156347e-06, "loss": 0.4495, "step": 3597 }, { "epoch": 2.41960158023031, "grad_norm": 0.3290264637523835, "learning_rate": 5.591922683647744e-06, "loss": 0.4495, "step": 3598 }, { "epoch": 2.4202740186601663, "grad_norm": 0.3379787091837133, "learning_rate": 5.589736571382781e-06, "loss": 0.4592, "step": 3599 }, { "epoch": 2.4209464570900225, "grad_norm": 0.32521436538097526, "learning_rate": 5.587550344785278e-06, "loss": 0.4619, "step": 3600 }, { "epoch": 2.421618895519879, "grad_norm": 0.3701978164103939, "learning_rate": 5.585364004279083e-06, "loss": 0.4436, "step": 3601 }, { "epoch": 2.4222913339497354, "grad_norm": 0.3847282583347662, "learning_rate": 5.583177550288063e-06, "loss": 0.457, "step": 3602 }, { "epoch": 2.4229637723795916, "grad_norm": 0.31382599299470876, "learning_rate": 5.580990983236105e-06, "loss": 0.4668, "step": 3603 }, { "epoch": 2.423636210809448, "grad_norm": 0.3389055913276392, "learning_rate": 5.5788043035471205e-06, "loss": 0.4558, "step": 3604 }, { "epoch": 2.424308649239304, "grad_norm": 1.0617375171131134, "learning_rate": 5.576617511645044e-06, "loss": 0.4664, "step": 3605 }, { "epoch": 2.42498108766916, "grad_norm": 0.3532471892301109, "learning_rate": 5.574430607953827e-06, "loss": 0.4551, "step": 3606 }, { "epoch": 2.4256535260990164, "grad_norm": 0.3346822353524649, "learning_rate": 5.572243592897449e-06, "loss": 0.4539, "step": 3607 }, { "epoch": 2.426325964528873, "grad_norm": 0.36237163805522626, "learning_rate": 5.570056466899904e-06, "loss": 0.4612, "step": 3608 }, { "epoch": 2.4269984029587293, "grad_norm": 0.3128470088619246, "learning_rate": 5.567869230385214e-06, "loss": 0.4469, "step": 3609 }, { "epoch": 2.4276708413885855, "grad_norm": 0.34861094074937193, "learning_rate": 5.565681883777418e-06, "loss": 0.4551, "step": 3610 }, { "epoch": 2.4283432798184417, "grad_norm": 0.4297843609897647, "learning_rate": 5.56349442750058e-06, "loss": 0.4449, "step": 3611 }, { "epoch": 2.429015718248298, "grad_norm": 0.3477250631144471, "learning_rate": 5.561306861978783e-06, "loss": 0.4398, "step": 3612 }, { "epoch": 2.429688156678154, "grad_norm": 0.3509821263654013, "learning_rate": 5.559119187636129e-06, "loss": 0.4441, "step": 3613 }, { "epoch": 2.4303605951080103, "grad_norm": 0.35737277401120054, "learning_rate": 5.556931404896747e-06, "loss": 0.4494, "step": 3614 }, { "epoch": 2.431033033537867, "grad_norm": 0.3625938554539421, "learning_rate": 5.554743514184781e-06, "loss": 0.4558, "step": 3615 }, { "epoch": 2.431705471967723, "grad_norm": 0.36630770612755065, "learning_rate": 5.552555515924398e-06, "loss": 0.4443, "step": 3616 }, { "epoch": 2.4323779103975793, "grad_norm": 0.33239617734472793, "learning_rate": 5.550367410539788e-06, "loss": 0.4478, "step": 3617 }, { "epoch": 2.4330503488274355, "grad_norm": 0.7626069649045053, "learning_rate": 5.5481791984551614e-06, "loss": 0.4503, "step": 3618 }, { "epoch": 2.4337227872572917, "grad_norm": 0.3158254591145617, "learning_rate": 5.545990880094747e-06, "loss": 0.4589, "step": 3619 }, { "epoch": 2.434395225687148, "grad_norm": 0.3522408038054907, "learning_rate": 5.543802455882793e-06, "loss": 0.4539, "step": 3620 }, { "epoch": 2.435067664117004, "grad_norm": 0.35852926157212767, "learning_rate": 5.541613926243573e-06, "loss": 0.4552, "step": 3621 }, { "epoch": 2.435740102546861, "grad_norm": 0.3039993342004631, "learning_rate": 5.539425291601378e-06, "loss": 0.4565, "step": 3622 }, { "epoch": 2.436412540976717, "grad_norm": 0.34123599831642026, "learning_rate": 5.537236552380518e-06, "loss": 0.4699, "step": 3623 }, { "epoch": 2.437084979406573, "grad_norm": 0.34602669178981266, "learning_rate": 5.535047709005327e-06, "loss": 0.4612, "step": 3624 }, { "epoch": 2.4377574178364294, "grad_norm": 0.38467285537375934, "learning_rate": 5.532858761900156e-06, "loss": 0.4437, "step": 3625 }, { "epoch": 2.4384298562662856, "grad_norm": 0.3498196149534507, "learning_rate": 5.5306697114893785e-06, "loss": 0.4524, "step": 3626 }, { "epoch": 2.439102294696142, "grad_norm": 0.3548262072706039, "learning_rate": 5.5284805581973854e-06, "loss": 0.4547, "step": 3627 }, { "epoch": 2.439774733125998, "grad_norm": 0.3799419694111064, "learning_rate": 5.526291302448589e-06, "loss": 0.4424, "step": 3628 }, { "epoch": 2.4404471715558547, "grad_norm": 0.9342712842749369, "learning_rate": 5.5241019446674235e-06, "loss": 0.4591, "step": 3629 }, { "epoch": 2.441119609985711, "grad_norm": 0.3836099289086837, "learning_rate": 5.52191248527834e-06, "loss": 0.46, "step": 3630 }, { "epoch": 2.441792048415567, "grad_norm": 0.3213034553405655, "learning_rate": 5.519722924705808e-06, "loss": 0.4524, "step": 3631 }, { "epoch": 2.4424644868454233, "grad_norm": 0.3457962788868057, "learning_rate": 5.517533263374322e-06, "loss": 0.4429, "step": 3632 }, { "epoch": 2.4431369252752795, "grad_norm": 0.30079475711553777, "learning_rate": 5.51534350170839e-06, "loss": 0.4608, "step": 3633 }, { "epoch": 2.4438093637051357, "grad_norm": 0.33057774220755387, "learning_rate": 5.513153640132543e-06, "loss": 0.4751, "step": 3634 }, { "epoch": 2.444481802134992, "grad_norm": 0.6793162709862243, "learning_rate": 5.5109636790713315e-06, "loss": 0.4369, "step": 3635 }, { "epoch": 2.445154240564848, "grad_norm": 0.30836110440964787, "learning_rate": 5.508773618949326e-06, "loss": 0.4536, "step": 3636 }, { "epoch": 2.4458266789947047, "grad_norm": 0.3242160993777931, "learning_rate": 5.50658346019111e-06, "loss": 0.4322, "step": 3637 }, { "epoch": 2.446499117424561, "grad_norm": 0.4923792011120225, "learning_rate": 5.504393203221294e-06, "loss": 0.451, "step": 3638 }, { "epoch": 2.447171555854417, "grad_norm": 0.3273613311767987, "learning_rate": 5.502202848464504e-06, "loss": 0.4545, "step": 3639 }, { "epoch": 2.4478439942842734, "grad_norm": 0.37767840103116296, "learning_rate": 5.500012396345385e-06, "loss": 0.4412, "step": 3640 }, { "epoch": 2.4485164327141296, "grad_norm": 0.39168523194271326, "learning_rate": 5.497821847288599e-06, "loss": 0.4675, "step": 3641 }, { "epoch": 2.4491888711439858, "grad_norm": 0.38593499549881277, "learning_rate": 5.4956312017188315e-06, "loss": 0.4667, "step": 3642 }, { "epoch": 2.449861309573842, "grad_norm": 0.3615940397816671, "learning_rate": 5.493440460060785e-06, "loss": 0.4512, "step": 3643 }, { "epoch": 2.450533748003698, "grad_norm": 0.3281921881336059, "learning_rate": 5.491249622739177e-06, "loss": 0.4531, "step": 3644 }, { "epoch": 2.451206186433555, "grad_norm": 0.34808784454053615, "learning_rate": 5.489058690178748e-06, "loss": 0.4414, "step": 3645 }, { "epoch": 2.451878624863411, "grad_norm": 0.35169534590584306, "learning_rate": 5.4868676628042524e-06, "loss": 0.4396, "step": 3646 }, { "epoch": 2.452551063293267, "grad_norm": 0.32127016672844627, "learning_rate": 5.484676541040471e-06, "loss": 0.4441, "step": 3647 }, { "epoch": 2.4532235017231234, "grad_norm": 0.42630039919354706, "learning_rate": 5.482485325312192e-06, "loss": 0.453, "step": 3648 }, { "epoch": 2.4538959401529796, "grad_norm": 0.3802912671793287, "learning_rate": 5.480294016044232e-06, "loss": 0.4543, "step": 3649 }, { "epoch": 2.454568378582836, "grad_norm": 0.44748190503299723, "learning_rate": 5.478102613661419e-06, "loss": 0.4687, "step": 3650 }, { "epoch": 2.455240817012692, "grad_norm": 0.3925107294918871, "learning_rate": 5.475911118588603e-06, "loss": 0.4542, "step": 3651 }, { "epoch": 2.4559132554425487, "grad_norm": 0.30168919377227377, "learning_rate": 5.473719531250648e-06, "loss": 0.4424, "step": 3652 }, { "epoch": 2.456585693872405, "grad_norm": 0.3977451541372308, "learning_rate": 5.471527852072439e-06, "loss": 0.4498, "step": 3653 }, { "epoch": 2.457258132302261, "grad_norm": 0.3721355148940516, "learning_rate": 5.46933608147888e-06, "loss": 0.4476, "step": 3654 }, { "epoch": 2.4579305707321173, "grad_norm": 0.35096306924828724, "learning_rate": 5.467144219894888e-06, "loss": 0.4413, "step": 3655 }, { "epoch": 2.4586030091619735, "grad_norm": 0.3745171435882103, "learning_rate": 5.4649522677454025e-06, "loss": 0.4475, "step": 3656 }, { "epoch": 2.4592754475918297, "grad_norm": 0.34740641576534054, "learning_rate": 5.4627602254553756e-06, "loss": 0.4531, "step": 3657 }, { "epoch": 2.459947886021686, "grad_norm": 0.31338871132628493, "learning_rate": 5.460568093449782e-06, "loss": 0.454, "step": 3658 }, { "epoch": 2.4606203244515426, "grad_norm": 0.3593929671118801, "learning_rate": 5.458375872153611e-06, "loss": 0.4474, "step": 3659 }, { "epoch": 2.4612927628813988, "grad_norm": 0.33493349981955667, "learning_rate": 5.45618356199187e-06, "loss": 0.4511, "step": 3660 }, { "epoch": 2.461965201311255, "grad_norm": 0.3787649160828917, "learning_rate": 5.453991163389583e-06, "loss": 0.4399, "step": 3661 }, { "epoch": 2.462637639741111, "grad_norm": 0.32480355204924527, "learning_rate": 5.451798676771793e-06, "loss": 0.4438, "step": 3662 }, { "epoch": 2.4633100781709674, "grad_norm": 0.31279420879956654, "learning_rate": 5.449606102563554e-06, "loss": 0.4452, "step": 3663 }, { "epoch": 2.4639825166008236, "grad_norm": 0.28327007386162856, "learning_rate": 5.447413441189945e-06, "loss": 0.4472, "step": 3664 }, { "epoch": 2.4646549550306798, "grad_norm": 0.3486201881078406, "learning_rate": 5.445220693076058e-06, "loss": 0.4563, "step": 3665 }, { "epoch": 2.4653273934605364, "grad_norm": 0.36849582646330364, "learning_rate": 5.443027858647002e-06, "loss": 0.4522, "step": 3666 }, { "epoch": 2.4659998318903926, "grad_norm": 0.3301145167435137, "learning_rate": 5.440834938327905e-06, "loss": 0.4584, "step": 3667 }, { "epoch": 2.466672270320249, "grad_norm": 0.3003337613365606, "learning_rate": 5.438641932543905e-06, "loss": 0.4464, "step": 3668 }, { "epoch": 2.467344708750105, "grad_norm": 0.3347995636249391, "learning_rate": 5.436448841720166e-06, "loss": 0.435, "step": 3669 }, { "epoch": 2.4680171471799612, "grad_norm": 0.30100475327165044, "learning_rate": 5.43425566628186e-06, "loss": 0.4565, "step": 3670 }, { "epoch": 2.4686895856098174, "grad_norm": 0.38738774674073373, "learning_rate": 5.432062406654182e-06, "loss": 0.4513, "step": 3671 }, { "epoch": 2.4693620240396736, "grad_norm": 0.3242614621730323, "learning_rate": 5.4298690632623375e-06, "loss": 0.454, "step": 3672 }, { "epoch": 2.4700344624695303, "grad_norm": 0.3905562361155058, "learning_rate": 5.427675636531552e-06, "loss": 0.444, "step": 3673 }, { "epoch": 2.4707069008993865, "grad_norm": 0.34570382106032044, "learning_rate": 5.425482126887067e-06, "loss": 0.4396, "step": 3674 }, { "epoch": 2.4713793393292427, "grad_norm": 0.3403550891713755, "learning_rate": 5.423288534754141e-06, "loss": 0.4522, "step": 3675 }, { "epoch": 2.472051777759099, "grad_norm": 0.3318834277282211, "learning_rate": 5.421094860558045e-06, "loss": 0.4388, "step": 3676 }, { "epoch": 2.472724216188955, "grad_norm": 0.29745058737843016, "learning_rate": 5.4189011047240655e-06, "loss": 0.4707, "step": 3677 }, { "epoch": 2.4733966546188113, "grad_norm": 0.3149220065122759, "learning_rate": 5.416707267677512e-06, "loss": 0.4372, "step": 3678 }, { "epoch": 2.4740690930486675, "grad_norm": 0.32808004579387384, "learning_rate": 5.414513349843702e-06, "loss": 0.4561, "step": 3679 }, { "epoch": 2.474741531478524, "grad_norm": 0.38005276723555714, "learning_rate": 5.412319351647971e-06, "loss": 0.4572, "step": 3680 }, { "epoch": 2.4754139699083804, "grad_norm": 0.32430615453417455, "learning_rate": 5.410125273515672e-06, "loss": 0.4377, "step": 3681 }, { "epoch": 2.4760864083382366, "grad_norm": 0.29363910335500876, "learning_rate": 5.407931115872169e-06, "loss": 0.4345, "step": 3682 }, { "epoch": 2.4767588467680928, "grad_norm": 0.390361054176332, "learning_rate": 5.405736879142849e-06, "loss": 0.45, "step": 3683 }, { "epoch": 2.477431285197949, "grad_norm": 0.36684773614787974, "learning_rate": 5.403542563753108e-06, "loss": 0.4363, "step": 3684 }, { "epoch": 2.478103723627805, "grad_norm": 0.3920056483082055, "learning_rate": 5.401348170128359e-06, "loss": 0.4523, "step": 3685 }, { "epoch": 2.4787761620576614, "grad_norm": 1.4669592281148975, "learning_rate": 5.399153698694029e-06, "loss": 0.4544, "step": 3686 }, { "epoch": 2.479448600487518, "grad_norm": 0.33742803366775703, "learning_rate": 5.396959149875564e-06, "loss": 0.4525, "step": 3687 }, { "epoch": 2.4801210389173742, "grad_norm": 0.43895102427182286, "learning_rate": 5.3947645240984185e-06, "loss": 0.4398, "step": 3688 }, { "epoch": 2.4807934773472304, "grad_norm": 0.3213947832025373, "learning_rate": 5.392569821788069e-06, "loss": 0.4261, "step": 3689 }, { "epoch": 2.4814659157770866, "grad_norm": 0.35836800990492956, "learning_rate": 5.390375043370002e-06, "loss": 0.4289, "step": 3690 }, { "epoch": 2.482138354206943, "grad_norm": 0.41082932360377455, "learning_rate": 5.388180189269722e-06, "loss": 0.466, "step": 3691 }, { "epoch": 2.482810792636799, "grad_norm": 0.3616481356253951, "learning_rate": 5.385985259912745e-06, "loss": 0.4434, "step": 3692 }, { "epoch": 2.4834832310666553, "grad_norm": 0.3233270418518311, "learning_rate": 5.383790255724604e-06, "loss": 0.4624, "step": 3693 }, { "epoch": 2.484155669496512, "grad_norm": 0.3773964032169205, "learning_rate": 5.381595177130844e-06, "loss": 0.4633, "step": 3694 }, { "epoch": 2.484828107926368, "grad_norm": 0.40606903638666175, "learning_rate": 5.379400024557029e-06, "loss": 0.4504, "step": 3695 }, { "epoch": 2.4855005463562243, "grad_norm": 0.35668009511381027, "learning_rate": 5.37720479842873e-06, "loss": 0.4459, "step": 3696 }, { "epoch": 2.4861729847860805, "grad_norm": 0.34781885195436796, "learning_rate": 5.37500949917154e-06, "loss": 0.4368, "step": 3697 }, { "epoch": 2.4868454232159367, "grad_norm": 0.3977243510705404, "learning_rate": 5.3728141272110625e-06, "loss": 0.4397, "step": 3698 }, { "epoch": 2.487517861645793, "grad_norm": 0.38920376735507745, "learning_rate": 5.370618682972914e-06, "loss": 0.4699, "step": 3699 }, { "epoch": 2.488190300075649, "grad_norm": 0.535365852410878, "learning_rate": 5.368423166882729e-06, "loss": 0.4579, "step": 3700 }, { "epoch": 2.4888627385055058, "grad_norm": 0.3562149949820014, "learning_rate": 5.366227579366151e-06, "loss": 0.4407, "step": 3701 }, { "epoch": 2.489535176935362, "grad_norm": 0.3354089482329607, "learning_rate": 5.364031920848841e-06, "loss": 0.4611, "step": 3702 }, { "epoch": 2.490207615365218, "grad_norm": 0.4049203957942453, "learning_rate": 5.361836191756471e-06, "loss": 0.4558, "step": 3703 }, { "epoch": 2.4908800537950744, "grad_norm": 0.3010025011844406, "learning_rate": 5.35964039251473e-06, "loss": 0.4443, "step": 3704 }, { "epoch": 2.4915524922249306, "grad_norm": 0.3885268762782305, "learning_rate": 5.3574445235493165e-06, "loss": 0.4522, "step": 3705 }, { "epoch": 2.492224930654787, "grad_norm": 0.9568793935382124, "learning_rate": 5.355248585285946e-06, "loss": 0.4513, "step": 3706 }, { "epoch": 2.492897369084643, "grad_norm": 0.4198708586420078, "learning_rate": 5.353052578150346e-06, "loss": 0.454, "step": 3707 }, { "epoch": 2.4935698075144996, "grad_norm": 0.3116920558206328, "learning_rate": 5.350856502568258e-06, "loss": 0.4531, "step": 3708 }, { "epoch": 2.494242245944356, "grad_norm": 0.3530569433463639, "learning_rate": 5.348660358965438e-06, "loss": 0.4373, "step": 3709 }, { "epoch": 2.494914684374212, "grad_norm": 0.6219596914149117, "learning_rate": 5.346464147767649e-06, "loss": 0.47, "step": 3710 }, { "epoch": 2.4955871228040682, "grad_norm": 0.7200298627812961, "learning_rate": 5.344267869400676e-06, "loss": 0.4562, "step": 3711 }, { "epoch": 2.4962595612339245, "grad_norm": 0.3831329382165244, "learning_rate": 5.342071524290311e-06, "loss": 0.4503, "step": 3712 }, { "epoch": 2.4969319996637807, "grad_norm": 0.3730820369626398, "learning_rate": 5.339875112862361e-06, "loss": 0.4544, "step": 3713 }, { "epoch": 2.497604438093637, "grad_norm": 0.9308006610674574, "learning_rate": 5.337678635542641e-06, "loss": 0.4455, "step": 3714 }, { "epoch": 2.4982768765234935, "grad_norm": 0.3900152218356268, "learning_rate": 5.335482092756991e-06, "loss": 0.4519, "step": 3715 }, { "epoch": 2.4989493149533497, "grad_norm": 0.34510470883456346, "learning_rate": 5.33328548493125e-06, "loss": 0.4614, "step": 3716 }, { "epoch": 2.499621753383206, "grad_norm": 0.39301377708235846, "learning_rate": 5.331088812491278e-06, "loss": 0.4376, "step": 3717 }, { "epoch": 2.500294191813062, "grad_norm": 0.4132391443171161, "learning_rate": 5.3288920758629445e-06, "loss": 0.4611, "step": 3718 }, { "epoch": 2.5009666302429183, "grad_norm": 0.30379365709876277, "learning_rate": 5.326695275472132e-06, "loss": 0.462, "step": 3719 }, { "epoch": 2.5016390686727745, "grad_norm": 0.3063310686320014, "learning_rate": 5.324498411744737e-06, "loss": 0.4438, "step": 3720 }, { "epoch": 2.5023115071026307, "grad_norm": 0.3687412953586924, "learning_rate": 5.322301485106663e-06, "loss": 0.4485, "step": 3721 }, { "epoch": 2.5029839455324874, "grad_norm": 0.30038806642290977, "learning_rate": 5.320104495983831e-06, "loss": 0.448, "step": 3722 }, { "epoch": 2.5036563839623436, "grad_norm": 0.4042132931618547, "learning_rate": 5.317907444802174e-06, "loss": 0.4486, "step": 3723 }, { "epoch": 2.5043288223922, "grad_norm": 0.38294548405814394, "learning_rate": 5.315710331987634e-06, "loss": 0.459, "step": 3724 }, { "epoch": 2.505001260822056, "grad_norm": 0.2917400221339506, "learning_rate": 5.313513157966166e-06, "loss": 0.4398, "step": 3725 }, { "epoch": 2.505673699251912, "grad_norm": 0.30874946783443197, "learning_rate": 5.311315923163739e-06, "loss": 0.4611, "step": 3726 }, { "epoch": 2.5063461376817684, "grad_norm": 0.32034127394020057, "learning_rate": 5.30911862800633e-06, "loss": 0.4564, "step": 3727 }, { "epoch": 2.5070185761116246, "grad_norm": 0.393814502245678, "learning_rate": 5.306921272919931e-06, "loss": 0.4548, "step": 3728 }, { "epoch": 2.5076910145414812, "grad_norm": 0.3589397375038359, "learning_rate": 5.304723858330544e-06, "loss": 0.4525, "step": 3729 }, { "epoch": 2.5083634529713374, "grad_norm": 0.3886193975048878, "learning_rate": 5.302526384664182e-06, "loss": 0.4326, "step": 3730 }, { "epoch": 2.5090358914011937, "grad_norm": 0.3619242724868831, "learning_rate": 5.300328852346872e-06, "loss": 0.4603, "step": 3731 }, { "epoch": 2.50970832983105, "grad_norm": 0.3808986117863621, "learning_rate": 5.29813126180465e-06, "loss": 0.4587, "step": 3732 }, { "epoch": 2.510380768260906, "grad_norm": 0.3818801429897, "learning_rate": 5.295933613463565e-06, "loss": 0.4528, "step": 3733 }, { "epoch": 2.5110532066907623, "grad_norm": 0.3185246625786046, "learning_rate": 5.2937359077496756e-06, "loss": 0.4388, "step": 3734 }, { "epoch": 2.5117256451206185, "grad_norm": 0.33417761556505876, "learning_rate": 5.291538145089051e-06, "loss": 0.4422, "step": 3735 }, { "epoch": 2.512398083550475, "grad_norm": 0.3543133452008666, "learning_rate": 5.289340325907774e-06, "loss": 0.4375, "step": 3736 }, { "epoch": 2.5130705219803313, "grad_norm": 0.45034041931345337, "learning_rate": 5.287142450631937e-06, "loss": 0.4511, "step": 3737 }, { "epoch": 2.5137429604101875, "grad_norm": 0.3341749624172436, "learning_rate": 5.284944519687641e-06, "loss": 0.4419, "step": 3738 }, { "epoch": 2.5144153988400437, "grad_norm": 0.31416689317385305, "learning_rate": 5.2827465335010034e-06, "loss": 0.4485, "step": 3739 }, { "epoch": 2.5150878372699, "grad_norm": 0.32352161394960716, "learning_rate": 5.280548492498145e-06, "loss": 0.4637, "step": 3740 }, { "epoch": 2.515760275699756, "grad_norm": 0.3596611050259769, "learning_rate": 5.278350397105205e-06, "loss": 0.4492, "step": 3741 }, { "epoch": 2.5164327141296123, "grad_norm": 0.38533801199013906, "learning_rate": 5.276152247748329e-06, "loss": 0.4591, "step": 3742 }, { "epoch": 2.517105152559469, "grad_norm": 0.39893128818506984, "learning_rate": 5.27395404485367e-06, "loss": 0.4515, "step": 3743 }, { "epoch": 2.517777590989325, "grad_norm": 0.3601291730744921, "learning_rate": 5.271755788847399e-06, "loss": 0.4331, "step": 3744 }, { "epoch": 2.5184500294191814, "grad_norm": 0.35341859376319246, "learning_rate": 5.269557480155689e-06, "loss": 0.4467, "step": 3745 }, { "epoch": 2.5191224678490376, "grad_norm": 0.34460484487452686, "learning_rate": 5.267359119204729e-06, "loss": 0.4619, "step": 3746 }, { "epoch": 2.519794906278894, "grad_norm": 0.3163956703666074, "learning_rate": 5.2651607064207174e-06, "loss": 0.4456, "step": 3747 }, { "epoch": 2.52046734470875, "grad_norm": 0.38107463823791704, "learning_rate": 5.262962242229861e-06, "loss": 0.4604, "step": 3748 }, { "epoch": 2.521139783138606, "grad_norm": 0.354401592955316, "learning_rate": 5.260763727058377e-06, "loss": 0.4598, "step": 3749 }, { "epoch": 2.521812221568463, "grad_norm": 0.30038110187928374, "learning_rate": 5.258565161332493e-06, "loss": 0.4312, "step": 3750 }, { "epoch": 2.522484659998319, "grad_norm": 0.35161551300999627, "learning_rate": 5.256366545478444e-06, "loss": 0.4369, "step": 3751 }, { "epoch": 2.5231570984281753, "grad_norm": 0.4154141265683533, "learning_rate": 5.2541678799224795e-06, "loss": 0.4545, "step": 3752 }, { "epoch": 2.5238295368580315, "grad_norm": 0.2843520409523974, "learning_rate": 5.251969165090857e-06, "loss": 0.4477, "step": 3753 }, { "epoch": 2.5245019752878877, "grad_norm": 0.3356459667390433, "learning_rate": 5.2497704014098375e-06, "loss": 0.4437, "step": 3754 }, { "epoch": 2.525174413717744, "grad_norm": 0.40533334967123485, "learning_rate": 5.2475715893057e-06, "loss": 0.4461, "step": 3755 }, { "epoch": 2.5258468521476, "grad_norm": 0.351820305830887, "learning_rate": 5.24537272920473e-06, "loss": 0.4497, "step": 3756 }, { "epoch": 2.5265192905774567, "grad_norm": 0.36700532615869463, "learning_rate": 5.243173821533221e-06, "loss": 0.4603, "step": 3757 }, { "epoch": 2.527191729007313, "grad_norm": 0.36421125558057277, "learning_rate": 5.2409748667174746e-06, "loss": 0.4508, "step": 3758 }, { "epoch": 2.527864167437169, "grad_norm": 0.3593637659685638, "learning_rate": 5.238775865183805e-06, "loss": 0.4319, "step": 3759 }, { "epoch": 2.5285366058670253, "grad_norm": 0.451127830436243, "learning_rate": 5.236576817358533e-06, "loss": 0.4504, "step": 3760 }, { "epoch": 2.5292090442968815, "grad_norm": 0.3612158552582543, "learning_rate": 5.2343777236679905e-06, "loss": 0.4432, "step": 3761 }, { "epoch": 2.5298814827267377, "grad_norm": 0.35963170197322686, "learning_rate": 5.232178584538515e-06, "loss": 0.4586, "step": 3762 }, { "epoch": 2.530553921156594, "grad_norm": 0.3513240604985171, "learning_rate": 5.229979400396455e-06, "loss": 0.4506, "step": 3763 }, { "epoch": 2.5312263595864506, "grad_norm": 0.40705859055602794, "learning_rate": 5.227780171668169e-06, "loss": 0.4413, "step": 3764 }, { "epoch": 2.531898798016307, "grad_norm": 0.38056720078199147, "learning_rate": 5.225580898780022e-06, "loss": 0.449, "step": 3765 }, { "epoch": 2.532571236446163, "grad_norm": 0.3344126266364116, "learning_rate": 5.223381582158388e-06, "loss": 0.4499, "step": 3766 }, { "epoch": 2.533243674876019, "grad_norm": 0.3450151129401091, "learning_rate": 5.2211822222296495e-06, "loss": 0.4584, "step": 3767 }, { "epoch": 2.5339161133058754, "grad_norm": 0.3125769248548215, "learning_rate": 5.218982819420199e-06, "loss": 0.4434, "step": 3768 }, { "epoch": 2.5345885517357316, "grad_norm": 0.3459655360483346, "learning_rate": 5.216783374156432e-06, "loss": 0.4543, "step": 3769 }, { "epoch": 2.535260990165588, "grad_norm": 0.33743900715413994, "learning_rate": 5.21458388686476e-06, "loss": 0.4444, "step": 3770 }, { "epoch": 2.5359334285954445, "grad_norm": 0.34409415630074597, "learning_rate": 5.212384357971599e-06, "loss": 0.4341, "step": 3771 }, { "epoch": 2.5366058670253, "grad_norm": 0.3347872267650656, "learning_rate": 5.21018478790337e-06, "loss": 0.4525, "step": 3772 }, { "epoch": 2.537278305455157, "grad_norm": 0.32985605985546435, "learning_rate": 5.207985177086506e-06, "loss": 0.4302, "step": 3773 }, { "epoch": 2.537950743885013, "grad_norm": 0.3319653933091309, "learning_rate": 5.205785525947445e-06, "loss": 0.4317, "step": 3774 }, { "epoch": 2.5386231823148693, "grad_norm": 0.3318287561536541, "learning_rate": 5.20358583491264e-06, "loss": 0.4572, "step": 3775 }, { "epoch": 2.5392956207447255, "grad_norm": 0.2725473101671124, "learning_rate": 5.20138610440854e-06, "loss": 0.4362, "step": 3776 }, { "epoch": 2.5399680591745817, "grad_norm": 0.6860141812033361, "learning_rate": 5.199186334861612e-06, "loss": 0.4323, "step": 3777 }, { "epoch": 2.5406404976044383, "grad_norm": 0.3415301890154741, "learning_rate": 5.1969865266983245e-06, "loss": 0.4566, "step": 3778 }, { "epoch": 2.541312936034294, "grad_norm": 0.3686557719090204, "learning_rate": 5.1947866803451555e-06, "loss": 0.4478, "step": 3779 }, { "epoch": 2.5419853744641507, "grad_norm": 0.35319179579920057, "learning_rate": 5.192586796228589e-06, "loss": 0.4488, "step": 3780 }, { "epoch": 2.542657812894007, "grad_norm": 0.3668355608939105, "learning_rate": 5.190386874775123e-06, "loss": 0.4484, "step": 3781 }, { "epoch": 2.543330251323863, "grad_norm": 0.34965355223710987, "learning_rate": 5.1881869164112516e-06, "loss": 0.4337, "step": 3782 }, { "epoch": 2.5440026897537193, "grad_norm": 0.35308599479483654, "learning_rate": 5.185986921563485e-06, "loss": 0.4373, "step": 3783 }, { "epoch": 2.5446751281835756, "grad_norm": 0.324205088399401, "learning_rate": 5.183786890658337e-06, "loss": 0.4378, "step": 3784 }, { "epoch": 2.545347566613432, "grad_norm": 0.380163684655463, "learning_rate": 5.181586824122327e-06, "loss": 0.4706, "step": 3785 }, { "epoch": 2.546020005043288, "grad_norm": 0.33074437109441635, "learning_rate": 5.1793867223819846e-06, "loss": 0.4704, "step": 3786 }, { "epoch": 2.5466924434731446, "grad_norm": 0.3292609650828323, "learning_rate": 5.177186585863845e-06, "loss": 0.45, "step": 3787 }, { "epoch": 2.547364881903001, "grad_norm": 0.4139902078750704, "learning_rate": 5.1749864149944485e-06, "loss": 0.4398, "step": 3788 }, { "epoch": 2.548037320332857, "grad_norm": 0.5993488931079183, "learning_rate": 5.172786210200343e-06, "loss": 0.4486, "step": 3789 }, { "epoch": 2.548709758762713, "grad_norm": 0.42042569553752296, "learning_rate": 5.170585971908087e-06, "loss": 0.4456, "step": 3790 }, { "epoch": 2.5493821971925694, "grad_norm": 0.3823238256484114, "learning_rate": 5.168385700544239e-06, "loss": 0.4457, "step": 3791 }, { "epoch": 2.550054635622426, "grad_norm": 0.37123219611674746, "learning_rate": 5.166185396535366e-06, "loss": 0.4599, "step": 3792 }, { "epoch": 2.550727074052282, "grad_norm": 0.36751036038349477, "learning_rate": 5.1639850603080435e-06, "loss": 0.4412, "step": 3793 }, { "epoch": 2.5513995124821385, "grad_norm": 0.28656948143169914, "learning_rate": 5.16178469228885e-06, "loss": 0.4557, "step": 3794 }, { "epoch": 2.5520719509119947, "grad_norm": 0.32446866213313197, "learning_rate": 5.159584292904375e-06, "loss": 0.4583, "step": 3795 }, { "epoch": 2.552744389341851, "grad_norm": 0.5964162794538592, "learning_rate": 5.157383862581209e-06, "loss": 0.4458, "step": 3796 }, { "epoch": 2.553416827771707, "grad_norm": 0.3297847044616623, "learning_rate": 5.155183401745953e-06, "loss": 0.4544, "step": 3797 }, { "epoch": 2.5540892662015633, "grad_norm": 0.3327996968439367, "learning_rate": 5.152982910825207e-06, "loss": 0.437, "step": 3798 }, { "epoch": 2.55476170463142, "grad_norm": 0.3178463020120286, "learning_rate": 5.150782390245586e-06, "loss": 0.4384, "step": 3799 }, { "epoch": 2.5554341430612757, "grad_norm": 0.41892183789374043, "learning_rate": 5.148581840433703e-06, "loss": 0.447, "step": 3800 }, { "epoch": 2.5561065814911323, "grad_norm": 0.39518782274915, "learning_rate": 5.1463812618161815e-06, "loss": 0.4528, "step": 3801 }, { "epoch": 2.5567790199209885, "grad_norm": 0.32549332411081666, "learning_rate": 5.144180654819647e-06, "loss": 0.4519, "step": 3802 }, { "epoch": 2.5574514583508448, "grad_norm": 0.38308949144270205, "learning_rate": 5.1419800198707335e-06, "loss": 0.4513, "step": 3803 }, { "epoch": 2.558123896780701, "grad_norm": 0.3117899154840055, "learning_rate": 5.13977935739608e-06, "loss": 0.4426, "step": 3804 }, { "epoch": 2.558796335210557, "grad_norm": 0.38078454486838426, "learning_rate": 5.13757866782233e-06, "loss": 0.4416, "step": 3805 }, { "epoch": 2.559468773640414, "grad_norm": 0.3892915045158023, "learning_rate": 5.135377951576129e-06, "loss": 0.4484, "step": 3806 }, { "epoch": 2.5601412120702696, "grad_norm": 0.3922351660486267, "learning_rate": 5.133177209084135e-06, "loss": 0.4734, "step": 3807 }, { "epoch": 2.560813650500126, "grad_norm": 0.3145125925952972, "learning_rate": 5.130976440773007e-06, "loss": 0.4418, "step": 3808 }, { "epoch": 2.5614860889299824, "grad_norm": 0.33503272386293115, "learning_rate": 5.128775647069406e-06, "loss": 0.4403, "step": 3809 }, { "epoch": 2.5621585273598386, "grad_norm": 0.3206081131155908, "learning_rate": 5.126574828400005e-06, "loss": 0.4657, "step": 3810 }, { "epoch": 2.562830965789695, "grad_norm": 0.3677827674996036, "learning_rate": 5.124373985191473e-06, "loss": 0.4607, "step": 3811 }, { "epoch": 2.563503404219551, "grad_norm": 0.3313057068776185, "learning_rate": 5.122173117870493e-06, "loss": 0.439, "step": 3812 }, { "epoch": 2.5641758426494077, "grad_norm": 0.33844242490608817, "learning_rate": 5.119972226863746e-06, "loss": 0.4511, "step": 3813 }, { "epoch": 2.5648482810792634, "grad_norm": 0.46303435953046407, "learning_rate": 5.117771312597921e-06, "loss": 0.4489, "step": 3814 }, { "epoch": 2.56552071950912, "grad_norm": 0.3947544136560684, "learning_rate": 5.11557037549971e-06, "loss": 0.4497, "step": 3815 }, { "epoch": 2.5661931579389763, "grad_norm": 0.32707548217635574, "learning_rate": 5.113369415995811e-06, "loss": 0.4528, "step": 3816 }, { "epoch": 2.5668655963688325, "grad_norm": 0.3493213925953552, "learning_rate": 5.1111684345129216e-06, "loss": 0.4442, "step": 3817 }, { "epoch": 2.5675380347986887, "grad_norm": 0.32379197184450675, "learning_rate": 5.10896743147775e-06, "loss": 0.4392, "step": 3818 }, { "epoch": 2.568210473228545, "grad_norm": 0.3734254339295873, "learning_rate": 5.106766407317005e-06, "loss": 0.4531, "step": 3819 }, { "epoch": 2.5688829116584015, "grad_norm": 0.40420746016090003, "learning_rate": 5.104565362457402e-06, "loss": 0.4493, "step": 3820 }, { "epoch": 2.5695553500882573, "grad_norm": 0.35353410656708223, "learning_rate": 5.102364297325658e-06, "loss": 0.4496, "step": 3821 }, { "epoch": 2.570227788518114, "grad_norm": 0.3325958370960057, "learning_rate": 5.100163212348492e-06, "loss": 0.4439, "step": 3822 }, { "epoch": 2.57090022694797, "grad_norm": 0.3045565716634522, "learning_rate": 5.097962107952634e-06, "loss": 0.4547, "step": 3823 }, { "epoch": 2.5715726653778264, "grad_norm": 0.32827460477458903, "learning_rate": 5.0957609845648095e-06, "loss": 0.4599, "step": 3824 }, { "epoch": 2.5722451038076826, "grad_norm": 0.3896500200822667, "learning_rate": 5.093559842611753e-06, "loss": 0.4547, "step": 3825 }, { "epoch": 2.5729175422375388, "grad_norm": 0.3558175143222838, "learning_rate": 5.091358682520201e-06, "loss": 0.4546, "step": 3826 }, { "epoch": 2.5735899806673954, "grad_norm": 0.49023083575365556, "learning_rate": 5.089157504716892e-06, "loss": 0.4364, "step": 3827 }, { "epoch": 2.574262419097251, "grad_norm": 0.46450442763910227, "learning_rate": 5.086956309628571e-06, "loss": 0.4422, "step": 3828 }, { "epoch": 2.574934857527108, "grad_norm": 0.2988745132917568, "learning_rate": 5.084755097681986e-06, "loss": 0.4425, "step": 3829 }, { "epoch": 2.575607295956964, "grad_norm": 0.4269834171435638, "learning_rate": 5.0825538693038855e-06, "loss": 0.451, "step": 3830 }, { "epoch": 2.5762797343868202, "grad_norm": 0.3509948140622454, "learning_rate": 5.080352624921024e-06, "loss": 0.4457, "step": 3831 }, { "epoch": 2.5769521728166764, "grad_norm": 0.3429616383589686, "learning_rate": 5.078151364960155e-06, "loss": 0.4581, "step": 3832 }, { "epoch": 2.5776246112465326, "grad_norm": 0.3287023293291114, "learning_rate": 5.075950089848042e-06, "loss": 0.4388, "step": 3833 }, { "epoch": 2.578297049676389, "grad_norm": 0.39127126728235045, "learning_rate": 5.0737488000114445e-06, "loss": 0.4593, "step": 3834 }, { "epoch": 2.578969488106245, "grad_norm": 0.3211215536959106, "learning_rate": 5.0715474958771285e-06, "loss": 0.4402, "step": 3835 }, { "epoch": 2.5796419265361017, "grad_norm": 0.4274164835536321, "learning_rate": 5.069346177871861e-06, "loss": 0.432, "step": 3836 }, { "epoch": 2.580314364965958, "grad_norm": 0.314409178497314, "learning_rate": 5.067144846422414e-06, "loss": 0.4412, "step": 3837 }, { "epoch": 2.580986803395814, "grad_norm": 0.30381499647440013, "learning_rate": 5.064943501955561e-06, "loss": 0.4449, "step": 3838 }, { "epoch": 2.5816592418256703, "grad_norm": 0.3075225485816905, "learning_rate": 5.062742144898077e-06, "loss": 0.4333, "step": 3839 }, { "epoch": 2.5823316802555265, "grad_norm": 0.31533603082366796, "learning_rate": 5.060540775676741e-06, "loss": 0.4426, "step": 3840 }, { "epoch": 2.5830041186853827, "grad_norm": 0.40525331303601203, "learning_rate": 5.058339394718334e-06, "loss": 0.4429, "step": 3841 }, { "epoch": 2.583676557115239, "grad_norm": 0.3258861688476787, "learning_rate": 5.056138002449637e-06, "loss": 0.4559, "step": 3842 }, { "epoch": 2.5843489955450956, "grad_norm": 0.4413250887046823, "learning_rate": 5.053936599297434e-06, "loss": 0.4413, "step": 3843 }, { "epoch": 2.5850214339749518, "grad_norm": 0.34107229576023107, "learning_rate": 5.051735185688517e-06, "loss": 0.4546, "step": 3844 }, { "epoch": 2.585693872404808, "grad_norm": 0.35051767639704207, "learning_rate": 5.049533762049672e-06, "loss": 0.429, "step": 3845 }, { "epoch": 2.586366310834664, "grad_norm": 0.31965601826054163, "learning_rate": 5.04733232880769e-06, "loss": 0.4484, "step": 3846 }, { "epoch": 2.5870387492645204, "grad_norm": 0.31771321814884435, "learning_rate": 5.045130886389366e-06, "loss": 0.4491, "step": 3847 }, { "epoch": 2.5877111876943766, "grad_norm": 0.4950577405834247, "learning_rate": 5.04292943522149e-06, "loss": 0.4632, "step": 3848 }, { "epoch": 2.588383626124233, "grad_norm": 0.3789943252896637, "learning_rate": 5.040727975730866e-06, "loss": 0.4484, "step": 3849 }, { "epoch": 2.5890560645540894, "grad_norm": 0.3349219315480771, "learning_rate": 5.038526508344286e-06, "loss": 0.4377, "step": 3850 }, { "epoch": 2.5897285029839456, "grad_norm": 0.29139705497556456, "learning_rate": 5.036325033488552e-06, "loss": 0.4329, "step": 3851 }, { "epoch": 2.590400941413802, "grad_norm": 0.31799540729427056, "learning_rate": 5.034123551590464e-06, "loss": 0.4499, "step": 3852 }, { "epoch": 2.591073379843658, "grad_norm": 0.46667337205344145, "learning_rate": 5.031922063076825e-06, "loss": 0.4435, "step": 3853 }, { "epoch": 2.5917458182735142, "grad_norm": 0.3173954396303586, "learning_rate": 5.029720568374441e-06, "loss": 0.4434, "step": 3854 }, { "epoch": 2.5924182567033704, "grad_norm": 0.4038778883302914, "learning_rate": 5.027519067910113e-06, "loss": 0.4562, "step": 3855 }, { "epoch": 2.5930906951332267, "grad_norm": 0.3334517592595017, "learning_rate": 5.0253175621106496e-06, "loss": 0.4641, "step": 3856 }, { "epoch": 2.5937631335630833, "grad_norm": 0.34674178762730656, "learning_rate": 5.023116051402857e-06, "loss": 0.4506, "step": 3857 }, { "epoch": 2.5944355719929395, "grad_norm": 0.33680663953221074, "learning_rate": 5.020914536213544e-06, "loss": 0.4601, "step": 3858 }, { "epoch": 2.5951080104227957, "grad_norm": 0.29991687118698995, "learning_rate": 5.018713016969518e-06, "loss": 0.4384, "step": 3859 }, { "epoch": 2.595780448852652, "grad_norm": 0.6532795206230446, "learning_rate": 5.016511494097589e-06, "loss": 0.444, "step": 3860 }, { "epoch": 2.596452887282508, "grad_norm": 0.40399908418384883, "learning_rate": 5.014309968024569e-06, "loss": 0.4498, "step": 3861 }, { "epoch": 2.5971253257123643, "grad_norm": 0.4697959446174812, "learning_rate": 5.0121084391772675e-06, "loss": 0.4368, "step": 3862 }, { "epoch": 2.5977977641422205, "grad_norm": 0.33135513650808096, "learning_rate": 5.009906907982498e-06, "loss": 0.4527, "step": 3863 }, { "epoch": 2.598470202572077, "grad_norm": 0.4101478794356378, "learning_rate": 5.007705374867069e-06, "loss": 0.46, "step": 3864 }, { "epoch": 2.5991426410019334, "grad_norm": 0.38900473343673664, "learning_rate": 5.005503840257797e-06, "loss": 0.4541, "step": 3865 }, { "epoch": 2.5998150794317896, "grad_norm": 0.35719574910883917, "learning_rate": 5.003302304581491e-06, "loss": 0.4462, "step": 3866 }, { "epoch": 2.600487517861646, "grad_norm": 0.3964272408590807, "learning_rate": 5.001100768264967e-06, "loss": 0.4498, "step": 3867 }, { "epoch": 2.601159956291502, "grad_norm": 0.45572001774437354, "learning_rate": 4.998899231735036e-06, "loss": 0.4397, "step": 3868 }, { "epoch": 2.601832394721358, "grad_norm": 0.3854538081021313, "learning_rate": 4.996697695418509e-06, "loss": 0.4431, "step": 3869 }, { "epoch": 2.6025048331512144, "grad_norm": 0.46467833571113215, "learning_rate": 4.9944961597422045e-06, "loss": 0.4464, "step": 3870 }, { "epoch": 2.603177271581071, "grad_norm": 0.49315106615561277, "learning_rate": 4.992294625132931e-06, "loss": 0.4454, "step": 3871 }, { "epoch": 2.6038497100109272, "grad_norm": 0.41126821224026316, "learning_rate": 4.990093092017505e-06, "loss": 0.4275, "step": 3872 }, { "epoch": 2.6045221484407834, "grad_norm": 0.33764530796775627, "learning_rate": 4.987891560822734e-06, "loss": 0.4448, "step": 3873 }, { "epoch": 2.6051945868706397, "grad_norm": 0.3960123488009799, "learning_rate": 4.9856900319754325e-06, "loss": 0.4433, "step": 3874 }, { "epoch": 2.605867025300496, "grad_norm": 0.40780345359310755, "learning_rate": 4.983488505902412e-06, "loss": 0.4566, "step": 3875 }, { "epoch": 2.606539463730352, "grad_norm": 0.7557178612501396, "learning_rate": 4.981286983030483e-06, "loss": 0.4555, "step": 3876 }, { "epoch": 2.6072119021602083, "grad_norm": 0.42171713653694304, "learning_rate": 4.979085463786458e-06, "loss": 0.4435, "step": 3877 }, { "epoch": 2.607884340590065, "grad_norm": 0.3340723064181383, "learning_rate": 4.976883948597144e-06, "loss": 0.4303, "step": 3878 }, { "epoch": 2.608556779019921, "grad_norm": 0.29293861353368855, "learning_rate": 4.974682437889351e-06, "loss": 0.4565, "step": 3879 }, { "epoch": 2.6092292174497773, "grad_norm": 0.31253385788814575, "learning_rate": 4.972480932089887e-06, "loss": 0.4412, "step": 3880 }, { "epoch": 2.6099016558796335, "grad_norm": 0.42975688666696954, "learning_rate": 4.970279431625562e-06, "loss": 0.4285, "step": 3881 }, { "epoch": 2.6105740943094897, "grad_norm": 0.33551292043635644, "learning_rate": 4.9680779369231765e-06, "loss": 0.4223, "step": 3882 }, { "epoch": 2.611246532739346, "grad_norm": 0.35565215137443845, "learning_rate": 4.965876448409538e-06, "loss": 0.4392, "step": 3883 }, { "epoch": 2.611918971169202, "grad_norm": 0.3795433031972901, "learning_rate": 4.96367496651145e-06, "loss": 0.4491, "step": 3884 }, { "epoch": 2.612591409599059, "grad_norm": 0.33398281760717086, "learning_rate": 4.961473491655716e-06, "loss": 0.4491, "step": 3885 }, { "epoch": 2.613263848028915, "grad_norm": 0.34789460060253946, "learning_rate": 4.9592720242691365e-06, "loss": 0.4449, "step": 3886 }, { "epoch": 2.613936286458771, "grad_norm": 0.7521759371904863, "learning_rate": 4.95707056477851e-06, "loss": 0.462, "step": 3887 }, { "epoch": 2.6146087248886274, "grad_norm": 0.39615418310057654, "learning_rate": 4.954869113610636e-06, "loss": 0.4371, "step": 3888 }, { "epoch": 2.6152811633184836, "grad_norm": 0.3072143702625055, "learning_rate": 4.9526676711923105e-06, "loss": 0.4308, "step": 3889 }, { "epoch": 2.61595360174834, "grad_norm": 0.4958232349535628, "learning_rate": 4.950466237950331e-06, "loss": 0.4411, "step": 3890 }, { "epoch": 2.616626040178196, "grad_norm": 0.3206297035074455, "learning_rate": 4.948264814311485e-06, "loss": 0.4503, "step": 3891 }, { "epoch": 2.6172984786080526, "grad_norm": 0.34153060861871193, "learning_rate": 4.9460634007025666e-06, "loss": 0.4371, "step": 3892 }, { "epoch": 2.617970917037909, "grad_norm": 0.34692001072221584, "learning_rate": 4.943861997550364e-06, "loss": 0.4316, "step": 3893 }, { "epoch": 2.618643355467765, "grad_norm": 0.30897086469776225, "learning_rate": 4.941660605281669e-06, "loss": 0.44, "step": 3894 }, { "epoch": 2.6193157938976213, "grad_norm": 0.2964363692863969, "learning_rate": 4.93945922432326e-06, "loss": 0.4264, "step": 3895 }, { "epoch": 2.6199882323274775, "grad_norm": 0.3451149147036572, "learning_rate": 4.937257855101924e-06, "loss": 0.4494, "step": 3896 }, { "epoch": 2.6206606707573337, "grad_norm": 0.3723792970190891, "learning_rate": 4.9350564980444395e-06, "loss": 0.4466, "step": 3897 }, { "epoch": 2.62133310918719, "grad_norm": 0.49347025859965554, "learning_rate": 4.932855153577586e-06, "loss": 0.4522, "step": 3898 }, { "epoch": 2.6220055476170465, "grad_norm": 0.3619643702258718, "learning_rate": 4.9306538221281405e-06, "loss": 0.435, "step": 3899 }, { "epoch": 2.6226779860469027, "grad_norm": 0.6146626855230981, "learning_rate": 4.928452504122873e-06, "loss": 0.445, "step": 3900 }, { "epoch": 2.623350424476759, "grad_norm": 0.38166015510603984, "learning_rate": 4.926251199988557e-06, "loss": 0.4572, "step": 3901 }, { "epoch": 2.624022862906615, "grad_norm": 0.3070758076618173, "learning_rate": 4.924049910151959e-06, "loss": 0.4391, "step": 3902 }, { "epoch": 2.6246953013364713, "grad_norm": 0.40800701682714846, "learning_rate": 4.9218486350398465e-06, "loss": 0.4461, "step": 3903 }, { "epoch": 2.6253677397663275, "grad_norm": 0.2973151821861155, "learning_rate": 4.919647375078978e-06, "loss": 0.4479, "step": 3904 }, { "epoch": 2.6260401781961837, "grad_norm": 0.38336864739259563, "learning_rate": 4.917446130696115e-06, "loss": 0.4475, "step": 3905 }, { "epoch": 2.6267126166260404, "grad_norm": 0.3214570330346468, "learning_rate": 4.915244902318015e-06, "loss": 0.4498, "step": 3906 }, { "epoch": 2.6273850550558966, "grad_norm": 0.6332904874395928, "learning_rate": 4.913043690371428e-06, "loss": 0.4369, "step": 3907 }, { "epoch": 2.628057493485753, "grad_norm": 0.38335987959677537, "learning_rate": 4.910842495283109e-06, "loss": 0.4459, "step": 3908 }, { "epoch": 2.628729931915609, "grad_norm": 0.6059650455962325, "learning_rate": 4.908641317479801e-06, "loss": 0.4444, "step": 3909 }, { "epoch": 2.629402370345465, "grad_norm": 0.3583108368754653, "learning_rate": 4.906440157388248e-06, "loss": 0.4574, "step": 3910 }, { "epoch": 2.6300748087753214, "grad_norm": 0.4759206529064839, "learning_rate": 4.904239015435191e-06, "loss": 0.4556, "step": 3911 }, { "epoch": 2.6307472472051776, "grad_norm": 0.2995614698468876, "learning_rate": 4.902037892047368e-06, "loss": 0.4504, "step": 3912 }, { "epoch": 2.6314196856350343, "grad_norm": 0.44317698613266354, "learning_rate": 4.899836787651509e-06, "loss": 0.4493, "step": 3913 }, { "epoch": 2.6320921240648905, "grad_norm": 0.349065702885983, "learning_rate": 4.897635702674344e-06, "loss": 0.4488, "step": 3914 }, { "epoch": 2.6327645624947467, "grad_norm": 0.8225983390548947, "learning_rate": 4.895434637542598e-06, "loss": 0.4612, "step": 3915 }, { "epoch": 2.633437000924603, "grad_norm": 0.3849712777720729, "learning_rate": 4.893233592682996e-06, "loss": 0.4457, "step": 3916 }, { "epoch": 2.634109439354459, "grad_norm": 0.37120607081678014, "learning_rate": 4.8910325685222515e-06, "loss": 0.4511, "step": 3917 }, { "epoch": 2.6347818777843153, "grad_norm": 0.3827365226913108, "learning_rate": 4.88883156548708e-06, "loss": 0.4467, "step": 3918 }, { "epoch": 2.6354543162141715, "grad_norm": 0.3254878222098606, "learning_rate": 4.886630584004191e-06, "loss": 0.4584, "step": 3919 }, { "epoch": 2.636126754644028, "grad_norm": 0.771189071779267, "learning_rate": 4.88442962450029e-06, "loss": 0.44, "step": 3920 }, { "epoch": 2.6367991930738843, "grad_norm": 0.3757486231448364, "learning_rate": 4.882228687402081e-06, "loss": 0.4406, "step": 3921 }, { "epoch": 2.6374716315037405, "grad_norm": 0.3503419583682442, "learning_rate": 4.880027773136255e-06, "loss": 0.4483, "step": 3922 }, { "epoch": 2.6381440699335967, "grad_norm": 0.3443790198819944, "learning_rate": 4.877826882129509e-06, "loss": 0.4395, "step": 3923 }, { "epoch": 2.638816508363453, "grad_norm": 0.3113332727795207, "learning_rate": 4.875626014808528e-06, "loss": 0.456, "step": 3924 }, { "epoch": 2.639488946793309, "grad_norm": 0.3685464703821919, "learning_rate": 4.873425171599998e-06, "loss": 0.4355, "step": 3925 }, { "epoch": 2.6401613852231653, "grad_norm": 0.36662723975171313, "learning_rate": 4.871224352930596e-06, "loss": 0.4294, "step": 3926 }, { "epoch": 2.640833823653022, "grad_norm": 0.3117729797433047, "learning_rate": 4.8690235592269946e-06, "loss": 0.4317, "step": 3927 }, { "epoch": 2.641506262082878, "grad_norm": 0.35979033394621746, "learning_rate": 4.866822790915865e-06, "loss": 0.4415, "step": 3928 }, { "epoch": 2.6421787005127344, "grad_norm": 0.37009695722503977, "learning_rate": 4.864622048423871e-06, "loss": 0.4302, "step": 3929 }, { "epoch": 2.6428511389425906, "grad_norm": 0.3568595977172907, "learning_rate": 4.862421332177674e-06, "loss": 0.4406, "step": 3930 }, { "epoch": 2.643523577372447, "grad_norm": 0.41973788949292573, "learning_rate": 4.860220642603922e-06, "loss": 0.4427, "step": 3931 }, { "epoch": 2.644196015802303, "grad_norm": 0.36700531410496684, "learning_rate": 4.858019980129267e-06, "loss": 0.4534, "step": 3932 }, { "epoch": 2.644868454232159, "grad_norm": 0.30269861413872146, "learning_rate": 4.855819345180354e-06, "loss": 0.4497, "step": 3933 }, { "epoch": 2.645540892662016, "grad_norm": 0.6152798719083605, "learning_rate": 4.853618738183821e-06, "loss": 0.4531, "step": 3934 }, { "epoch": 2.6462133310918716, "grad_norm": 0.30401248236622574, "learning_rate": 4.8514181595662984e-06, "loss": 0.4488, "step": 3935 }, { "epoch": 2.6468857695217283, "grad_norm": 0.37429738252667466, "learning_rate": 4.849217609754415e-06, "loss": 0.4411, "step": 3936 }, { "epoch": 2.6475582079515845, "grad_norm": 0.36301547372734627, "learning_rate": 4.847017089174793e-06, "loss": 0.4536, "step": 3937 }, { "epoch": 2.6482306463814407, "grad_norm": 0.4981606026371317, "learning_rate": 4.84481659825405e-06, "loss": 0.4422, "step": 3938 }, { "epoch": 2.648903084811297, "grad_norm": 0.34595349036346, "learning_rate": 4.842616137418792e-06, "loss": 0.4351, "step": 3939 }, { "epoch": 2.649575523241153, "grad_norm": 0.34087659069215315, "learning_rate": 4.840415707095626e-06, "loss": 0.4605, "step": 3940 }, { "epoch": 2.6502479616710097, "grad_norm": 0.4107782880869281, "learning_rate": 4.8382153077111505e-06, "loss": 0.4493, "step": 3941 }, { "epoch": 2.6509204001008655, "grad_norm": 0.32892418309754395, "learning_rate": 4.836014939691957e-06, "loss": 0.4371, "step": 3942 }, { "epoch": 2.651592838530722, "grad_norm": 0.35482669054847, "learning_rate": 4.8338146034646365e-06, "loss": 0.4512, "step": 3943 }, { "epoch": 2.6522652769605783, "grad_norm": 0.4993082701462041, "learning_rate": 4.831614299455763e-06, "loss": 0.4514, "step": 3944 }, { "epoch": 2.6529377153904345, "grad_norm": 0.4296586018836682, "learning_rate": 4.829414028091914e-06, "loss": 0.4436, "step": 3945 }, { "epoch": 2.6536101538202908, "grad_norm": 0.3528586999832636, "learning_rate": 4.827213789799656e-06, "loss": 0.4454, "step": 3946 }, { "epoch": 2.654282592250147, "grad_norm": 0.3524200784847369, "learning_rate": 4.825013585005554e-06, "loss": 0.4505, "step": 3947 }, { "epoch": 2.6549550306800036, "grad_norm": 0.3637313457903069, "learning_rate": 4.822813414136157e-06, "loss": 0.4193, "step": 3948 }, { "epoch": 2.6556274691098594, "grad_norm": 0.30803439934798615, "learning_rate": 4.820613277618016e-06, "loss": 0.4236, "step": 3949 }, { "epoch": 2.656299907539716, "grad_norm": 0.32356597567643813, "learning_rate": 4.818413175877674e-06, "loss": 0.4343, "step": 3950 }, { "epoch": 2.656972345969572, "grad_norm": 0.3117213382948113, "learning_rate": 4.816213109341664e-06, "loss": 0.4297, "step": 3951 }, { "epoch": 2.6576447843994284, "grad_norm": 0.30292637566183234, "learning_rate": 4.814013078436517e-06, "loss": 0.4388, "step": 3952 }, { "epoch": 2.6583172228292846, "grad_norm": 0.4441875471980197, "learning_rate": 4.81181308358875e-06, "loss": 0.4477, "step": 3953 }, { "epoch": 2.658989661259141, "grad_norm": 0.35145162184961165, "learning_rate": 4.8096131252248785e-06, "loss": 0.4514, "step": 3954 }, { "epoch": 2.6596620996889975, "grad_norm": 0.32937856083291966, "learning_rate": 4.80741320377141e-06, "loss": 0.4514, "step": 3955 }, { "epoch": 2.6603345381188532, "grad_norm": 0.3354037013121662, "learning_rate": 4.805213319654847e-06, "loss": 0.4409, "step": 3956 }, { "epoch": 2.66100697654871, "grad_norm": 0.354286594317942, "learning_rate": 4.803013473301677e-06, "loss": 0.449, "step": 3957 }, { "epoch": 2.661679414978566, "grad_norm": 0.3923920764655487, "learning_rate": 4.800813665138389e-06, "loss": 0.4352, "step": 3958 }, { "epoch": 2.6623518534084223, "grad_norm": 0.4069089239628588, "learning_rate": 4.7986138955914604e-06, "loss": 0.4469, "step": 3959 }, { "epoch": 2.6630242918382785, "grad_norm": 0.40021340268552197, "learning_rate": 4.796414165087363e-06, "loss": 0.4566, "step": 3960 }, { "epoch": 2.6636967302681347, "grad_norm": 0.4094469334606063, "learning_rate": 4.794214474052555e-06, "loss": 0.4343, "step": 3961 }, { "epoch": 2.6643691686979913, "grad_norm": 0.3149962928170417, "learning_rate": 4.792014822913496e-06, "loss": 0.4337, "step": 3962 }, { "epoch": 2.665041607127847, "grad_norm": 0.34533606893544566, "learning_rate": 4.789815212096632e-06, "loss": 0.4378, "step": 3963 }, { "epoch": 2.6657140455577037, "grad_norm": 0.42166294954305816, "learning_rate": 4.787615642028402e-06, "loss": 0.441, "step": 3964 }, { "epoch": 2.66638648398756, "grad_norm": 0.3720335576158297, "learning_rate": 4.785416113135241e-06, "loss": 0.454, "step": 3965 }, { "epoch": 2.667058922417416, "grad_norm": 0.34103569866637795, "learning_rate": 4.7832166258435684e-06, "loss": 0.4489, "step": 3966 }, { "epoch": 2.6677313608472724, "grad_norm": 0.3144751920124584, "learning_rate": 4.781017180579803e-06, "loss": 0.4367, "step": 3967 }, { "epoch": 2.6684037992771286, "grad_norm": 0.2934768252750471, "learning_rate": 4.7788177777703505e-06, "loss": 0.451, "step": 3968 }, { "epoch": 2.669076237706985, "grad_norm": 0.44173091051097085, "learning_rate": 4.776618417841615e-06, "loss": 0.4555, "step": 3969 }, { "epoch": 2.669748676136841, "grad_norm": 0.3621724409828, "learning_rate": 4.7744191012199805e-06, "loss": 0.4462, "step": 3970 }, { "epoch": 2.6704211145666976, "grad_norm": 0.4217030265201206, "learning_rate": 4.772219828331833e-06, "loss": 0.4401, "step": 3971 }, { "epoch": 2.671093552996554, "grad_norm": 0.3258720085996176, "learning_rate": 4.7700205996035465e-06, "loss": 0.4438, "step": 3972 }, { "epoch": 2.67176599142641, "grad_norm": 0.3286042825081745, "learning_rate": 4.767821415461487e-06, "loss": 0.4423, "step": 3973 }, { "epoch": 2.6724384298562662, "grad_norm": 0.37240805371753094, "learning_rate": 4.765622276332013e-06, "loss": 0.4476, "step": 3974 }, { "epoch": 2.6731108682861224, "grad_norm": 0.46705034090135494, "learning_rate": 4.763423182641469e-06, "loss": 0.4555, "step": 3975 }, { "epoch": 2.673783306715979, "grad_norm": 0.31248097026521715, "learning_rate": 4.761224134816197e-06, "loss": 0.4265, "step": 3976 }, { "epoch": 2.674455745145835, "grad_norm": 0.3701567562637434, "learning_rate": 4.759025133282526e-06, "loss": 0.4493, "step": 3977 }, { "epoch": 2.6751281835756915, "grad_norm": 0.334949784468544, "learning_rate": 4.756826178466783e-06, "loss": 0.447, "step": 3978 }, { "epoch": 2.6758006220055477, "grad_norm": 0.3687722696307002, "learning_rate": 4.754627270795272e-06, "loss": 0.4402, "step": 3979 }, { "epoch": 2.676473060435404, "grad_norm": 0.33745162534262396, "learning_rate": 4.752428410694301e-06, "loss": 0.4341, "step": 3980 }, { "epoch": 2.67714549886526, "grad_norm": 0.3030594134472027, "learning_rate": 4.750229598590163e-06, "loss": 0.4352, "step": 3981 }, { "epoch": 2.6778179372951163, "grad_norm": 0.34207218560456, "learning_rate": 4.748030834909147e-06, "loss": 0.4322, "step": 3982 }, { "epoch": 2.678490375724973, "grad_norm": 0.518303876053175, "learning_rate": 4.745832120077521e-06, "loss": 0.4557, "step": 3983 }, { "epoch": 2.6791628141548287, "grad_norm": 0.3883315083460478, "learning_rate": 4.7436334545215565e-06, "loss": 0.4499, "step": 3984 }, { "epoch": 2.6798352525846854, "grad_norm": 0.33195023039056065, "learning_rate": 4.741434838667508e-06, "loss": 0.4194, "step": 3985 }, { "epoch": 2.6805076910145416, "grad_norm": 0.3431102125311453, "learning_rate": 4.739236272941623e-06, "loss": 0.4675, "step": 3986 }, { "epoch": 2.6811801294443978, "grad_norm": 0.3330249236624501, "learning_rate": 4.737037757770141e-06, "loss": 0.4536, "step": 3987 }, { "epoch": 2.681852567874254, "grad_norm": 0.7319725476689913, "learning_rate": 4.734839293579284e-06, "loss": 0.4496, "step": 3988 }, { "epoch": 2.68252500630411, "grad_norm": 0.374642251156238, "learning_rate": 4.732640880795272e-06, "loss": 0.4486, "step": 3989 }, { "epoch": 2.683197444733967, "grad_norm": 0.37675456514832767, "learning_rate": 4.7304425198443125e-06, "loss": 0.4308, "step": 3990 }, { "epoch": 2.6838698831638226, "grad_norm": 0.3512707357224236, "learning_rate": 4.728244211152604e-06, "loss": 0.4269, "step": 3991 }, { "epoch": 2.6845423215936792, "grad_norm": 0.4393990814568386, "learning_rate": 4.726045955146331e-06, "loss": 0.4512, "step": 3992 }, { "epoch": 2.6852147600235354, "grad_norm": 0.35171319923804023, "learning_rate": 4.723847752251673e-06, "loss": 0.4468, "step": 3993 }, { "epoch": 2.6858871984533916, "grad_norm": 0.4101286118141828, "learning_rate": 4.721649602894795e-06, "loss": 0.4436, "step": 3994 }, { "epoch": 2.686559636883248, "grad_norm": 0.33289556695129713, "learning_rate": 4.719451507501857e-06, "loss": 0.4577, "step": 3995 }, { "epoch": 2.687232075313104, "grad_norm": 0.3934530287799647, "learning_rate": 4.717253466499e-06, "loss": 0.4391, "step": 3996 }, { "epoch": 2.6879045137429602, "grad_norm": 0.30849467174103273, "learning_rate": 4.71505548031236e-06, "loss": 0.4435, "step": 3997 }, { "epoch": 2.6885769521728164, "grad_norm": 0.46370010026944075, "learning_rate": 4.712857549368065e-06, "loss": 0.4509, "step": 3998 }, { "epoch": 2.689249390602673, "grad_norm": 0.3206942755628773, "learning_rate": 4.710659674092226e-06, "loss": 0.448, "step": 3999 }, { "epoch": 2.6899218290325293, "grad_norm": 0.46281544863600615, "learning_rate": 4.7084618549109515e-06, "loss": 0.4525, "step": 4000 }, { "epoch": 2.6905942674623855, "grad_norm": 0.34585129329342007, "learning_rate": 4.706264092250327e-06, "loss": 0.4379, "step": 4001 }, { "epoch": 2.6912667058922417, "grad_norm": 0.3354546534002248, "learning_rate": 4.704066386536436e-06, "loss": 0.4447, "step": 4002 }, { "epoch": 2.691939144322098, "grad_norm": 0.4748817548224659, "learning_rate": 4.70186873819535e-06, "loss": 0.4485, "step": 4003 }, { "epoch": 2.692611582751954, "grad_norm": 0.33706271599700643, "learning_rate": 4.699671147653129e-06, "loss": 0.4599, "step": 4004 }, { "epoch": 2.6932840211818103, "grad_norm": 0.41419105569679066, "learning_rate": 4.697473615335819e-06, "loss": 0.4354, "step": 4005 }, { "epoch": 2.693956459611667, "grad_norm": 0.48293969803601555, "learning_rate": 4.695276141669458e-06, "loss": 0.443, "step": 4006 }, { "epoch": 2.694628898041523, "grad_norm": 0.3852684945316683, "learning_rate": 4.6930787270800705e-06, "loss": 0.4492, "step": 4007 }, { "epoch": 2.6953013364713794, "grad_norm": 0.35153149593918026, "learning_rate": 4.690881371993671e-06, "loss": 0.4336, "step": 4008 }, { "epoch": 2.6959737749012356, "grad_norm": 0.31717053577082666, "learning_rate": 4.688684076836264e-06, "loss": 0.4458, "step": 4009 }, { "epoch": 2.696646213331092, "grad_norm": 0.4152306709052566, "learning_rate": 4.686486842033836e-06, "loss": 0.4491, "step": 4010 }, { "epoch": 2.697318651760948, "grad_norm": 0.3764797379680534, "learning_rate": 4.684289668012367e-06, "loss": 0.4335, "step": 4011 }, { "epoch": 2.697991090190804, "grad_norm": 0.32045573355406193, "learning_rate": 4.682092555197827e-06, "loss": 0.4396, "step": 4012 }, { "epoch": 2.698663528620661, "grad_norm": 0.36799773366081795, "learning_rate": 4.67989550401617e-06, "loss": 0.4408, "step": 4013 }, { "epoch": 2.699335967050517, "grad_norm": 0.3558258065277898, "learning_rate": 4.677698514893338e-06, "loss": 0.4302, "step": 4014 }, { "epoch": 2.7000084054803732, "grad_norm": 0.3554089068118138, "learning_rate": 4.675501588255264e-06, "loss": 0.4367, "step": 4015 }, { "epoch": 2.7006808439102294, "grad_norm": 0.3475852676857237, "learning_rate": 4.673304724527868e-06, "loss": 0.4486, "step": 4016 }, { "epoch": 2.7013532823400856, "grad_norm": 0.3515982513660669, "learning_rate": 4.671107924137057e-06, "loss": 0.4401, "step": 4017 }, { "epoch": 2.702025720769942, "grad_norm": 0.39299387084846193, "learning_rate": 4.668911187508723e-06, "loss": 0.4507, "step": 4018 }, { "epoch": 2.702698159199798, "grad_norm": 0.3191081249376199, "learning_rate": 4.666714515068751e-06, "loss": 0.4387, "step": 4019 }, { "epoch": 2.7033705976296547, "grad_norm": 0.515033568771394, "learning_rate": 4.664517907243011e-06, "loss": 0.4438, "step": 4020 }, { "epoch": 2.704043036059511, "grad_norm": 0.5144595273589103, "learning_rate": 4.662321364457359e-06, "loss": 0.4285, "step": 4021 }, { "epoch": 2.704715474489367, "grad_norm": 0.3848515807272575, "learning_rate": 4.660124887137643e-06, "loss": 0.4335, "step": 4022 }, { "epoch": 2.7053879129192233, "grad_norm": 0.3245188178348665, "learning_rate": 4.657928475709691e-06, "loss": 0.4494, "step": 4023 }, { "epoch": 2.7060603513490795, "grad_norm": 0.32339811165861526, "learning_rate": 4.6557321305993246e-06, "loss": 0.4481, "step": 4024 }, { "epoch": 2.7067327897789357, "grad_norm": 0.3965925056893796, "learning_rate": 4.65353585223235e-06, "loss": 0.4409, "step": 4025 }, { "epoch": 2.707405228208792, "grad_norm": 0.35551982751089395, "learning_rate": 4.651339641034565e-06, "loss": 0.4524, "step": 4026 }, { "epoch": 2.7080776666386486, "grad_norm": 0.48379414670857646, "learning_rate": 4.649143497431743e-06, "loss": 0.4272, "step": 4027 }, { "epoch": 2.7087501050685048, "grad_norm": 0.41530286231090446, "learning_rate": 4.646947421849655e-06, "loss": 0.4315, "step": 4028 }, { "epoch": 2.709422543498361, "grad_norm": 0.439133912286961, "learning_rate": 4.644751414714056e-06, "loss": 0.4314, "step": 4029 }, { "epoch": 2.710094981928217, "grad_norm": 0.3371496209827778, "learning_rate": 4.642555476450684e-06, "loss": 0.4443, "step": 4030 }, { "epoch": 2.7107674203580734, "grad_norm": 0.4041486073180736, "learning_rate": 4.640359607485272e-06, "loss": 0.4438, "step": 4031 }, { "epoch": 2.7114398587879296, "grad_norm": 0.3215829040360271, "learning_rate": 4.63816380824353e-06, "loss": 0.4231, "step": 4032 }, { "epoch": 2.712112297217786, "grad_norm": 0.3355336267006405, "learning_rate": 4.63596807915116e-06, "loss": 0.4408, "step": 4033 }, { "epoch": 2.7127847356476424, "grad_norm": 0.374171806207746, "learning_rate": 4.633772420633849e-06, "loss": 0.4273, "step": 4034 }, { "epoch": 2.7134571740774986, "grad_norm": 0.30441595052025694, "learning_rate": 4.6315768331172725e-06, "loss": 0.4489, "step": 4035 }, { "epoch": 2.714129612507355, "grad_norm": 0.35620556437115636, "learning_rate": 4.629381317027086e-06, "loss": 0.4401, "step": 4036 }, { "epoch": 2.714802050937211, "grad_norm": 0.35531137314619227, "learning_rate": 4.627185872788938e-06, "loss": 0.4385, "step": 4037 }, { "epoch": 2.7154744893670673, "grad_norm": 0.4110343598892571, "learning_rate": 4.6249905008284605e-06, "loss": 0.4344, "step": 4038 }, { "epoch": 2.7161469277969235, "grad_norm": 0.3394545822035457, "learning_rate": 4.622795201571272e-06, "loss": 0.4559, "step": 4039 }, { "epoch": 2.7168193662267797, "grad_norm": 0.3676872378283041, "learning_rate": 4.620599975442974e-06, "loss": 0.4232, "step": 4040 }, { "epoch": 2.7174918046566363, "grad_norm": 0.4307709856361871, "learning_rate": 4.618404822869157e-06, "loss": 0.4552, "step": 4041 }, { "epoch": 2.7181642430864925, "grad_norm": 0.39071616092546935, "learning_rate": 4.616209744275398e-06, "loss": 0.4494, "step": 4042 }, { "epoch": 2.7188366815163487, "grad_norm": 0.3350976454381335, "learning_rate": 4.614014740087256e-06, "loss": 0.4365, "step": 4043 }, { "epoch": 2.719509119946205, "grad_norm": 0.35175623390334937, "learning_rate": 4.611819810730281e-06, "loss": 0.4249, "step": 4044 }, { "epoch": 2.720181558376061, "grad_norm": 0.44179988158296624, "learning_rate": 4.60962495663e-06, "loss": 0.446, "step": 4045 }, { "epoch": 2.7208539968059173, "grad_norm": 0.31338487083319794, "learning_rate": 4.607430178211933e-06, "loss": 0.4322, "step": 4046 }, { "epoch": 2.7215264352357735, "grad_norm": 0.3153451800447677, "learning_rate": 4.6052354759015815e-06, "loss": 0.4327, "step": 4047 }, { "epoch": 2.72219887366563, "grad_norm": 0.3394799354136693, "learning_rate": 4.60304085012444e-06, "loss": 0.4256, "step": 4048 }, { "epoch": 2.7228713120954864, "grad_norm": 0.30607813581454435, "learning_rate": 4.600846301305973e-06, "loss": 0.4539, "step": 4049 }, { "epoch": 2.7235437505253426, "grad_norm": 0.3282820415912827, "learning_rate": 4.598651829871643e-06, "loss": 0.4416, "step": 4050 }, { "epoch": 2.724216188955199, "grad_norm": 0.324333649191008, "learning_rate": 4.5964574362468925e-06, "loss": 0.4425, "step": 4051 }, { "epoch": 2.724888627385055, "grad_norm": 0.34630251064694667, "learning_rate": 4.594263120857151e-06, "loss": 0.451, "step": 4052 }, { "epoch": 2.725561065814911, "grad_norm": 0.5041706542960473, "learning_rate": 4.5920688841278315e-06, "loss": 0.4337, "step": 4053 }, { "epoch": 2.7262335042447674, "grad_norm": 0.35387917754802173, "learning_rate": 4.58987472648433e-06, "loss": 0.4352, "step": 4054 }, { "epoch": 2.726905942674624, "grad_norm": 0.41760733333223016, "learning_rate": 4.5876806483520305e-06, "loss": 0.4278, "step": 4055 }, { "epoch": 2.7275783811044803, "grad_norm": 0.3397217293758815, "learning_rate": 4.585486650156299e-06, "loss": 0.4257, "step": 4056 }, { "epoch": 2.7282508195343365, "grad_norm": 0.36874491179924024, "learning_rate": 4.58329273232249e-06, "loss": 0.4438, "step": 4057 }, { "epoch": 2.7289232579641927, "grad_norm": 0.3774561271535653, "learning_rate": 4.581098895275935e-06, "loss": 0.448, "step": 4058 }, { "epoch": 2.729595696394049, "grad_norm": 0.33630915462567423, "learning_rate": 4.578905139441957e-06, "loss": 0.4407, "step": 4059 }, { "epoch": 2.730268134823905, "grad_norm": 0.3126494069121454, "learning_rate": 4.57671146524586e-06, "loss": 0.4402, "step": 4060 }, { "epoch": 2.7309405732537613, "grad_norm": 0.3582877240176243, "learning_rate": 4.574517873112934e-06, "loss": 0.445, "step": 4061 }, { "epoch": 2.731613011683618, "grad_norm": 0.3211952898034876, "learning_rate": 4.572324363468449e-06, "loss": 0.4338, "step": 4062 }, { "epoch": 2.732285450113474, "grad_norm": 0.3384746843259489, "learning_rate": 4.570130936737664e-06, "loss": 0.4574, "step": 4063 }, { "epoch": 2.7329578885433303, "grad_norm": 0.38074821139775816, "learning_rate": 4.56793759334582e-06, "loss": 0.4278, "step": 4064 }, { "epoch": 2.7336303269731865, "grad_norm": 0.3958921748871837, "learning_rate": 4.565744333718141e-06, "loss": 0.4445, "step": 4065 }, { "epoch": 2.7343027654030427, "grad_norm": 0.3406667599130005, "learning_rate": 4.563551158279837e-06, "loss": 0.4668, "step": 4066 }, { "epoch": 2.734975203832899, "grad_norm": 0.3326943427455027, "learning_rate": 4.5613580674560964e-06, "loss": 0.4531, "step": 4067 }, { "epoch": 2.735647642262755, "grad_norm": 0.3079837541279532, "learning_rate": 4.5591650616720975e-06, "loss": 0.45, "step": 4068 }, { "epoch": 2.736320080692612, "grad_norm": 0.37775753040507126, "learning_rate": 4.556972141352999e-06, "loss": 0.4235, "step": 4069 }, { "epoch": 2.736992519122468, "grad_norm": 0.3191006036475625, "learning_rate": 4.554779306923943e-06, "loss": 0.4478, "step": 4070 }, { "epoch": 2.737664957552324, "grad_norm": 0.3218784168108929, "learning_rate": 4.5525865588100566e-06, "loss": 0.4358, "step": 4071 }, { "epoch": 2.7383373959821804, "grad_norm": 0.3934979110896771, "learning_rate": 4.550393897436447e-06, "loss": 0.4494, "step": 4072 }, { "epoch": 2.7390098344120366, "grad_norm": 0.43781924786854065, "learning_rate": 4.54820132322821e-06, "loss": 0.4463, "step": 4073 }, { "epoch": 2.739682272841893, "grad_norm": 0.3497296661145289, "learning_rate": 4.546008836610417e-06, "loss": 0.4373, "step": 4074 }, { "epoch": 2.740354711271749, "grad_norm": 1.4186194019979426, "learning_rate": 4.543816438008132e-06, "loss": 0.449, "step": 4075 }, { "epoch": 2.7410271497016057, "grad_norm": 0.39392153382582556, "learning_rate": 4.54162412784639e-06, "loss": 0.4452, "step": 4076 }, { "epoch": 2.741699588131462, "grad_norm": 0.6244952823781512, "learning_rate": 4.539431906550219e-06, "loss": 0.451, "step": 4077 }, { "epoch": 2.742372026561318, "grad_norm": 0.36142281857296416, "learning_rate": 4.537239774544625e-06, "loss": 0.4408, "step": 4078 }, { "epoch": 2.7430444649911743, "grad_norm": 0.5173482600389665, "learning_rate": 4.535047732254601e-06, "loss": 0.4525, "step": 4079 }, { "epoch": 2.7437169034210305, "grad_norm": 0.4456350759158602, "learning_rate": 4.532855780105114e-06, "loss": 0.4397, "step": 4080 }, { "epoch": 2.7443893418508867, "grad_norm": 0.3776474889137343, "learning_rate": 4.530663918521121e-06, "loss": 0.4316, "step": 4081 }, { "epoch": 2.745061780280743, "grad_norm": 0.4622116773312596, "learning_rate": 4.528472147927561e-06, "loss": 0.4318, "step": 4082 }, { "epoch": 2.7457342187105995, "grad_norm": 0.42715846688985476, "learning_rate": 4.526280468749355e-06, "loss": 0.4494, "step": 4083 }, { "epoch": 2.7464066571404557, "grad_norm": 0.33742122624911886, "learning_rate": 4.5240888814114e-06, "loss": 0.4285, "step": 4084 }, { "epoch": 2.747079095570312, "grad_norm": 0.3831275524199066, "learning_rate": 4.521897386338582e-06, "loss": 0.4361, "step": 4085 }, { "epoch": 2.747751534000168, "grad_norm": 0.34803028860558366, "learning_rate": 4.5197059839557694e-06, "loss": 0.4382, "step": 4086 }, { "epoch": 2.7484239724300243, "grad_norm": 0.3358652039154297, "learning_rate": 4.5175146746878086e-06, "loss": 0.4402, "step": 4087 }, { "epoch": 2.7490964108598805, "grad_norm": 0.3596305282095028, "learning_rate": 4.515323458959532e-06, "loss": 0.4499, "step": 4088 }, { "epoch": 2.7497688492897367, "grad_norm": 0.3511275929318625, "learning_rate": 4.513132337195748e-06, "loss": 0.4516, "step": 4089 }, { "epoch": 2.7504412877195934, "grad_norm": 0.31670305524414344, "learning_rate": 4.510941309821254e-06, "loss": 0.4397, "step": 4090 }, { "epoch": 2.7511137261494496, "grad_norm": 0.32598957982308097, "learning_rate": 4.508750377260824e-06, "loss": 0.4392, "step": 4091 }, { "epoch": 2.751786164579306, "grad_norm": 0.4135011005395795, "learning_rate": 4.506559539939218e-06, "loss": 0.4308, "step": 4092 }, { "epoch": 2.752458603009162, "grad_norm": 0.35335225307008133, "learning_rate": 4.50436879828117e-06, "loss": 0.444, "step": 4093 }, { "epoch": 2.753131041439018, "grad_norm": 0.339990579672587, "learning_rate": 4.502178152711403e-06, "loss": 0.4444, "step": 4094 }, { "epoch": 2.7538034798688744, "grad_norm": 1.0034503263282213, "learning_rate": 4.499987603654618e-06, "loss": 0.4327, "step": 4095 }, { "epoch": 2.7544759182987306, "grad_norm": 0.37428166492973364, "learning_rate": 4.497797151535496e-06, "loss": 0.4485, "step": 4096 }, { "epoch": 2.7551483567285873, "grad_norm": 0.38282626112707663, "learning_rate": 4.495606796778707e-06, "loss": 0.4579, "step": 4097 }, { "epoch": 2.7558207951584435, "grad_norm": 0.45749960797959477, "learning_rate": 4.493416539808891e-06, "loss": 0.4277, "step": 4098 }, { "epoch": 2.7564932335882997, "grad_norm": 0.38810401286703083, "learning_rate": 4.4912263810506765e-06, "loss": 0.4263, "step": 4099 }, { "epoch": 2.757165672018156, "grad_norm": 0.3944196941682623, "learning_rate": 4.4890363209286685e-06, "loss": 0.4492, "step": 4100 }, { "epoch": 2.757838110448012, "grad_norm": 0.40649470910613944, "learning_rate": 4.486846359867459e-06, "loss": 0.4535, "step": 4101 }, { "epoch": 2.7585105488778683, "grad_norm": 0.44185871010693245, "learning_rate": 4.484656498291611e-06, "loss": 0.4385, "step": 4102 }, { "epoch": 2.7591829873077245, "grad_norm": 0.3637943966579024, "learning_rate": 4.48246673662568e-06, "loss": 0.4483, "step": 4103 }, { "epoch": 2.759855425737581, "grad_norm": 0.37969279378764087, "learning_rate": 4.480277075294192e-06, "loss": 0.4399, "step": 4104 }, { "epoch": 2.760527864167437, "grad_norm": 0.3138671963424064, "learning_rate": 4.4780875147216625e-06, "loss": 0.4369, "step": 4105 }, { "epoch": 2.7612003025972935, "grad_norm": 0.3263125250207868, "learning_rate": 4.475898055332578e-06, "loss": 0.4395, "step": 4106 }, { "epoch": 2.7618727410271497, "grad_norm": 0.5257770684119895, "learning_rate": 4.473708697551411e-06, "loss": 0.4347, "step": 4107 }, { "epoch": 2.762545179457006, "grad_norm": 0.3248907365494214, "learning_rate": 4.471519441802616e-06, "loss": 0.4613, "step": 4108 }, { "epoch": 2.763217617886862, "grad_norm": 0.32027788836591803, "learning_rate": 4.469330288510622e-06, "loss": 0.4398, "step": 4109 }, { "epoch": 2.7638900563167184, "grad_norm": 0.3472818383721879, "learning_rate": 4.4671412380998456e-06, "loss": 0.45, "step": 4110 }, { "epoch": 2.764562494746575, "grad_norm": 0.32326005111267875, "learning_rate": 4.464952290994674e-06, "loss": 0.4559, "step": 4111 }, { "epoch": 2.7652349331764308, "grad_norm": 0.39181182928822017, "learning_rate": 4.4627634476194826e-06, "loss": 0.4388, "step": 4112 }, { "epoch": 2.7659073716062874, "grad_norm": 0.31195199944135227, "learning_rate": 4.4605747083986225e-06, "loss": 0.4416, "step": 4113 }, { "epoch": 2.7665798100361436, "grad_norm": 0.6106923654849684, "learning_rate": 4.4583860737564285e-06, "loss": 0.4526, "step": 4114 }, { "epoch": 2.767252248466, "grad_norm": 0.3188243064392307, "learning_rate": 4.456197544117208e-06, "loss": 0.4384, "step": 4115 }, { "epoch": 2.767924686895856, "grad_norm": 0.4046926030229404, "learning_rate": 4.454009119905255e-06, "loss": 0.4408, "step": 4116 }, { "epoch": 2.7685971253257122, "grad_norm": 0.31559735785847665, "learning_rate": 4.4518208015448385e-06, "loss": 0.4272, "step": 4117 }, { "epoch": 2.769269563755569, "grad_norm": 0.517474290715874, "learning_rate": 4.449632589460212e-06, "loss": 0.4266, "step": 4118 }, { "epoch": 2.7699420021854246, "grad_norm": 0.5023251432358923, "learning_rate": 4.447444484075603e-06, "loss": 0.4456, "step": 4119 }, { "epoch": 2.7706144406152813, "grad_norm": 0.32524193502553655, "learning_rate": 4.445256485815222e-06, "loss": 0.4409, "step": 4120 }, { "epoch": 2.7712868790451375, "grad_norm": 0.35927764141343127, "learning_rate": 4.4430685951032546e-06, "loss": 0.4466, "step": 4121 }, { "epoch": 2.7719593174749937, "grad_norm": 0.31890742397512795, "learning_rate": 4.440880812363871e-06, "loss": 0.4247, "step": 4122 }, { "epoch": 2.77263175590485, "grad_norm": 0.3545582580996859, "learning_rate": 4.43869313802122e-06, "loss": 0.436, "step": 4123 }, { "epoch": 2.773304194334706, "grad_norm": 0.4796722667815604, "learning_rate": 4.436505572499422e-06, "loss": 0.4489, "step": 4124 }, { "epoch": 2.7739766327645627, "grad_norm": 0.30288716390684745, "learning_rate": 4.434318116222583e-06, "loss": 0.44, "step": 4125 }, { "epoch": 2.7746490711944185, "grad_norm": 0.3279704264872385, "learning_rate": 4.432130769614787e-06, "loss": 0.4327, "step": 4126 }, { "epoch": 2.775321509624275, "grad_norm": 0.45918742036048055, "learning_rate": 4.429943533100098e-06, "loss": 0.4224, "step": 4127 }, { "epoch": 2.7759939480541314, "grad_norm": 0.3816536568297676, "learning_rate": 4.427756407102554e-06, "loss": 0.4453, "step": 4128 }, { "epoch": 2.7766663864839876, "grad_norm": 0.3130350219354691, "learning_rate": 4.425569392046174e-06, "loss": 0.4357, "step": 4129 }, { "epoch": 2.7773388249138438, "grad_norm": 0.31236615723388095, "learning_rate": 4.423382488354957e-06, "loss": 0.4327, "step": 4130 }, { "epoch": 2.7780112633437, "grad_norm": 0.33840814755852966, "learning_rate": 4.4211956964528795e-06, "loss": 0.4528, "step": 4131 }, { "epoch": 2.7786837017735566, "grad_norm": 0.3498544989130826, "learning_rate": 4.4190090167638976e-06, "loss": 0.436, "step": 4132 }, { "epoch": 2.7793561402034124, "grad_norm": 0.359188442813231, "learning_rate": 4.416822449711939e-06, "loss": 0.4328, "step": 4133 }, { "epoch": 2.780028578633269, "grad_norm": 0.4924888241936742, "learning_rate": 4.414635995720918e-06, "loss": 0.4611, "step": 4134 }, { "epoch": 2.7807010170631252, "grad_norm": 0.3342595068619039, "learning_rate": 4.4124496552147225e-06, "loss": 0.4302, "step": 4135 }, { "epoch": 2.7813734554929814, "grad_norm": 0.30432160255455765, "learning_rate": 4.410263428617222e-06, "loss": 0.4408, "step": 4136 }, { "epoch": 2.7820458939228376, "grad_norm": 0.34090270895012315, "learning_rate": 4.408077316352258e-06, "loss": 0.4579, "step": 4137 }, { "epoch": 2.782718332352694, "grad_norm": 0.342366852309439, "learning_rate": 4.405891318843655e-06, "loss": 0.4321, "step": 4138 }, { "epoch": 2.7833907707825505, "grad_norm": 0.300326428255073, "learning_rate": 4.403705436515212e-06, "loss": 0.4386, "step": 4139 }, { "epoch": 2.7840632092124062, "grad_norm": 0.4374188529908497, "learning_rate": 4.401519669790709e-06, "loss": 0.4617, "step": 4140 }, { "epoch": 2.784735647642263, "grad_norm": 0.36903156045600943, "learning_rate": 4.399334019093904e-06, "loss": 0.4501, "step": 4141 }, { "epoch": 2.785408086072119, "grad_norm": 0.3469467309885719, "learning_rate": 4.3971484848485245e-06, "loss": 0.4469, "step": 4142 }, { "epoch": 2.7860805245019753, "grad_norm": 0.3529204570178575, "learning_rate": 4.394963067478283e-06, "loss": 0.4364, "step": 4143 }, { "epoch": 2.7867529629318315, "grad_norm": 0.3156208007068114, "learning_rate": 4.39277776740687e-06, "loss": 0.4405, "step": 4144 }, { "epoch": 2.7874254013616877, "grad_norm": 0.3589296480700081, "learning_rate": 4.390592585057949e-06, "loss": 0.444, "step": 4145 }, { "epoch": 2.7880978397915444, "grad_norm": 0.4860533872791004, "learning_rate": 4.388407520855162e-06, "loss": 0.4198, "step": 4146 }, { "epoch": 2.7887702782214, "grad_norm": 0.31359406004259816, "learning_rate": 4.38622257522213e-06, "loss": 0.444, "step": 4147 }, { "epoch": 2.7894427166512568, "grad_norm": 0.3628458626927062, "learning_rate": 4.384037748582448e-06, "loss": 0.4365, "step": 4148 }, { "epoch": 2.790115155081113, "grad_norm": 0.3971586747602741, "learning_rate": 4.381853041359693e-06, "loss": 0.4303, "step": 4149 }, { "epoch": 2.790787593510969, "grad_norm": 0.3762934083513401, "learning_rate": 4.37966845397741e-06, "loss": 0.4448, "step": 4150 }, { "epoch": 2.7914600319408254, "grad_norm": 0.3429231449205495, "learning_rate": 4.377483986859128e-06, "loss": 0.442, "step": 4151 }, { "epoch": 2.7921324703706816, "grad_norm": 0.5394162778507079, "learning_rate": 4.375299640428352e-06, "loss": 0.4355, "step": 4152 }, { "epoch": 2.792804908800538, "grad_norm": 0.32743745427999177, "learning_rate": 4.373115415108563e-06, "loss": 0.4447, "step": 4153 }, { "epoch": 2.793477347230394, "grad_norm": 0.4000900373043025, "learning_rate": 4.370931311323217e-06, "loss": 0.4482, "step": 4154 }, { "epoch": 2.7941497856602506, "grad_norm": 0.35385970730396854, "learning_rate": 4.368747329495747e-06, "loss": 0.4324, "step": 4155 }, { "epoch": 2.794822224090107, "grad_norm": 0.3965033179606159, "learning_rate": 4.366563470049561e-06, "loss": 0.443, "step": 4156 }, { "epoch": 2.795494662519963, "grad_norm": 0.34703061683707376, "learning_rate": 4.364379733408048e-06, "loss": 0.4407, "step": 4157 }, { "epoch": 2.7961671009498192, "grad_norm": 0.31031118789611056, "learning_rate": 4.36219611999457e-06, "loss": 0.4496, "step": 4158 }, { "epoch": 2.7968395393796754, "grad_norm": 0.356068190864218, "learning_rate": 4.360012630232463e-06, "loss": 0.4386, "step": 4159 }, { "epoch": 2.797511977809532, "grad_norm": 0.3053172018438645, "learning_rate": 4.357829264545042e-06, "loss": 0.4418, "step": 4160 }, { "epoch": 2.798184416239388, "grad_norm": 0.34260686402603957, "learning_rate": 4.355646023355599e-06, "loss": 0.4234, "step": 4161 }, { "epoch": 2.7988568546692445, "grad_norm": 0.37957637036473135, "learning_rate": 4.353462907087397e-06, "loss": 0.4349, "step": 4162 }, { "epoch": 2.7995292930991007, "grad_norm": 0.41495400538925514, "learning_rate": 4.351279916163684e-06, "loss": 0.4217, "step": 4163 }, { "epoch": 2.800201731528957, "grad_norm": 0.39348273057249017, "learning_rate": 4.34909705100767e-06, "loss": 0.4421, "step": 4164 }, { "epoch": 2.800874169958813, "grad_norm": 0.32721102101693095, "learning_rate": 4.346914312042552e-06, "loss": 0.4373, "step": 4165 }, { "epoch": 2.8015466083886693, "grad_norm": 0.4929749225276872, "learning_rate": 4.344731699691498e-06, "loss": 0.4325, "step": 4166 }, { "epoch": 2.8022190468185255, "grad_norm": 0.3944224671487195, "learning_rate": 4.342549214377653e-06, "loss": 0.4428, "step": 4167 }, { "epoch": 2.8028914852483817, "grad_norm": 0.32805134167892863, "learning_rate": 4.340366856524137e-06, "loss": 0.4339, "step": 4168 }, { "epoch": 2.8035639236782384, "grad_norm": 0.3571698135102629, "learning_rate": 4.338184626554043e-06, "loss": 0.448, "step": 4169 }, { "epoch": 2.8042363621080946, "grad_norm": 1.1019836197426351, "learning_rate": 4.336002524890442e-06, "loss": 0.4559, "step": 4170 }, { "epoch": 2.8049088005379508, "grad_norm": 0.5615591083474871, "learning_rate": 4.333820551956381e-06, "loss": 0.4432, "step": 4171 }, { "epoch": 2.805581238967807, "grad_norm": 0.42330074239721943, "learning_rate": 4.331638708174877e-06, "loss": 0.4495, "step": 4172 }, { "epoch": 2.806253677397663, "grad_norm": 0.41645460574901194, "learning_rate": 4.329456993968926e-06, "loss": 0.431, "step": 4173 }, { "epoch": 2.8069261158275194, "grad_norm": 0.3709084412027702, "learning_rate": 4.327275409761499e-06, "loss": 0.4378, "step": 4174 }, { "epoch": 2.8075985542573756, "grad_norm": 0.353736146094937, "learning_rate": 4.325093955975541e-06, "loss": 0.4446, "step": 4175 }, { "epoch": 2.8082709926872322, "grad_norm": 0.3818149149271061, "learning_rate": 4.322912633033972e-06, "loss": 0.4341, "step": 4176 }, { "epoch": 2.8089434311170884, "grad_norm": 0.4666823798883692, "learning_rate": 4.320731441359684e-06, "loss": 0.4504, "step": 4177 }, { "epoch": 2.8096158695469446, "grad_norm": 0.3905860953030715, "learning_rate": 4.318550381375548e-06, "loss": 0.4313, "step": 4178 }, { "epoch": 2.810288307976801, "grad_norm": 0.4089372449811153, "learning_rate": 4.3163694535044055e-06, "loss": 0.4312, "step": 4179 }, { "epoch": 2.810960746406657, "grad_norm": 0.37258055042296084, "learning_rate": 4.314188658169078e-06, "loss": 0.4441, "step": 4180 }, { "epoch": 2.8116331848365133, "grad_norm": 0.35262230137425327, "learning_rate": 4.312007995792353e-06, "loss": 0.4423, "step": 4181 }, { "epoch": 2.8123056232663695, "grad_norm": 0.4462012393921559, "learning_rate": 4.309827466796996e-06, "loss": 0.4647, "step": 4182 }, { "epoch": 2.812978061696226, "grad_norm": 0.423231503154175, "learning_rate": 4.307647071605751e-06, "loss": 0.4494, "step": 4183 }, { "epoch": 2.8136505001260823, "grad_norm": 0.37000476204116944, "learning_rate": 4.30546681064133e-06, "loss": 0.4381, "step": 4184 }, { "epoch": 2.8143229385559385, "grad_norm": 0.4382581644812144, "learning_rate": 4.303286684326424e-06, "loss": 0.4331, "step": 4185 }, { "epoch": 2.8149953769857947, "grad_norm": 0.3426587641141498, "learning_rate": 4.301106693083692e-06, "loss": 0.4344, "step": 4186 }, { "epoch": 2.815667815415651, "grad_norm": 0.4400919829618176, "learning_rate": 4.298926837335772e-06, "loss": 0.4499, "step": 4187 }, { "epoch": 2.816340253845507, "grad_norm": 0.43886870253233085, "learning_rate": 4.2967471175052725e-06, "loss": 0.4469, "step": 4188 }, { "epoch": 2.8170126922753633, "grad_norm": 0.35837375545485195, "learning_rate": 4.2945675340147796e-06, "loss": 0.4345, "step": 4189 }, { "epoch": 2.81768513070522, "grad_norm": 0.3634353514020071, "learning_rate": 4.292388087286846e-06, "loss": 0.431, "step": 4190 }, { "epoch": 2.818357569135076, "grad_norm": 0.3444790616942859, "learning_rate": 4.290208777744006e-06, "loss": 0.4385, "step": 4191 }, { "epoch": 2.8190300075649324, "grad_norm": 0.4673667498158592, "learning_rate": 4.288029605808761e-06, "loss": 0.4545, "step": 4192 }, { "epoch": 2.8197024459947886, "grad_norm": 0.4901550150844472, "learning_rate": 4.28585057190359e-06, "loss": 0.4252, "step": 4193 }, { "epoch": 2.820374884424645, "grad_norm": 0.34004725640003464, "learning_rate": 4.283671676450941e-06, "loss": 0.4339, "step": 4194 }, { "epoch": 2.821047322854501, "grad_norm": 0.37589666520860127, "learning_rate": 4.28149291987324e-06, "loss": 0.4454, "step": 4195 }, { "epoch": 2.821719761284357, "grad_norm": 0.43697203610507124, "learning_rate": 4.2793143025928835e-06, "loss": 0.4246, "step": 4196 }, { "epoch": 2.822392199714214, "grad_norm": 0.32464672726116545, "learning_rate": 4.27713582503224e-06, "loss": 0.4458, "step": 4197 }, { "epoch": 2.82306463814407, "grad_norm": 0.362476088230034, "learning_rate": 4.274957487613654e-06, "loss": 0.4426, "step": 4198 }, { "epoch": 2.8237370765739263, "grad_norm": 0.3578027590759422, "learning_rate": 4.272779290759439e-06, "loss": 0.424, "step": 4199 }, { "epoch": 2.8244095150037825, "grad_norm": 0.742708640591173, "learning_rate": 4.270601234891883e-06, "loss": 0.4413, "step": 4200 }, { "epoch": 2.8250819534336387, "grad_norm": 0.3279396144376603, "learning_rate": 4.268423320433247e-06, "loss": 0.4392, "step": 4201 }, { "epoch": 2.825754391863495, "grad_norm": 0.35208092982367156, "learning_rate": 4.266245547805767e-06, "loss": 0.4416, "step": 4202 }, { "epoch": 2.826426830293351, "grad_norm": 0.36251601382145465, "learning_rate": 4.264067917431645e-06, "loss": 0.4561, "step": 4203 }, { "epoch": 2.8270992687232077, "grad_norm": 0.4306499066372681, "learning_rate": 4.261890429733063e-06, "loss": 0.4324, "step": 4204 }, { "epoch": 2.827771707153064, "grad_norm": 0.35960347296205575, "learning_rate": 4.2597130851321685e-06, "loss": 0.4246, "step": 4205 }, { "epoch": 2.82844414558292, "grad_norm": 0.3312427958990518, "learning_rate": 4.257535884051086e-06, "loss": 0.4662, "step": 4206 }, { "epoch": 2.8291165840127763, "grad_norm": 0.3127332077974222, "learning_rate": 4.255358826911912e-06, "loss": 0.4256, "step": 4207 }, { "epoch": 2.8297890224426325, "grad_norm": 0.36043322840857506, "learning_rate": 4.253181914136711e-06, "loss": 0.4442, "step": 4208 }, { "epoch": 2.8304614608724887, "grad_norm": 0.3183818541164439, "learning_rate": 4.251005146147522e-06, "loss": 0.4271, "step": 4209 }, { "epoch": 2.831133899302345, "grad_norm": 0.4745175769316324, "learning_rate": 4.248828523366359e-06, "loss": 0.4312, "step": 4210 }, { "epoch": 2.8318063377322016, "grad_norm": 0.33201401995383073, "learning_rate": 4.246652046215206e-06, "loss": 0.453, "step": 4211 }, { "epoch": 2.832478776162058, "grad_norm": 0.324963818023879, "learning_rate": 4.244475715116012e-06, "loss": 0.4339, "step": 4212 }, { "epoch": 2.833151214591914, "grad_norm": 0.3568436731790368, "learning_rate": 4.242299530490708e-06, "loss": 0.4498, "step": 4213 }, { "epoch": 2.83382365302177, "grad_norm": 0.4182822403247682, "learning_rate": 4.24012349276119e-06, "loss": 0.4609, "step": 4214 }, { "epoch": 2.8344960914516264, "grad_norm": 3.131545947607575, "learning_rate": 4.23794760234933e-06, "loss": 0.4352, "step": 4215 }, { "epoch": 2.8351685298814826, "grad_norm": 0.4583669298743143, "learning_rate": 4.2357718596769655e-06, "loss": 0.4469, "step": 4216 }, { "epoch": 2.835840968311339, "grad_norm": 0.3794591328890642, "learning_rate": 4.233596265165912e-06, "loss": 0.4319, "step": 4217 }, { "epoch": 2.8365134067411955, "grad_norm": 0.4474970862074643, "learning_rate": 4.231420819237951e-06, "loss": 0.4468, "step": 4218 }, { "epoch": 2.8371858451710517, "grad_norm": 0.4264990089485767, "learning_rate": 4.229245522314839e-06, "loss": 0.4457, "step": 4219 }, { "epoch": 2.837858283600908, "grad_norm": 0.32880018687738627, "learning_rate": 4.2270703748183025e-06, "loss": 0.4394, "step": 4220 }, { "epoch": 2.838530722030764, "grad_norm": 0.4347059065127276, "learning_rate": 4.2248953771700356e-06, "loss": 0.45, "step": 4221 }, { "epoch": 2.8392031604606203, "grad_norm": 0.3539200637449874, "learning_rate": 4.222720529791708e-06, "loss": 0.44, "step": 4222 }, { "epoch": 2.8398755988904765, "grad_norm": 0.31172589745609186, "learning_rate": 4.220545833104958e-06, "loss": 0.4481, "step": 4223 }, { "epoch": 2.8405480373203327, "grad_norm": 0.35007131216723775, "learning_rate": 4.218371287531397e-06, "loss": 0.4293, "step": 4224 }, { "epoch": 2.8412204757501893, "grad_norm": 0.449397329929107, "learning_rate": 4.216196893492602e-06, "loss": 0.4361, "step": 4225 }, { "epoch": 2.8418929141800455, "grad_norm": 0.4494250507992524, "learning_rate": 4.2140226514101265e-06, "loss": 0.4327, "step": 4226 }, { "epoch": 2.8425653526099017, "grad_norm": 0.328475017212245, "learning_rate": 4.2118485617054916e-06, "loss": 0.4507, "step": 4227 }, { "epoch": 2.843237791039758, "grad_norm": 0.4306809663698929, "learning_rate": 4.20967462480019e-06, "loss": 0.4374, "step": 4228 }, { "epoch": 2.843910229469614, "grad_norm": 0.35037605536830907, "learning_rate": 4.207500841115682e-06, "loss": 0.4409, "step": 4229 }, { "epoch": 2.8445826678994703, "grad_norm": 0.33765625836092356, "learning_rate": 4.205327211073399e-06, "loss": 0.4381, "step": 4230 }, { "epoch": 2.8452551063293265, "grad_norm": 0.5551967692371169, "learning_rate": 4.203153735094747e-06, "loss": 0.4328, "step": 4231 }, { "epoch": 2.845927544759183, "grad_norm": 0.40994218394479104, "learning_rate": 4.200980413601097e-06, "loss": 0.4471, "step": 4232 }, { "epoch": 2.8465999831890394, "grad_norm": 0.37418125193428436, "learning_rate": 4.198807247013794e-06, "loss": 0.4518, "step": 4233 }, { "epoch": 2.8472724216188956, "grad_norm": 0.41210781906159627, "learning_rate": 4.1966342357541486e-06, "loss": 0.4363, "step": 4234 }, { "epoch": 2.847944860048752, "grad_norm": 0.4155183764811362, "learning_rate": 4.194461380243445e-06, "loss": 0.4373, "step": 4235 }, { "epoch": 2.848617298478608, "grad_norm": 0.3669197743717589, "learning_rate": 4.192288680902935e-06, "loss": 0.4453, "step": 4236 }, { "epoch": 2.849289736908464, "grad_norm": 0.425957665347839, "learning_rate": 4.190116138153844e-06, "loss": 0.4562, "step": 4237 }, { "epoch": 2.8499621753383204, "grad_norm": 0.3404491078149701, "learning_rate": 4.187943752417359e-06, "loss": 0.4301, "step": 4238 }, { "epoch": 2.850634613768177, "grad_norm": 0.3677165947890313, "learning_rate": 4.185771524114644e-06, "loss": 0.4263, "step": 4239 }, { "epoch": 2.8513070521980333, "grad_norm": 0.31412292626872296, "learning_rate": 4.18359945366683e-06, "loss": 0.4299, "step": 4240 }, { "epoch": 2.8519794906278895, "grad_norm": 0.3716609911042492, "learning_rate": 4.181427541495018e-06, "loss": 0.4427, "step": 4241 }, { "epoch": 2.8526519290577457, "grad_norm": 0.3518355157188309, "learning_rate": 4.179255788020278e-06, "loss": 0.4342, "step": 4242 }, { "epoch": 2.853324367487602, "grad_norm": 0.35252230576663934, "learning_rate": 4.177084193663647e-06, "loss": 0.4337, "step": 4243 }, { "epoch": 2.853996805917458, "grad_norm": 0.3379641016136965, "learning_rate": 4.174912758846134e-06, "loss": 0.4207, "step": 4244 }, { "epoch": 2.8546692443473143, "grad_norm": 0.3524487510511978, "learning_rate": 4.172741483988717e-06, "loss": 0.4353, "step": 4245 }, { "epoch": 2.855341682777171, "grad_norm": 0.40793087769440395, "learning_rate": 4.170570369512344e-06, "loss": 0.4253, "step": 4246 }, { "epoch": 2.856014121207027, "grad_norm": 0.422585208007085, "learning_rate": 4.1683994158379255e-06, "loss": 0.4295, "step": 4247 }, { "epoch": 2.8566865596368833, "grad_norm": 0.45569305713966374, "learning_rate": 4.166228623386347e-06, "loss": 0.4325, "step": 4248 }, { "epoch": 2.8573589980667395, "grad_norm": 0.5739734252923965, "learning_rate": 4.164057992578463e-06, "loss": 0.4316, "step": 4249 }, { "epoch": 2.8580314364965957, "grad_norm": 0.38539807899996864, "learning_rate": 4.1618875238350945e-06, "loss": 0.4518, "step": 4250 }, { "epoch": 2.858703874926452, "grad_norm": 0.4344583117890214, "learning_rate": 4.15971721757703e-06, "loss": 0.435, "step": 4251 }, { "epoch": 2.859376313356308, "grad_norm": 0.37028539101648733, "learning_rate": 4.157547074225028e-06, "loss": 0.4475, "step": 4252 }, { "epoch": 2.860048751786165, "grad_norm": 0.3612453578542226, "learning_rate": 4.155377094199816e-06, "loss": 0.4343, "step": 4253 }, { "epoch": 2.860721190216021, "grad_norm": 0.46991632221235885, "learning_rate": 4.153207277922089e-06, "loss": 0.433, "step": 4254 }, { "epoch": 2.861393628645877, "grad_norm": 0.34980489848240764, "learning_rate": 4.151037625812513e-06, "loss": 0.4365, "step": 4255 }, { "epoch": 2.8620660670757334, "grad_norm": 0.3750905114580679, "learning_rate": 4.148868138291714e-06, "loss": 0.4365, "step": 4256 }, { "epoch": 2.8627385055055896, "grad_norm": 0.38187841266966155, "learning_rate": 4.146698815780295e-06, "loss": 0.4443, "step": 4257 }, { "epoch": 2.863410943935446, "grad_norm": 0.33677571728502814, "learning_rate": 4.144529658698824e-06, "loss": 0.4423, "step": 4258 }, { "epoch": 2.864083382365302, "grad_norm": 0.402470212628385, "learning_rate": 4.142360667467838e-06, "loss": 0.4259, "step": 4259 }, { "epoch": 2.8647558207951587, "grad_norm": 0.6996119095805915, "learning_rate": 4.1401918425078365e-06, "loss": 0.4397, "step": 4260 }, { "epoch": 2.865428259225015, "grad_norm": 0.32779799605137916, "learning_rate": 4.138023184239294e-06, "loss": 0.4488, "step": 4261 }, { "epoch": 2.866100697654871, "grad_norm": 0.31941981810706765, "learning_rate": 4.135854693082646e-06, "loss": 0.4242, "step": 4262 }, { "epoch": 2.8667731360847273, "grad_norm": 0.4026405587453875, "learning_rate": 4.133686369458303e-06, "loss": 0.437, "step": 4263 }, { "epoch": 2.8674455745145835, "grad_norm": 0.3102554702479224, "learning_rate": 4.131518213786638e-06, "loss": 0.453, "step": 4264 }, { "epoch": 2.8681180129444397, "grad_norm": 0.4258431129398313, "learning_rate": 4.1293502264879895e-06, "loss": 0.4534, "step": 4265 }, { "epoch": 2.868790451374296, "grad_norm": 0.6528324085825112, "learning_rate": 4.1271824079826695e-06, "loss": 0.4371, "step": 4266 }, { "epoch": 2.8694628898041525, "grad_norm": 0.376635634239984, "learning_rate": 4.1250147586909534e-06, "loss": 0.4323, "step": 4267 }, { "epoch": 2.8701353282340083, "grad_norm": 0.45667317364587295, "learning_rate": 4.122847279033086e-06, "loss": 0.4647, "step": 4268 }, { "epoch": 2.870807766663865, "grad_norm": 0.40142667751818195, "learning_rate": 4.120679969429274e-06, "loss": 0.4471, "step": 4269 }, { "epoch": 2.871480205093721, "grad_norm": 0.39789184379035186, "learning_rate": 4.118512830299696e-06, "loss": 0.4405, "step": 4270 }, { "epoch": 2.8721526435235774, "grad_norm": 0.34470586336361553, "learning_rate": 4.116345862064498e-06, "loss": 0.4551, "step": 4271 }, { "epoch": 2.8728250819534336, "grad_norm": 0.3288657110198987, "learning_rate": 4.114179065143792e-06, "loss": 0.4506, "step": 4272 }, { "epoch": 2.8734975203832898, "grad_norm": 0.316861492206836, "learning_rate": 4.112012439957653e-06, "loss": 0.4336, "step": 4273 }, { "epoch": 2.8741699588131464, "grad_norm": 0.3757760463793481, "learning_rate": 4.109845986926127e-06, "loss": 0.4187, "step": 4274 }, { "epoch": 2.874842397243002, "grad_norm": 0.36283664838320717, "learning_rate": 4.107679706469226e-06, "loss": 0.4286, "step": 4275 }, { "epoch": 2.875514835672859, "grad_norm": 0.3972832536818271, "learning_rate": 4.105513599006927e-06, "loss": 0.4417, "step": 4276 }, { "epoch": 2.876187274102715, "grad_norm": 0.3395551214278122, "learning_rate": 4.103347664959179e-06, "loss": 0.447, "step": 4277 }, { "epoch": 2.8768597125325712, "grad_norm": 0.3844461209754712, "learning_rate": 4.101181904745885e-06, "loss": 0.4556, "step": 4278 }, { "epoch": 2.8775321509624274, "grad_norm": 0.3931268402696742, "learning_rate": 4.099016318786926e-06, "loss": 0.4535, "step": 4279 }, { "epoch": 2.8782045893922836, "grad_norm": 0.3069355737860178, "learning_rate": 4.096850907502145e-06, "loss": 0.4391, "step": 4280 }, { "epoch": 2.8788770278221403, "grad_norm": 1.1404325595603302, "learning_rate": 4.094685671311353e-06, "loss": 0.4477, "step": 4281 }, { "epoch": 2.879549466251996, "grad_norm": 0.3256061095754914, "learning_rate": 4.092520610634323e-06, "loss": 0.435, "step": 4282 }, { "epoch": 2.8802219046818527, "grad_norm": 0.3556430518345657, "learning_rate": 4.0903557258907975e-06, "loss": 0.4357, "step": 4283 }, { "epoch": 2.880894343111709, "grad_norm": 0.4232484835520356, "learning_rate": 4.088191017500484e-06, "loss": 0.4407, "step": 4284 }, { "epoch": 2.881566781541565, "grad_norm": 0.5641353968555147, "learning_rate": 4.0860264858830545e-06, "loss": 0.4343, "step": 4285 }, { "epoch": 2.8822392199714213, "grad_norm": 0.35668894593439726, "learning_rate": 4.083862131458152e-06, "loss": 0.4384, "step": 4286 }, { "epoch": 2.8829116584012775, "grad_norm": 0.5268429700767152, "learning_rate": 4.081697954645375e-06, "loss": 0.4454, "step": 4287 }, { "epoch": 2.883584096831134, "grad_norm": 0.3592019898868432, "learning_rate": 4.0795339558642964e-06, "loss": 0.43, "step": 4288 }, { "epoch": 2.88425653526099, "grad_norm": 0.3355202443504104, "learning_rate": 4.077370135534452e-06, "loss": 0.4256, "step": 4289 }, { "epoch": 2.8849289736908466, "grad_norm": 0.35761333702572906, "learning_rate": 4.075206494075344e-06, "loss": 0.4093, "step": 4290 }, { "epoch": 2.8856014121207028, "grad_norm": 0.36261425680219084, "learning_rate": 4.073043031906437e-06, "loss": 0.434, "step": 4291 }, { "epoch": 2.886273850550559, "grad_norm": 0.48381493475754367, "learning_rate": 4.070879749447162e-06, "loss": 0.4384, "step": 4292 }, { "epoch": 2.886946288980415, "grad_norm": 0.3368170714364876, "learning_rate": 4.0687166471169156e-06, "loss": 0.4339, "step": 4293 }, { "epoch": 2.8876187274102714, "grad_norm": 0.37122650595240925, "learning_rate": 4.066553725335064e-06, "loss": 0.4456, "step": 4294 }, { "epoch": 2.888291165840128, "grad_norm": 0.472617643332299, "learning_rate": 4.0643909845209286e-06, "loss": 0.4395, "step": 4295 }, { "epoch": 2.8889636042699838, "grad_norm": 0.4182466531187184, "learning_rate": 4.062228425093802e-06, "loss": 0.4311, "step": 4296 }, { "epoch": 2.8896360426998404, "grad_norm": 0.4305460750947574, "learning_rate": 4.060066047472942e-06, "loss": 0.4396, "step": 4297 }, { "epoch": 2.8903084811296966, "grad_norm": 0.33529669856356104, "learning_rate": 4.05790385207757e-06, "loss": 0.4318, "step": 4298 }, { "epoch": 2.890980919559553, "grad_norm": 0.31791553757939606, "learning_rate": 4.055741839326872e-06, "loss": 0.4511, "step": 4299 }, { "epoch": 2.891653357989409, "grad_norm": 0.4054930402209033, "learning_rate": 4.053580009639998e-06, "loss": 0.442, "step": 4300 }, { "epoch": 2.8923257964192652, "grad_norm": 0.4424163335137873, "learning_rate": 4.051418363436062e-06, "loss": 0.4409, "step": 4301 }, { "epoch": 2.892998234849122, "grad_norm": 0.3115659861446721, "learning_rate": 4.049256901134146e-06, "loss": 0.4344, "step": 4302 }, { "epoch": 2.8936706732789776, "grad_norm": 0.35125570354866636, "learning_rate": 4.047095623153294e-06, "loss": 0.4417, "step": 4303 }, { "epoch": 2.8943431117088343, "grad_norm": 0.5242489123159184, "learning_rate": 4.0449345299125105e-06, "loss": 0.4424, "step": 4304 }, { "epoch": 2.8950155501386905, "grad_norm": 0.39148138912349173, "learning_rate": 4.042773621830769e-06, "loss": 0.4341, "step": 4305 }, { "epoch": 2.8956879885685467, "grad_norm": 0.4032256382439055, "learning_rate": 4.040612899327007e-06, "loss": 0.4374, "step": 4306 }, { "epoch": 2.896360426998403, "grad_norm": 0.45494911382000336, "learning_rate": 4.0384523628201246e-06, "loss": 0.4405, "step": 4307 }, { "epoch": 2.897032865428259, "grad_norm": 0.5259742575105177, "learning_rate": 4.03629201272899e-06, "loss": 0.4419, "step": 4308 }, { "epoch": 2.8977053038581158, "grad_norm": 0.3713800624548828, "learning_rate": 4.034131849472423e-06, "loss": 0.4203, "step": 4309 }, { "epoch": 2.8983777422879715, "grad_norm": 0.35812631028349245, "learning_rate": 4.031971873469222e-06, "loss": 0.4432, "step": 4310 }, { "epoch": 2.899050180717828, "grad_norm": 0.3508903930067445, "learning_rate": 4.0298120851381405e-06, "loss": 0.4601, "step": 4311 }, { "epoch": 2.8997226191476844, "grad_norm": 0.36295881177394324, "learning_rate": 4.0276524848978985e-06, "loss": 0.4389, "step": 4312 }, { "epoch": 2.9003950575775406, "grad_norm": 0.31969764112024057, "learning_rate": 4.0254930731671785e-06, "loss": 0.4254, "step": 4313 }, { "epoch": 2.9010674960073968, "grad_norm": 0.3590482150560216, "learning_rate": 4.0233338503646255e-06, "loss": 0.4522, "step": 4314 }, { "epoch": 2.901739934437253, "grad_norm": 0.42649867839913475, "learning_rate": 4.021174816908852e-06, "loss": 0.44, "step": 4315 }, { "epoch": 2.9024123728671096, "grad_norm": 0.35706898525852293, "learning_rate": 4.0190159732184305e-06, "loss": 0.4449, "step": 4316 }, { "epoch": 2.9030848112969654, "grad_norm": 0.3270427305336125, "learning_rate": 4.016857319711893e-06, "loss": 0.4353, "step": 4317 }, { "epoch": 2.903757249726822, "grad_norm": 0.3247344819275317, "learning_rate": 4.014698856807744e-06, "loss": 0.4326, "step": 4318 }, { "epoch": 2.9044296881566782, "grad_norm": 0.36525032765947724, "learning_rate": 4.012540584924442e-06, "loss": 0.4276, "step": 4319 }, { "epoch": 2.9051021265865344, "grad_norm": 0.4000180432810828, "learning_rate": 4.010382504480415e-06, "loss": 0.4422, "step": 4320 }, { "epoch": 2.9057745650163906, "grad_norm": 0.3840704237209392, "learning_rate": 4.00822461589405e-06, "loss": 0.435, "step": 4321 }, { "epoch": 2.906447003446247, "grad_norm": 0.3185174756415056, "learning_rate": 4.006066919583698e-06, "loss": 0.4412, "step": 4322 }, { "epoch": 2.9071194418761035, "grad_norm": 0.35060641499913836, "learning_rate": 4.003909415967672e-06, "loss": 0.446, "step": 4323 }, { "epoch": 2.9077918803059593, "grad_norm": 0.3229457585115087, "learning_rate": 4.001752105464249e-06, "loss": 0.4338, "step": 4324 }, { "epoch": 2.908464318735816, "grad_norm": 0.35988129806189556, "learning_rate": 3.999594988491672e-06, "loss": 0.4393, "step": 4325 }, { "epoch": 2.909136757165672, "grad_norm": 0.5260762120795631, "learning_rate": 3.997438065468135e-06, "loss": 0.4366, "step": 4326 }, { "epoch": 2.9098091955955283, "grad_norm": 0.33383556534355013, "learning_rate": 3.9952813368118054e-06, "loss": 0.4236, "step": 4327 }, { "epoch": 2.9104816340253845, "grad_norm": 0.3549944344912472, "learning_rate": 3.9931248029408096e-06, "loss": 0.4194, "step": 4328 }, { "epoch": 2.9111540724552407, "grad_norm": 0.340081867293386, "learning_rate": 3.9909684642732346e-06, "loss": 0.438, "step": 4329 }, { "epoch": 2.911826510885097, "grad_norm": 0.41570221401733837, "learning_rate": 3.988812321227134e-06, "loss": 0.4427, "step": 4330 }, { "epoch": 2.912498949314953, "grad_norm": 0.39149912394492825, "learning_rate": 3.986656374220516e-06, "loss": 0.4401, "step": 4331 }, { "epoch": 2.9131713877448098, "grad_norm": 0.5597983452383962, "learning_rate": 3.984500623671359e-06, "loss": 0.4343, "step": 4332 }, { "epoch": 2.913843826174666, "grad_norm": 0.46909294423105663, "learning_rate": 3.982345069997597e-06, "loss": 0.4362, "step": 4333 }, { "epoch": 2.914516264604522, "grad_norm": 0.4440529949481681, "learning_rate": 3.980189713617132e-06, "loss": 0.447, "step": 4334 }, { "epoch": 2.9151887030343784, "grad_norm": 0.3846651417453312, "learning_rate": 3.9780345549478185e-06, "loss": 0.4319, "step": 4335 }, { "epoch": 2.9158611414642346, "grad_norm": 0.3907841131878748, "learning_rate": 3.975879594407481e-06, "loss": 0.4302, "step": 4336 }, { "epoch": 2.916533579894091, "grad_norm": 0.34113166552315544, "learning_rate": 3.9737248324139035e-06, "loss": 0.4581, "step": 4337 }, { "epoch": 2.917206018323947, "grad_norm": 0.4001528551480132, "learning_rate": 3.971570269384831e-06, "loss": 0.4256, "step": 4338 }, { "epoch": 2.9178784567538036, "grad_norm": 0.3707242145332742, "learning_rate": 3.969415905737967e-06, "loss": 0.4358, "step": 4339 }, { "epoch": 2.91855089518366, "grad_norm": 0.3583102598519517, "learning_rate": 3.967261741890982e-06, "loss": 0.4364, "step": 4340 }, { "epoch": 2.919223333613516, "grad_norm": 0.3255702338682238, "learning_rate": 3.965107778261504e-06, "loss": 0.4398, "step": 4341 }, { "epoch": 2.9198957720433723, "grad_norm": 0.34596311967659127, "learning_rate": 3.962954015267123e-06, "loss": 0.4166, "step": 4342 }, { "epoch": 2.9205682104732285, "grad_norm": 0.3569938436608133, "learning_rate": 3.960800453325392e-06, "loss": 0.4401, "step": 4343 }, { "epoch": 2.9212406489030847, "grad_norm": 0.4270059848200292, "learning_rate": 3.958647092853819e-06, "loss": 0.4399, "step": 4344 }, { "epoch": 2.921913087332941, "grad_norm": 0.45722521864625526, "learning_rate": 3.95649393426988e-06, "loss": 0.4399, "step": 4345 }, { "epoch": 2.9225855257627975, "grad_norm": 0.326045276220502, "learning_rate": 3.954340977991008e-06, "loss": 0.4371, "step": 4346 }, { "epoch": 2.9232579641926537, "grad_norm": 0.5610976445828544, "learning_rate": 3.9521882244345996e-06, "loss": 0.443, "step": 4347 }, { "epoch": 2.92393040262251, "grad_norm": 0.37308037941952066, "learning_rate": 3.950035674018008e-06, "loss": 0.4418, "step": 4348 }, { "epoch": 2.924602841052366, "grad_norm": 0.2977451673809849, "learning_rate": 3.9478833271585494e-06, "loss": 0.4423, "step": 4349 }, { "epoch": 2.9252752794822223, "grad_norm": 0.3645061290192601, "learning_rate": 3.945731184273502e-06, "loss": 0.4206, "step": 4350 }, { "epoch": 2.9259477179120785, "grad_norm": 0.37103446938698675, "learning_rate": 3.943579245780101e-06, "loss": 0.4396, "step": 4351 }, { "epoch": 2.9266201563419347, "grad_norm": 0.33635430725600896, "learning_rate": 3.941427512095548e-06, "loss": 0.4409, "step": 4352 }, { "epoch": 2.9272925947717914, "grad_norm": 0.44784358321516776, "learning_rate": 3.939275983636995e-06, "loss": 0.443, "step": 4353 }, { "epoch": 2.9279650332016476, "grad_norm": 0.32436690281265607, "learning_rate": 3.937124660821562e-06, "loss": 0.4371, "step": 4354 }, { "epoch": 2.928637471631504, "grad_norm": 0.33151113447889863, "learning_rate": 3.934973544066328e-06, "loss": 0.4312, "step": 4355 }, { "epoch": 2.92930991006136, "grad_norm": 0.39549657605539945, "learning_rate": 3.932822633788334e-06, "loss": 0.4484, "step": 4356 }, { "epoch": 2.929982348491216, "grad_norm": 0.3303839780249808, "learning_rate": 3.930671930404572e-06, "loss": 0.4392, "step": 4357 }, { "epoch": 2.9306547869210724, "grad_norm": 0.45877058828119155, "learning_rate": 3.928521434332002e-06, "loss": 0.4452, "step": 4358 }, { "epoch": 2.9313272253509286, "grad_norm": 0.3174532018147674, "learning_rate": 3.926371145987544e-06, "loss": 0.4355, "step": 4359 }, { "epoch": 2.9319996637807852, "grad_norm": 0.3864330463989357, "learning_rate": 3.9242210657880745e-06, "loss": 0.4528, "step": 4360 }, { "epoch": 2.9326721022106415, "grad_norm": 0.376800759867609, "learning_rate": 3.92207119415043e-06, "loss": 0.438, "step": 4361 }, { "epoch": 2.9333445406404977, "grad_norm": 0.32073634660336636, "learning_rate": 3.919921531491407e-06, "loss": 0.4443, "step": 4362 }, { "epoch": 2.934016979070354, "grad_norm": 0.3172303339676808, "learning_rate": 3.917772078227763e-06, "loss": 0.4454, "step": 4363 }, { "epoch": 2.93468941750021, "grad_norm": 0.3850781141799853, "learning_rate": 3.9156228347762115e-06, "loss": 0.4228, "step": 4364 }, { "epoch": 2.9353618559300663, "grad_norm": 0.2930249871479131, "learning_rate": 3.913473801553433e-06, "loss": 0.4452, "step": 4365 }, { "epoch": 2.9360342943599225, "grad_norm": 0.3473829718278103, "learning_rate": 3.911324978976054e-06, "loss": 0.4289, "step": 4366 }, { "epoch": 2.936706732789779, "grad_norm": 0.3217487492540504, "learning_rate": 3.909176367460672e-06, "loss": 0.4403, "step": 4367 }, { "epoch": 2.9373791712196353, "grad_norm": 0.4522829535415397, "learning_rate": 3.907027967423839e-06, "loss": 0.4355, "step": 4368 }, { "epoch": 2.9380516096494915, "grad_norm": 0.3485041332119282, "learning_rate": 3.904879779282067e-06, "loss": 0.4336, "step": 4369 }, { "epoch": 2.9387240480793477, "grad_norm": 0.4578987985171744, "learning_rate": 3.9027318034518245e-06, "loss": 0.4625, "step": 4370 }, { "epoch": 2.939396486509204, "grad_norm": 0.4543264370604624, "learning_rate": 3.900584040349543e-06, "loss": 0.4496, "step": 4371 }, { "epoch": 2.94006892493906, "grad_norm": 0.3338600468951495, "learning_rate": 3.8984364903916086e-06, "loss": 0.4394, "step": 4372 }, { "epoch": 2.9407413633689163, "grad_norm": 0.3022669546557657, "learning_rate": 3.896289153994369e-06, "loss": 0.4248, "step": 4373 }, { "epoch": 2.941413801798773, "grad_norm": 0.36615243953150717, "learning_rate": 3.894142031574131e-06, "loss": 0.4413, "step": 4374 }, { "epoch": 2.942086240228629, "grad_norm": 0.8356858213298126, "learning_rate": 3.891995123547154e-06, "loss": 0.437, "step": 4375 }, { "epoch": 2.9427586786584854, "grad_norm": 0.3351703752251006, "learning_rate": 3.889848430329664e-06, "loss": 0.4423, "step": 4376 }, { "epoch": 2.9434311170883416, "grad_norm": 0.29626714008291044, "learning_rate": 3.887701952337839e-06, "loss": 0.4284, "step": 4377 }, { "epoch": 2.944103555518198, "grad_norm": 0.28788910704281123, "learning_rate": 3.88555568998782e-06, "loss": 0.4332, "step": 4378 }, { "epoch": 2.944775993948054, "grad_norm": 0.3341832475121459, "learning_rate": 3.883409643695702e-06, "loss": 0.4526, "step": 4379 }, { "epoch": 2.94544843237791, "grad_norm": 0.3747649741539901, "learning_rate": 3.881263813877542e-06, "loss": 0.4448, "step": 4380 }, { "epoch": 2.946120870807767, "grad_norm": 0.34952520777191076, "learning_rate": 3.879118200949352e-06, "loss": 0.4326, "step": 4381 }, { "epoch": 2.946793309237623, "grad_norm": 0.4237596153227539, "learning_rate": 3.876972805327105e-06, "loss": 0.4422, "step": 4382 }, { "epoch": 2.9474657476674793, "grad_norm": 0.3219451157593167, "learning_rate": 3.874827627426727e-06, "loss": 0.4405, "step": 4383 }, { "epoch": 2.9481381860973355, "grad_norm": 0.44034715574883926, "learning_rate": 3.872682667664105e-06, "loss": 0.4244, "step": 4384 }, { "epoch": 2.9488106245271917, "grad_norm": 0.3202514401249179, "learning_rate": 3.870537926455086e-06, "loss": 0.4474, "step": 4385 }, { "epoch": 2.949483062957048, "grad_norm": 0.3718417568910564, "learning_rate": 3.868393404215469e-06, "loss": 0.4301, "step": 4386 }, { "epoch": 2.950155501386904, "grad_norm": 0.35650965227510006, "learning_rate": 3.866249101361018e-06, "loss": 0.4379, "step": 4387 }, { "epoch": 2.9508279398167607, "grad_norm": 0.4331958919885307, "learning_rate": 3.864105018307446e-06, "loss": 0.4652, "step": 4388 }, { "epoch": 2.951500378246617, "grad_norm": 0.36349900094158416, "learning_rate": 3.861961155470428e-06, "loss": 0.4429, "step": 4389 }, { "epoch": 2.952172816676473, "grad_norm": 0.34935962224647293, "learning_rate": 3.8598175132655975e-06, "loss": 0.4501, "step": 4390 }, { "epoch": 2.9528452551063293, "grad_norm": 0.36038031146784655, "learning_rate": 3.8576740921085455e-06, "loss": 0.4352, "step": 4391 }, { "epoch": 2.9535176935361855, "grad_norm": 0.39743775222788014, "learning_rate": 3.855530892414813e-06, "loss": 0.4352, "step": 4392 }, { "epoch": 2.9541901319660417, "grad_norm": 0.32815871640241545, "learning_rate": 3.853387914599905e-06, "loss": 0.4474, "step": 4393 }, { "epoch": 2.954862570395898, "grad_norm": 0.3279877663767734, "learning_rate": 3.851245159079283e-06, "loss": 0.4388, "step": 4394 }, { "epoch": 2.9555350088257546, "grad_norm": 0.37024229207542203, "learning_rate": 3.849102626268364e-06, "loss": 0.4348, "step": 4395 }, { "epoch": 2.956207447255611, "grad_norm": 0.3295138624701636, "learning_rate": 3.8469603165825226e-06, "loss": 0.4402, "step": 4396 }, { "epoch": 2.956879885685467, "grad_norm": 0.36905622085010975, "learning_rate": 3.844818230437087e-06, "loss": 0.4338, "step": 4397 }, { "epoch": 2.957552324115323, "grad_norm": 0.32310577320388223, "learning_rate": 3.842676368247347e-06, "loss": 0.436, "step": 4398 }, { "epoch": 2.9582247625451794, "grad_norm": 0.3368728139063044, "learning_rate": 3.840534730428545e-06, "loss": 0.4124, "step": 4399 }, { "epoch": 2.9588972009750356, "grad_norm": 0.356940651326783, "learning_rate": 3.8383933173958835e-06, "loss": 0.4377, "step": 4400 }, { "epoch": 2.959569639404892, "grad_norm": 0.4407601298417127, "learning_rate": 3.8362521295645164e-06, "loss": 0.4388, "step": 4401 }, { "epoch": 2.9602420778347485, "grad_norm": 0.3699052538556628, "learning_rate": 3.834111167349558e-06, "loss": 0.4566, "step": 4402 }, { "epoch": 2.9609145162646047, "grad_norm": 0.41370129265662003, "learning_rate": 3.8319704311660785e-06, "loss": 0.4443, "step": 4403 }, { "epoch": 2.961586954694461, "grad_norm": 0.3563826234769003, "learning_rate": 3.829829921429103e-06, "loss": 0.4551, "step": 4404 }, { "epoch": 2.962259393124317, "grad_norm": 0.5052402730139329, "learning_rate": 3.8276896385536145e-06, "loss": 0.4235, "step": 4405 }, { "epoch": 2.9629318315541733, "grad_norm": 0.3742072239640784, "learning_rate": 3.825549582954548e-06, "loss": 0.425, "step": 4406 }, { "epoch": 2.9636042699840295, "grad_norm": 0.484152781124374, "learning_rate": 3.8234097550468e-06, "loss": 0.4319, "step": 4407 }, { "epoch": 2.9642767084138857, "grad_norm": 0.41888443325550634, "learning_rate": 3.821270155245219e-06, "loss": 0.4564, "step": 4408 }, { "epoch": 2.9649491468437423, "grad_norm": 0.3124106039017705, "learning_rate": 3.81913078396461e-06, "loss": 0.4434, "step": 4409 }, { "epoch": 2.9656215852735985, "grad_norm": 0.3310828872080542, "learning_rate": 3.816991641619736e-06, "loss": 0.449, "step": 4410 }, { "epoch": 2.9662940237034547, "grad_norm": 0.3283652682944646, "learning_rate": 3.81485272862531e-06, "loss": 0.4258, "step": 4411 }, { "epoch": 2.966966462133311, "grad_norm": 0.3375407699567821, "learning_rate": 3.8127140453960065e-06, "loss": 0.4444, "step": 4412 }, { "epoch": 2.967638900563167, "grad_norm": 0.3991924856965535, "learning_rate": 3.810575592346455e-06, "loss": 0.4355, "step": 4413 }, { "epoch": 2.9683113389930234, "grad_norm": 0.31755611053343813, "learning_rate": 3.8084373698912334e-06, "loss": 0.4395, "step": 4414 }, { "epoch": 2.9689837774228796, "grad_norm": 0.3789286228953469, "learning_rate": 3.806299378444884e-06, "loss": 0.4415, "step": 4415 }, { "epoch": 2.969656215852736, "grad_norm": 0.3697470094575646, "learning_rate": 3.804161618421899e-06, "loss": 0.4376, "step": 4416 }, { "epoch": 2.9703286542825924, "grad_norm": 0.3486963933380699, "learning_rate": 3.802024090236727e-06, "loss": 0.4323, "step": 4417 }, { "epoch": 2.9710010927124486, "grad_norm": 0.3036241491697172, "learning_rate": 3.799886794303773e-06, "loss": 0.4355, "step": 4418 }, { "epoch": 2.971673531142305, "grad_norm": 0.3629807388059824, "learning_rate": 3.7977497310373946e-06, "loss": 0.4411, "step": 4419 }, { "epoch": 2.972345969572161, "grad_norm": 0.33530428915772964, "learning_rate": 3.7956129008519046e-06, "loss": 0.4275, "step": 4420 }, { "epoch": 2.973018408002017, "grad_norm": 0.32335921903193104, "learning_rate": 3.7934763041615717e-06, "loss": 0.432, "step": 4421 }, { "epoch": 2.9736908464318734, "grad_norm": 0.5711599036875669, "learning_rate": 3.7913399413806227e-06, "loss": 0.4504, "step": 4422 }, { "epoch": 2.97436328486173, "grad_norm": 0.32928993885534397, "learning_rate": 3.78920381292323e-06, "loss": 0.428, "step": 4423 }, { "epoch": 2.9750357232915863, "grad_norm": 0.3275175794711282, "learning_rate": 3.787067919203528e-06, "loss": 0.4517, "step": 4424 }, { "epoch": 2.9757081617214425, "grad_norm": 0.4021150224715988, "learning_rate": 3.784932260635604e-06, "loss": 0.4355, "step": 4425 }, { "epoch": 2.9763806001512987, "grad_norm": 0.4063896786721347, "learning_rate": 3.7827968376334996e-06, "loss": 0.432, "step": 4426 }, { "epoch": 2.977053038581155, "grad_norm": 0.45938151658635823, "learning_rate": 3.780661650611209e-06, "loss": 0.4371, "step": 4427 }, { "epoch": 2.977725477011011, "grad_norm": 0.3171925401712923, "learning_rate": 3.7785266999826826e-06, "loss": 0.449, "step": 4428 }, { "epoch": 2.9783979154408673, "grad_norm": 0.3228017458469971, "learning_rate": 3.7763919861618247e-06, "loss": 0.4408, "step": 4429 }, { "epoch": 2.979070353870724, "grad_norm": 0.4159499590049895, "learning_rate": 3.774257509562493e-06, "loss": 0.4389, "step": 4430 }, { "epoch": 2.9797427923005797, "grad_norm": 0.3722689785310936, "learning_rate": 3.7721232705985022e-06, "loss": 0.4492, "step": 4431 }, { "epoch": 2.9804152307304363, "grad_norm": 0.4000912931409732, "learning_rate": 3.769989269683614e-06, "loss": 0.4395, "step": 4432 }, { "epoch": 2.9810876691602926, "grad_norm": 0.35943578317497027, "learning_rate": 3.7678555072315496e-06, "loss": 0.4355, "step": 4433 }, { "epoch": 2.9817601075901488, "grad_norm": 0.31215073655493997, "learning_rate": 3.765721983655984e-06, "loss": 0.432, "step": 4434 }, { "epoch": 2.982432546020005, "grad_norm": 0.32125224482976794, "learning_rate": 3.7635886993705443e-06, "loss": 0.4465, "step": 4435 }, { "epoch": 2.983104984449861, "grad_norm": 0.35945927270651473, "learning_rate": 3.76145565478881e-06, "loss": 0.4427, "step": 4436 }, { "epoch": 2.983777422879718, "grad_norm": 0.3154339889699578, "learning_rate": 3.7593228503243173e-06, "loss": 0.4375, "step": 4437 }, { "epoch": 2.9844498613095736, "grad_norm": 0.3717430845968554, "learning_rate": 3.7571902863905524e-06, "loss": 0.432, "step": 4438 }, { "epoch": 2.98512229973943, "grad_norm": 0.3863997070542928, "learning_rate": 3.7550579634009582e-06, "loss": 0.4455, "step": 4439 }, { "epoch": 2.9857947381692864, "grad_norm": 0.2909428722373954, "learning_rate": 3.752925881768931e-06, "loss": 0.4362, "step": 4440 }, { "epoch": 2.9864671765991426, "grad_norm": 0.34718339418997324, "learning_rate": 3.7507940419078127e-06, "loss": 0.447, "step": 4441 }, { "epoch": 2.987139615028999, "grad_norm": 0.40098733777174367, "learning_rate": 3.7486624442309087e-06, "loss": 0.4382, "step": 4442 }, { "epoch": 2.987812053458855, "grad_norm": 0.3695593860512126, "learning_rate": 3.7465310891514716e-06, "loss": 0.4378, "step": 4443 }, { "epoch": 2.9884844918887117, "grad_norm": 0.3398051541434089, "learning_rate": 3.74439997708271e-06, "loss": 0.4374, "step": 4444 }, { "epoch": 2.9891569303185674, "grad_norm": 0.34199486719995503, "learning_rate": 3.7422691084377817e-06, "loss": 0.4422, "step": 4445 }, { "epoch": 2.989829368748424, "grad_norm": 0.35147755808673253, "learning_rate": 3.7401384836297994e-06, "loss": 0.4316, "step": 4446 }, { "epoch": 2.9905018071782803, "grad_norm": 0.32203234479982756, "learning_rate": 3.7380081030718296e-06, "loss": 0.4385, "step": 4447 }, { "epoch": 2.9911742456081365, "grad_norm": 0.3200714589833798, "learning_rate": 3.7358779671768917e-06, "loss": 0.448, "step": 4448 }, { "epoch": 2.9918466840379927, "grad_norm": 0.3481011119507389, "learning_rate": 3.733748076357952e-06, "loss": 0.4416, "step": 4449 }, { "epoch": 2.992519122467849, "grad_norm": 0.32614372611646925, "learning_rate": 3.7316184310279356e-06, "loss": 0.434, "step": 4450 }, { "epoch": 2.9931915608977055, "grad_norm": 1.4320532111873607, "learning_rate": 3.729489031599719e-06, "loss": 0.4359, "step": 4451 }, { "epoch": 2.9938639993275613, "grad_norm": 0.34225309102035456, "learning_rate": 3.7273598784861288e-06, "loss": 0.4575, "step": 4452 }, { "epoch": 2.994536437757418, "grad_norm": 0.3854212013922402, "learning_rate": 3.725230972099947e-06, "loss": 0.438, "step": 4453 }, { "epoch": 2.995208876187274, "grad_norm": 0.49023520566482387, "learning_rate": 3.7231023128539023e-06, "loss": 0.4372, "step": 4454 }, { "epoch": 2.9958813146171304, "grad_norm": 0.343889522075791, "learning_rate": 3.7209739011606814e-06, "loss": 0.4305, "step": 4455 }, { "epoch": 2.9965537530469866, "grad_norm": 0.3953877234343036, "learning_rate": 3.71884573743292e-06, "loss": 0.4341, "step": 4456 }, { "epoch": 2.9972261914768428, "grad_norm": 0.3238747253395985, "learning_rate": 3.716717822083209e-06, "loss": 0.4341, "step": 4457 }, { "epoch": 2.9978986299066994, "grad_norm": 0.3730996167012987, "learning_rate": 3.714590155524084e-06, "loss": 0.4407, "step": 4458 }, { "epoch": 2.998571068336555, "grad_norm": 0.37637480012700475, "learning_rate": 3.7124627381680367e-06, "loss": 0.4266, "step": 4459 }, { "epoch": 2.999243506766412, "grad_norm": 0.38744039917159373, "learning_rate": 3.7103355704275136e-06, "loss": 0.4359, "step": 4460 }, { "epoch": 2.999915945196268, "grad_norm": 0.32943006457297874, "learning_rate": 3.708208652714912e-06, "loss": 0.4349, "step": 4461 }, { "epoch": 3.0005883836261242, "grad_norm": 0.6176820385280498, "learning_rate": 3.7060819854425723e-06, "loss": 0.4499, "step": 4462 }, { "epoch": 3.0012608220559804, "grad_norm": 0.33412830767673857, "learning_rate": 3.7039555690227963e-06, "loss": 0.442, "step": 4463 }, { "epoch": 3.0019332604858366, "grad_norm": 0.40413572576992607, "learning_rate": 3.7018294038678326e-06, "loss": 0.4357, "step": 4464 }, { "epoch": 3.002605698915693, "grad_norm": 0.3732340624532239, "learning_rate": 3.6997034903898826e-06, "loss": 0.4394, "step": 4465 }, { "epoch": 3.0032781373455495, "grad_norm": 0.3435046155970762, "learning_rate": 3.697577829001099e-06, "loss": 0.4296, "step": 4466 }, { "epoch": 3.0039505757754057, "grad_norm": 0.2881616624216002, "learning_rate": 3.6954524201135823e-06, "loss": 0.4372, "step": 4467 }, { "epoch": 3.004623014205262, "grad_norm": 0.35940667588121067, "learning_rate": 3.693327264139388e-06, "loss": 0.4407, "step": 4468 }, { "epoch": 3.005295452635118, "grad_norm": 0.3429020968951988, "learning_rate": 3.6912023614905218e-06, "loss": 0.4262, "step": 4469 }, { "epoch": 3.0059678910649743, "grad_norm": 0.31452139822598385, "learning_rate": 3.689077712578941e-06, "loss": 0.4474, "step": 4470 }, { "epoch": 3.0066403294948305, "grad_norm": 0.35211524591414006, "learning_rate": 3.686953317816548e-06, "loss": 0.4178, "step": 4471 }, { "epoch": 3.0073127679246867, "grad_norm": 0.38554047368580263, "learning_rate": 3.6848291776152044e-06, "loss": 0.4281, "step": 4472 }, { "epoch": 3.0079852063545434, "grad_norm": 0.37807020266172264, "learning_rate": 3.682705292386716e-06, "loss": 0.4405, "step": 4473 }, { "epoch": 3.0086576447843996, "grad_norm": 0.5783834918565653, "learning_rate": 3.6805816625428424e-06, "loss": 0.4303, "step": 4474 }, { "epoch": 3.0093300832142558, "grad_norm": 0.3027429867873164, "learning_rate": 3.6784582884952935e-06, "loss": 0.4284, "step": 4475 }, { "epoch": 3.010002521644112, "grad_norm": 0.3573843602129143, "learning_rate": 3.676335170655728e-06, "loss": 0.4257, "step": 4476 }, { "epoch": 3.010674960073968, "grad_norm": 0.5170737055546644, "learning_rate": 3.674212309435756e-06, "loss": 0.4343, "step": 4477 }, { "epoch": 3.0113473985038244, "grad_norm": 0.39620461894404074, "learning_rate": 3.6720897052469374e-06, "loss": 0.4352, "step": 4478 }, { "epoch": 3.0120198369336806, "grad_norm": 0.34278914287919265, "learning_rate": 3.6699673585007858e-06, "loss": 0.4303, "step": 4479 }, { "epoch": 3.0126922753635372, "grad_norm": 0.3574083848789049, "learning_rate": 3.6678452696087563e-06, "loss": 0.4235, "step": 4480 }, { "epoch": 3.0133647137933934, "grad_norm": 0.4886644604880471, "learning_rate": 3.6657234389822626e-06, "loss": 0.4326, "step": 4481 }, { "epoch": 3.0140371522232496, "grad_norm": 0.4046875113455944, "learning_rate": 3.6636018670326646e-06, "loss": 0.4461, "step": 4482 }, { "epoch": 3.014709590653106, "grad_norm": 0.3058637048547598, "learning_rate": 3.6614805541712726e-06, "loss": 0.4428, "step": 4483 }, { "epoch": 3.015382029082962, "grad_norm": 0.3532756588998309, "learning_rate": 3.6593595008093464e-06, "loss": 0.419, "step": 4484 }, { "epoch": 3.0160544675128182, "grad_norm": 0.4818989394385238, "learning_rate": 3.657238707358096e-06, "loss": 0.4308, "step": 4485 }, { "epoch": 3.0167269059426745, "grad_norm": 0.38255122084895843, "learning_rate": 3.6551181742286803e-06, "loss": 0.4438, "step": 4486 }, { "epoch": 3.017399344372531, "grad_norm": 0.3754752510920237, "learning_rate": 3.652997901832208e-06, "loss": 0.4444, "step": 4487 }, { "epoch": 3.0180717828023873, "grad_norm": 0.36663369477192764, "learning_rate": 3.6508778905797404e-06, "loss": 0.4342, "step": 4488 }, { "epoch": 3.0187442212322435, "grad_norm": 0.31769006125646865, "learning_rate": 3.648758140882281e-06, "loss": 0.4397, "step": 4489 }, { "epoch": 3.0194166596620997, "grad_norm": 0.37354204632756116, "learning_rate": 3.646638653150788e-06, "loss": 0.4223, "step": 4490 }, { "epoch": 3.020089098091956, "grad_norm": 0.3219142891575576, "learning_rate": 3.644519427796168e-06, "loss": 0.4477, "step": 4491 }, { "epoch": 3.020761536521812, "grad_norm": 0.347480754410478, "learning_rate": 3.6424004652292778e-06, "loss": 0.4461, "step": 4492 }, { "epoch": 3.0214339749516683, "grad_norm": 0.30296795201848176, "learning_rate": 3.640281765860919e-06, "loss": 0.4481, "step": 4493 }, { "epoch": 3.022106413381525, "grad_norm": 0.39347590564289325, "learning_rate": 3.638163330101847e-06, "loss": 0.4327, "step": 4494 }, { "epoch": 3.022778851811381, "grad_norm": 0.32219672839312713, "learning_rate": 3.636045158362763e-06, "loss": 0.451, "step": 4495 }, { "epoch": 3.0234512902412374, "grad_norm": 0.34578066800769225, "learning_rate": 3.6339272510543184e-06, "loss": 0.4284, "step": 4496 }, { "epoch": 3.0241237286710936, "grad_norm": 0.5388113685987523, "learning_rate": 3.6318096085871148e-06, "loss": 0.4295, "step": 4497 }, { "epoch": 3.02479616710095, "grad_norm": 0.38941952487264886, "learning_rate": 3.6296922313716976e-06, "loss": 0.4391, "step": 4498 }, { "epoch": 3.025468605530806, "grad_norm": 0.3382159734636173, "learning_rate": 3.6275751198185644e-06, "loss": 0.4447, "step": 4499 }, { "epoch": 3.026141043960662, "grad_norm": 0.3602686931463578, "learning_rate": 3.6254582743381617e-06, "loss": 0.4295, "step": 4500 }, { "epoch": 3.0268134823905184, "grad_norm": 0.44577634985746306, "learning_rate": 3.6233416953408844e-06, "loss": 0.4402, "step": 4501 }, { "epoch": 3.027485920820375, "grad_norm": 0.35712384379947537, "learning_rate": 3.6212253832370727e-06, "loss": 0.4294, "step": 4502 }, { "epoch": 3.0281583592502312, "grad_norm": 0.3874521973589743, "learning_rate": 3.6191093384370173e-06, "loss": 0.4288, "step": 4503 }, { "epoch": 3.0288307976800874, "grad_norm": 0.43785321982355346, "learning_rate": 3.6169935613509585e-06, "loss": 0.4489, "step": 4504 }, { "epoch": 3.0295032361099437, "grad_norm": 0.35880506553273245, "learning_rate": 3.6148780523890836e-06, "loss": 0.4387, "step": 4505 }, { "epoch": 3.0301756745398, "grad_norm": 0.31171991877169425, "learning_rate": 3.6127628119615245e-06, "loss": 0.4415, "step": 4506 }, { "epoch": 3.030848112969656, "grad_norm": 0.5210472160505484, "learning_rate": 3.610647840478365e-06, "loss": 0.4404, "step": 4507 }, { "epoch": 3.0315205513995123, "grad_norm": 0.3050302944530371, "learning_rate": 3.6085331383496357e-06, "loss": 0.4542, "step": 4508 }, { "epoch": 3.032192989829369, "grad_norm": 0.3415842205365484, "learning_rate": 3.6064187059853173e-06, "loss": 0.4304, "step": 4509 }, { "epoch": 3.032865428259225, "grad_norm": 0.3195233141623651, "learning_rate": 3.604304543795335e-06, "loss": 0.424, "step": 4510 }, { "epoch": 3.0335378666890813, "grad_norm": 0.3758174522413181, "learning_rate": 3.60219065218956e-06, "loss": 0.4417, "step": 4511 }, { "epoch": 3.0342103051189375, "grad_norm": 0.3266941100665439, "learning_rate": 3.6000770315778157e-06, "loss": 0.432, "step": 4512 }, { "epoch": 3.0348827435487937, "grad_norm": 0.3321494460098997, "learning_rate": 3.5979636823698704e-06, "loss": 0.4364, "step": 4513 }, { "epoch": 3.03555518197865, "grad_norm": 0.390727902210108, "learning_rate": 3.595850604975441e-06, "loss": 0.4429, "step": 4514 }, { "epoch": 3.036227620408506, "grad_norm": 0.3715492533589567, "learning_rate": 3.5937377998041888e-06, "loss": 0.4391, "step": 4515 }, { "epoch": 3.036900058838363, "grad_norm": 0.4056712467446377, "learning_rate": 3.591625267265727e-06, "loss": 0.4296, "step": 4516 }, { "epoch": 3.037572497268219, "grad_norm": 0.9494418041097662, "learning_rate": 3.589513007769611e-06, "loss": 0.418, "step": 4517 }, { "epoch": 3.038244935698075, "grad_norm": 0.3523303983594626, "learning_rate": 3.5874010217253473e-06, "loss": 0.4373, "step": 4518 }, { "epoch": 3.0389173741279314, "grad_norm": 0.3689719434747409, "learning_rate": 3.58528930954239e-06, "loss": 0.425, "step": 4519 }, { "epoch": 3.0395898125577876, "grad_norm": 0.3906852991531941, "learning_rate": 3.5831778716301325e-06, "loss": 0.4267, "step": 4520 }, { "epoch": 3.040262250987644, "grad_norm": 0.5857781559126515, "learning_rate": 3.5810667083979228e-06, "loss": 0.4309, "step": 4521 }, { "epoch": 3.0409346894175, "grad_norm": 0.4321142668318707, "learning_rate": 3.5789558202550533e-06, "loss": 0.4344, "step": 4522 }, { "epoch": 3.0416071278473567, "grad_norm": 0.3307722284146976, "learning_rate": 3.576845207610765e-06, "loss": 0.448, "step": 4523 }, { "epoch": 3.042279566277213, "grad_norm": 0.4335724324922873, "learning_rate": 3.5747348708742404e-06, "loss": 0.4443, "step": 4524 }, { "epoch": 3.042952004707069, "grad_norm": 0.4024654859333773, "learning_rate": 3.572624810454612e-06, "loss": 0.4296, "step": 4525 }, { "epoch": 3.0436244431369253, "grad_norm": 0.34310394163753305, "learning_rate": 3.5705150267609596e-06, "loss": 0.4468, "step": 4526 }, { "epoch": 3.0442968815667815, "grad_norm": 0.3634416843777775, "learning_rate": 3.5684055202023093e-06, "loss": 0.4484, "step": 4527 }, { "epoch": 3.0449693199966377, "grad_norm": 0.3003360998378971, "learning_rate": 3.566296291187629e-06, "loss": 0.4466, "step": 4528 }, { "epoch": 3.045641758426494, "grad_norm": 0.32281364279096514, "learning_rate": 3.5641873401258377e-06, "loss": 0.4367, "step": 4529 }, { "epoch": 3.0463141968563505, "grad_norm": 0.3521914518525131, "learning_rate": 3.5620786674257983e-06, "loss": 0.4342, "step": 4530 }, { "epoch": 3.0469866352862067, "grad_norm": 0.3169201581905446, "learning_rate": 3.559970273496321e-06, "loss": 0.4329, "step": 4531 }, { "epoch": 3.047659073716063, "grad_norm": 0.3902227518137739, "learning_rate": 3.5578621587461615e-06, "loss": 0.4413, "step": 4532 }, { "epoch": 3.048331512145919, "grad_norm": 0.4085131117285104, "learning_rate": 3.55575432358402e-06, "loss": 0.4384, "step": 4533 }, { "epoch": 3.0490039505757753, "grad_norm": 0.3264488825660449, "learning_rate": 3.553646768418544e-06, "loss": 0.4228, "step": 4534 }, { "epoch": 3.0496763890056315, "grad_norm": 0.4476131212157327, "learning_rate": 3.5515394936583265e-06, "loss": 0.4418, "step": 4535 }, { "epoch": 3.0503488274354877, "grad_norm": 0.5043788346159002, "learning_rate": 3.549432499711908e-06, "loss": 0.4466, "step": 4536 }, { "epoch": 3.0510212658653444, "grad_norm": 0.3253028263392405, "learning_rate": 3.547325786987768e-06, "loss": 0.431, "step": 4537 }, { "epoch": 3.0516937042952006, "grad_norm": 0.34369162109844337, "learning_rate": 3.545219355894339e-06, "loss": 0.4341, "step": 4538 }, { "epoch": 3.052366142725057, "grad_norm": 0.33327263778378285, "learning_rate": 3.543113206839995e-06, "loss": 0.4323, "step": 4539 }, { "epoch": 3.053038581154913, "grad_norm": 0.3719428111807546, "learning_rate": 3.5410073402330565e-06, "loss": 0.4514, "step": 4540 }, { "epoch": 3.053711019584769, "grad_norm": 0.37129283477446157, "learning_rate": 3.53890175648179e-06, "loss": 0.4186, "step": 4541 }, { "epoch": 3.0543834580146254, "grad_norm": 0.37228958234421883, "learning_rate": 3.5367964559944045e-06, "loss": 0.4449, "step": 4542 }, { "epoch": 3.0550558964444816, "grad_norm": 0.32190002037023063, "learning_rate": 3.5346914391790566e-06, "loss": 0.4429, "step": 4543 }, { "epoch": 3.0557283348743383, "grad_norm": 0.31796748081394494, "learning_rate": 3.532586706443846e-06, "loss": 0.4451, "step": 4544 }, { "epoch": 3.0564007733041945, "grad_norm": 0.31043316844484037, "learning_rate": 3.5304822581968214e-06, "loss": 0.4321, "step": 4545 }, { "epoch": 3.0570732117340507, "grad_norm": 0.44760175036381356, "learning_rate": 3.528378094845969e-06, "loss": 0.4357, "step": 4546 }, { "epoch": 3.057745650163907, "grad_norm": 0.3789634341908856, "learning_rate": 3.5262742167992265e-06, "loss": 0.4279, "step": 4547 }, { "epoch": 3.058418088593763, "grad_norm": 0.30327069801108236, "learning_rate": 3.5241706244644724e-06, "loss": 0.434, "step": 4548 }, { "epoch": 3.0590905270236193, "grad_norm": 0.3517385667754944, "learning_rate": 3.5220673182495346e-06, "loss": 0.4369, "step": 4549 }, { "epoch": 3.0597629654534755, "grad_norm": 0.3115038237779994, "learning_rate": 3.5199642985621775e-06, "loss": 0.4481, "step": 4550 }, { "epoch": 3.060435403883332, "grad_norm": 0.4064540175949745, "learning_rate": 3.517861565810118e-06, "loss": 0.4277, "step": 4551 }, { "epoch": 3.0611078423131883, "grad_norm": 0.40234286779242157, "learning_rate": 3.5157591204010123e-06, "loss": 0.4371, "step": 4552 }, { "epoch": 3.0617802807430445, "grad_norm": 0.4273834795088497, "learning_rate": 3.513656962742463e-06, "loss": 0.4359, "step": 4553 }, { "epoch": 3.0624527191729007, "grad_norm": 0.4395651494398156, "learning_rate": 3.5115550932420194e-06, "loss": 0.4392, "step": 4554 }, { "epoch": 3.063125157602757, "grad_norm": 0.47998674278000614, "learning_rate": 3.509453512307167e-06, "loss": 0.4382, "step": 4555 }, { "epoch": 3.063797596032613, "grad_norm": 0.2963015918779441, "learning_rate": 3.507352220345343e-06, "loss": 0.4365, "step": 4556 }, { "epoch": 3.0644700344624693, "grad_norm": 0.3417701165760403, "learning_rate": 3.505251217763925e-06, "loss": 0.433, "step": 4557 }, { "epoch": 3.065142472892326, "grad_norm": 0.5514900169517631, "learning_rate": 3.5031505049702388e-06, "loss": 0.4288, "step": 4558 }, { "epoch": 3.065814911322182, "grad_norm": 0.3463449584856086, "learning_rate": 3.5010500823715453e-06, "loss": 0.4267, "step": 4559 }, { "epoch": 3.0664873497520384, "grad_norm": 0.3585885707085387, "learning_rate": 3.498949950375059e-06, "loss": 0.4235, "step": 4560 }, { "epoch": 3.0671597881818946, "grad_norm": 0.3581184175510112, "learning_rate": 3.496850109387931e-06, "loss": 0.4363, "step": 4561 }, { "epoch": 3.067832226611751, "grad_norm": 0.3978062858624349, "learning_rate": 3.494750559817259e-06, "loss": 0.4301, "step": 4562 }, { "epoch": 3.068504665041607, "grad_norm": 0.3134466285837552, "learning_rate": 3.4926513020700862e-06, "loss": 0.4457, "step": 4563 }, { "epoch": 3.069177103471463, "grad_norm": 0.43058514574800544, "learning_rate": 3.4905523365533933e-06, "loss": 0.4355, "step": 4564 }, { "epoch": 3.06984954190132, "grad_norm": 0.34477356186422137, "learning_rate": 3.48845366367411e-06, "loss": 0.4391, "step": 4565 }, { "epoch": 3.070521980331176, "grad_norm": 0.3388934365750416, "learning_rate": 3.4863552838391063e-06, "loss": 0.4331, "step": 4566 }, { "epoch": 3.0711944187610323, "grad_norm": 0.4684156526808039, "learning_rate": 3.4842571974551988e-06, "loss": 0.4234, "step": 4567 }, { "epoch": 3.0718668571908885, "grad_norm": 0.3313737505540861, "learning_rate": 3.4821594049291397e-06, "loss": 0.4276, "step": 4568 }, { "epoch": 3.0725392956207447, "grad_norm": 0.36942260054250975, "learning_rate": 3.480061906667631e-06, "loss": 0.4314, "step": 4569 }, { "epoch": 3.073211734050601, "grad_norm": 0.32521269600746494, "learning_rate": 3.477964703077318e-06, "loss": 0.4495, "step": 4570 }, { "epoch": 3.073884172480457, "grad_norm": 0.3608809742566614, "learning_rate": 3.4758677945647845e-06, "loss": 0.4246, "step": 4571 }, { "epoch": 3.0745566109103137, "grad_norm": 0.3344989550477347, "learning_rate": 3.47377118153656e-06, "loss": 0.4497, "step": 4572 }, { "epoch": 3.07522904934017, "grad_norm": 0.3572676656882878, "learning_rate": 3.4716748643991156e-06, "loss": 0.431, "step": 4573 }, { "epoch": 3.075901487770026, "grad_norm": 0.553248539316614, "learning_rate": 3.469578843558865e-06, "loss": 0.4304, "step": 4574 }, { "epoch": 3.0765739261998823, "grad_norm": 0.35331663767167226, "learning_rate": 3.4674831194221664e-06, "loss": 0.4319, "step": 4575 }, { "epoch": 3.0772463646297386, "grad_norm": 0.42518322373957207, "learning_rate": 3.46538769239532e-06, "loss": 0.4308, "step": 4576 }, { "epoch": 3.0779188030595948, "grad_norm": 0.3338628786350596, "learning_rate": 3.4632925628845627e-06, "loss": 0.4416, "step": 4577 }, { "epoch": 3.078591241489451, "grad_norm": 0.32805731158939805, "learning_rate": 3.461197731296081e-06, "loss": 0.4427, "step": 4578 }, { "epoch": 3.0792636799193076, "grad_norm": 0.36026355171320523, "learning_rate": 3.4591031980360014e-06, "loss": 0.4369, "step": 4579 }, { "epoch": 3.079936118349164, "grad_norm": 0.30893179819019195, "learning_rate": 3.4570089635103934e-06, "loss": 0.4347, "step": 4580 }, { "epoch": 3.08060855677902, "grad_norm": 0.4345687627764726, "learning_rate": 3.4549150281252635e-06, "loss": 0.4427, "step": 4581 }, { "epoch": 3.081280995208876, "grad_norm": 0.40103013471154253, "learning_rate": 3.452821392286567e-06, "loss": 0.4304, "step": 4582 }, { "epoch": 3.0819534336387324, "grad_norm": 0.30194327621664857, "learning_rate": 3.4507280564001968e-06, "loss": 0.4434, "step": 4583 }, { "epoch": 3.0826258720685886, "grad_norm": 0.332364236177735, "learning_rate": 3.44863502087199e-06, "loss": 0.4478, "step": 4584 }, { "epoch": 3.083298310498445, "grad_norm": 0.33594200809082103, "learning_rate": 3.4465422861077267e-06, "loss": 0.4263, "step": 4585 }, { "epoch": 3.0839707489283015, "grad_norm": 0.3689223762605296, "learning_rate": 3.4444498525131215e-06, "loss": 0.446, "step": 4586 }, { "epoch": 3.0846431873581577, "grad_norm": 0.7450107564695669, "learning_rate": 3.442357720493838e-06, "loss": 0.4424, "step": 4587 }, { "epoch": 3.085315625788014, "grad_norm": 0.3466911797566331, "learning_rate": 3.4402658904554785e-06, "loss": 0.4394, "step": 4588 }, { "epoch": 3.08598806421787, "grad_norm": 0.34958332313672374, "learning_rate": 3.43817436280359e-06, "loss": 0.4432, "step": 4589 }, { "epoch": 3.0866605026477263, "grad_norm": 0.42559182941801493, "learning_rate": 3.4360831379436533e-06, "loss": 0.4315, "step": 4590 }, { "epoch": 3.0873329410775825, "grad_norm": 0.33547003791092794, "learning_rate": 3.4339922162810983e-06, "loss": 0.4357, "step": 4591 }, { "epoch": 3.0880053795074387, "grad_norm": 0.38646265904258303, "learning_rate": 3.4319015982212922e-06, "loss": 0.4434, "step": 4592 }, { "epoch": 3.0886778179372953, "grad_norm": 0.35206075599972964, "learning_rate": 3.4298112841695477e-06, "loss": 0.4416, "step": 4593 }, { "epoch": 3.0893502563671515, "grad_norm": 0.34848617744389093, "learning_rate": 3.4277212745311084e-06, "loss": 0.4366, "step": 4594 }, { "epoch": 3.0900226947970078, "grad_norm": 0.3533361661758744, "learning_rate": 3.425631569711169e-06, "loss": 0.4401, "step": 4595 }, { "epoch": 3.090695133226864, "grad_norm": 0.36406170069571986, "learning_rate": 3.4235421701148625e-06, "loss": 0.4273, "step": 4596 }, { "epoch": 3.09136757165672, "grad_norm": 1.4039836290122003, "learning_rate": 3.4214530761472607e-06, "loss": 0.4357, "step": 4597 }, { "epoch": 3.0920400100865764, "grad_norm": 0.3308515481068115, "learning_rate": 3.419364288213379e-06, "loss": 0.4441, "step": 4598 }, { "epoch": 3.0927124485164326, "grad_norm": 0.3119654188294005, "learning_rate": 3.4172758067181687e-06, "loss": 0.4208, "step": 4599 }, { "epoch": 3.0933848869462888, "grad_norm": 0.2867191112981178, "learning_rate": 3.4151876320665276e-06, "loss": 0.4422, "step": 4600 }, { "epoch": 3.0940573253761454, "grad_norm": 0.35474956609831393, "learning_rate": 3.4130997646632895e-06, "loss": 0.4314, "step": 4601 }, { "epoch": 3.0947297638060016, "grad_norm": 0.3159002316785881, "learning_rate": 3.411012204913233e-06, "loss": 0.4459, "step": 4602 }, { "epoch": 3.095402202235858, "grad_norm": 0.3194766649563873, "learning_rate": 3.4089249532210717e-06, "loss": 0.4496, "step": 4603 }, { "epoch": 3.096074640665714, "grad_norm": 0.6258388924333276, "learning_rate": 3.406838009991462e-06, "loss": 0.4433, "step": 4604 }, { "epoch": 3.0967470790955702, "grad_norm": 0.3838652445382094, "learning_rate": 3.404751375629002e-06, "loss": 0.4416, "step": 4605 }, { "epoch": 3.0974195175254264, "grad_norm": 0.32257798582969266, "learning_rate": 3.402665050538228e-06, "loss": 0.4256, "step": 4606 }, { "epoch": 3.0980919559552826, "grad_norm": 0.31236500506479076, "learning_rate": 3.4005790351236185e-06, "loss": 0.4342, "step": 4607 }, { "epoch": 3.0987643943851393, "grad_norm": 0.6161557458503163, "learning_rate": 3.3984933297895876e-06, "loss": 0.4219, "step": 4608 }, { "epoch": 3.0994368328149955, "grad_norm": 0.31925387633231034, "learning_rate": 3.3964079349404937e-06, "loss": 0.4293, "step": 4609 }, { "epoch": 3.1001092712448517, "grad_norm": 0.43522890072392556, "learning_rate": 3.3943228509806337e-06, "loss": 0.4343, "step": 4610 }, { "epoch": 3.100781709674708, "grad_norm": 0.345784014529943, "learning_rate": 3.392238078314245e-06, "loss": 0.4335, "step": 4611 }, { "epoch": 3.101454148104564, "grad_norm": 0.42119438239765744, "learning_rate": 3.390153617345501e-06, "loss": 0.4222, "step": 4612 }, { "epoch": 3.1021265865344203, "grad_norm": 0.32674762135053875, "learning_rate": 3.388069468478516e-06, "loss": 0.4317, "step": 4613 }, { "epoch": 3.1027990249642765, "grad_norm": 0.4307593993758202, "learning_rate": 3.385985632117349e-06, "loss": 0.4522, "step": 4614 }, { "epoch": 3.103471463394133, "grad_norm": 0.432891284456854, "learning_rate": 3.3839021086659944e-06, "loss": 0.4315, "step": 4615 }, { "epoch": 3.1041439018239894, "grad_norm": 0.48510915643605573, "learning_rate": 3.3818188985283827e-06, "loss": 0.4209, "step": 4616 }, { "epoch": 3.1048163402538456, "grad_norm": 0.5217158789525718, "learning_rate": 3.379736002108388e-06, "loss": 0.4352, "step": 4617 }, { "epoch": 3.1054887786837018, "grad_norm": 0.38286786080522917, "learning_rate": 3.3776534198098245e-06, "loss": 0.4428, "step": 4618 }, { "epoch": 3.106161217113558, "grad_norm": 0.39261664609612695, "learning_rate": 3.375571152036441e-06, "loss": 0.4394, "step": 4619 }, { "epoch": 3.106833655543414, "grad_norm": 0.9636478000474139, "learning_rate": 3.37348919919193e-06, "loss": 0.4277, "step": 4620 }, { "epoch": 3.1075060939732704, "grad_norm": 0.5307538668903624, "learning_rate": 3.371407561679919e-06, "loss": 0.4312, "step": 4621 }, { "epoch": 3.108178532403127, "grad_norm": 0.39308857470248204, "learning_rate": 3.3693262399039765e-06, "loss": 0.422, "step": 4622 }, { "epoch": 3.1088509708329832, "grad_norm": 0.42116258159068504, "learning_rate": 3.3672452342676104e-06, "loss": 0.4508, "step": 4623 }, { "epoch": 3.1095234092628394, "grad_norm": 0.34333009940872494, "learning_rate": 3.3651645451742677e-06, "loss": 0.4381, "step": 4624 }, { "epoch": 3.1101958476926956, "grad_norm": 0.32574292490361406, "learning_rate": 3.363084173027329e-06, "loss": 0.4373, "step": 4625 }, { "epoch": 3.110868286122552, "grad_norm": 0.47585240478550994, "learning_rate": 3.3610041182301185e-06, "loss": 0.4407, "step": 4626 }, { "epoch": 3.111540724552408, "grad_norm": 0.30577248963550213, "learning_rate": 3.358924381185898e-06, "loss": 0.4431, "step": 4627 }, { "epoch": 3.1122131629822642, "grad_norm": 0.34454421287433445, "learning_rate": 3.3568449622978672e-06, "loss": 0.4249, "step": 4628 }, { "epoch": 3.112885601412121, "grad_norm": 0.3124320595245157, "learning_rate": 3.354765861969165e-06, "loss": 0.428, "step": 4629 }, { "epoch": 3.113558039841977, "grad_norm": 0.33255926005497843, "learning_rate": 3.352687080602866e-06, "loss": 0.4339, "step": 4630 }, { "epoch": 3.1142304782718333, "grad_norm": 0.3263425191480317, "learning_rate": 3.3506086186019853e-06, "loss": 0.4396, "step": 4631 }, { "epoch": 3.1149029167016895, "grad_norm": 0.3311160032209623, "learning_rate": 3.348530476369476e-06, "loss": 0.4413, "step": 4632 }, { "epoch": 3.1155753551315457, "grad_norm": 0.3542262654933314, "learning_rate": 3.3464526543082294e-06, "loss": 0.4459, "step": 4633 }, { "epoch": 3.116247793561402, "grad_norm": 0.325571930550553, "learning_rate": 3.344375152821071e-06, "loss": 0.4362, "step": 4634 }, { "epoch": 3.116920231991258, "grad_norm": 0.3358903620675028, "learning_rate": 3.3422979723107684e-06, "loss": 0.4424, "step": 4635 }, { "epoch": 3.1175926704211148, "grad_norm": 0.305451735662324, "learning_rate": 3.3402211131800267e-06, "loss": 0.4259, "step": 4636 }, { "epoch": 3.118265108850971, "grad_norm": 0.42671107335206243, "learning_rate": 3.338144575831488e-06, "loss": 0.4215, "step": 4637 }, { "epoch": 3.118937547280827, "grad_norm": 0.43720263887286137, "learning_rate": 3.336068360667729e-06, "loss": 0.4331, "step": 4638 }, { "epoch": 3.1196099857106834, "grad_norm": 0.32109468966367455, "learning_rate": 3.333992468091268e-06, "loss": 0.4357, "step": 4639 }, { "epoch": 3.1202824241405396, "grad_norm": 0.3038497432055437, "learning_rate": 3.3319168985045613e-06, "loss": 0.4396, "step": 4640 }, { "epoch": 3.120954862570396, "grad_norm": 0.34822304470317245, "learning_rate": 3.329841652309998e-06, "loss": 0.4324, "step": 4641 }, { "epoch": 3.121627301000252, "grad_norm": 0.33377784623106194, "learning_rate": 3.3277667299099104e-06, "loss": 0.4339, "step": 4642 }, { "epoch": 3.1222997394301086, "grad_norm": 0.3474041561897187, "learning_rate": 3.3256921317065603e-06, "loss": 0.4531, "step": 4643 }, { "epoch": 3.122972177859965, "grad_norm": 0.35275400166282933, "learning_rate": 3.3236178581021543e-06, "loss": 0.4435, "step": 4644 }, { "epoch": 3.123644616289821, "grad_norm": 0.3766660078631167, "learning_rate": 3.3215439094988315e-06, "loss": 0.4408, "step": 4645 }, { "epoch": 3.1243170547196772, "grad_norm": 0.34077492250456387, "learning_rate": 3.3194702862986716e-06, "loss": 0.4237, "step": 4646 }, { "epoch": 3.1249894931495334, "grad_norm": 0.3990472748064627, "learning_rate": 3.3173969889036858e-06, "loss": 0.4252, "step": 4647 }, { "epoch": 3.1256619315793897, "grad_norm": 0.3324141727579477, "learning_rate": 3.3153240177158267e-06, "loss": 0.4459, "step": 4648 }, { "epoch": 3.126334370009246, "grad_norm": 0.3740768618563349, "learning_rate": 3.3132513731369832e-06, "loss": 0.4497, "step": 4649 }, { "epoch": 3.127006808439102, "grad_norm": 0.5408116642166466, "learning_rate": 3.311179055568979e-06, "loss": 0.433, "step": 4650 }, { "epoch": 3.1276792468689587, "grad_norm": 0.29868137271676387, "learning_rate": 3.3091070654135777e-06, "loss": 0.4381, "step": 4651 }, { "epoch": 3.128351685298815, "grad_norm": 0.31638763072719994, "learning_rate": 3.3070354030724735e-06, "loss": 0.4414, "step": 4652 }, { "epoch": 3.129024123728671, "grad_norm": 0.3291056686507435, "learning_rate": 3.3049640689473015e-06, "loss": 0.4389, "step": 4653 }, { "epoch": 3.1296965621585273, "grad_norm": 0.3468954593487525, "learning_rate": 3.302893063439634e-06, "loss": 0.4297, "step": 4654 }, { "epoch": 3.1303690005883835, "grad_norm": 0.3827760044042586, "learning_rate": 3.300822386950978e-06, "loss": 0.4352, "step": 4655 }, { "epoch": 3.1310414390182397, "grad_norm": 0.35013542676495674, "learning_rate": 3.298752039882774e-06, "loss": 0.4232, "step": 4656 }, { "epoch": 3.131713877448096, "grad_norm": 0.3148957999424402, "learning_rate": 3.2966820226364037e-06, "loss": 0.4345, "step": 4657 }, { "epoch": 3.1323863158779526, "grad_norm": 0.31776486418488736, "learning_rate": 3.2946123356131817e-06, "loss": 0.4474, "step": 4658 }, { "epoch": 3.133058754307809, "grad_norm": 0.336646129660973, "learning_rate": 3.292542979214361e-06, "loss": 0.4159, "step": 4659 }, { "epoch": 3.133731192737665, "grad_norm": 0.3276513923565536, "learning_rate": 3.2904739538411253e-06, "loss": 0.4296, "step": 4660 }, { "epoch": 3.134403631167521, "grad_norm": 0.4116312387173316, "learning_rate": 3.288405259894599e-06, "loss": 0.4351, "step": 4661 }, { "epoch": 3.1350760695973774, "grad_norm": 0.3403757223273257, "learning_rate": 3.2863368977758408e-06, "loss": 0.4371, "step": 4662 }, { "epoch": 3.1357485080272336, "grad_norm": 0.3659654866043058, "learning_rate": 3.284268867885847e-06, "loss": 0.4295, "step": 4663 }, { "epoch": 3.13642094645709, "grad_norm": 0.4652447138237276, "learning_rate": 3.2822011706255473e-06, "loss": 0.4324, "step": 4664 }, { "epoch": 3.1370933848869464, "grad_norm": 0.33165855878747724, "learning_rate": 3.2801338063958055e-06, "loss": 0.4229, "step": 4665 }, { "epoch": 3.1377658233168026, "grad_norm": 0.3838062127845923, "learning_rate": 3.278066775597423e-06, "loss": 0.4432, "step": 4666 }, { "epoch": 3.138438261746659, "grad_norm": 0.3439324081941863, "learning_rate": 3.276000078631136e-06, "loss": 0.4308, "step": 4667 }, { "epoch": 3.139110700176515, "grad_norm": 0.3777137266840129, "learning_rate": 3.2739337158976183e-06, "loss": 0.438, "step": 4668 }, { "epoch": 3.1397831386063713, "grad_norm": 0.3770860508041263, "learning_rate": 3.2718676877974732e-06, "loss": 0.4284, "step": 4669 }, { "epoch": 3.1404555770362275, "grad_norm": 0.4052584725799753, "learning_rate": 3.2698019947312447e-06, "loss": 0.419, "step": 4670 }, { "epoch": 3.1411280154660837, "grad_norm": 0.33877127331626405, "learning_rate": 3.26773663709941e-06, "loss": 0.4375, "step": 4671 }, { "epoch": 3.1418004538959403, "grad_norm": 0.38442473662802157, "learning_rate": 3.2656716153023806e-06, "loss": 0.4204, "step": 4672 }, { "epoch": 3.1424728923257965, "grad_norm": 0.38480592954673515, "learning_rate": 3.263606929740505e-06, "loss": 0.4339, "step": 4673 }, { "epoch": 3.1431453307556527, "grad_norm": 0.4169876926992838, "learning_rate": 3.2615425808140617e-06, "loss": 0.4431, "step": 4674 }, { "epoch": 3.143817769185509, "grad_norm": 0.3583867437549412, "learning_rate": 3.259478568923269e-06, "loss": 0.4353, "step": 4675 }, { "epoch": 3.144490207615365, "grad_norm": 0.36524560376873244, "learning_rate": 3.257414894468277e-06, "loss": 0.4236, "step": 4676 }, { "epoch": 3.1451626460452213, "grad_norm": 0.4022822932069097, "learning_rate": 3.2553515578491727e-06, "loss": 0.4367, "step": 4677 }, { "epoch": 3.1458350844750775, "grad_norm": 0.34481558379751426, "learning_rate": 3.2532885594659756e-06, "loss": 0.4174, "step": 4678 }, { "epoch": 3.146507522904934, "grad_norm": 0.3678747660739935, "learning_rate": 3.2512258997186396e-06, "loss": 0.4448, "step": 4679 }, { "epoch": 3.1471799613347904, "grad_norm": 0.36854708191939056, "learning_rate": 3.249163579007054e-06, "loss": 0.4303, "step": 4680 }, { "epoch": 3.1478523997646466, "grad_norm": 0.33787152789012104, "learning_rate": 3.247101597731045e-06, "loss": 0.4429, "step": 4681 }, { "epoch": 3.148524838194503, "grad_norm": 0.3562201680510042, "learning_rate": 3.245039956290365e-06, "loss": 0.4181, "step": 4682 }, { "epoch": 3.149197276624359, "grad_norm": 0.3401836758777454, "learning_rate": 3.2429786550847074e-06, "loss": 0.4351, "step": 4683 }, { "epoch": 3.149869715054215, "grad_norm": 0.4177430294864894, "learning_rate": 3.240917694513699e-06, "loss": 0.4335, "step": 4684 }, { "epoch": 3.1505421534840714, "grad_norm": 0.4272694717550732, "learning_rate": 3.238857074976898e-06, "loss": 0.4403, "step": 4685 }, { "epoch": 3.151214591913928, "grad_norm": 0.5249306400567656, "learning_rate": 3.2367967968737982e-06, "loss": 0.4442, "step": 4686 }, { "epoch": 3.1518870303437843, "grad_norm": 0.29953421863302154, "learning_rate": 3.2347368606038265e-06, "loss": 0.4291, "step": 4687 }, { "epoch": 3.1525594687736405, "grad_norm": 0.37863611117307133, "learning_rate": 3.2326772665663443e-06, "loss": 0.4394, "step": 4688 }, { "epoch": 3.1532319072034967, "grad_norm": 0.3344756042740373, "learning_rate": 3.230618015160646e-06, "loss": 0.4205, "step": 4689 }, { "epoch": 3.153904345633353, "grad_norm": 0.3554230676368455, "learning_rate": 3.228559106785961e-06, "loss": 0.4313, "step": 4690 }, { "epoch": 3.154576784063209, "grad_norm": 0.36182306292558464, "learning_rate": 3.2265005418414486e-06, "loss": 0.4412, "step": 4691 }, { "epoch": 3.1552492224930653, "grad_norm": 0.3154854946548312, "learning_rate": 3.2244423207262047e-06, "loss": 0.4303, "step": 4692 }, { "epoch": 3.155921660922922, "grad_norm": 0.42983282243562276, "learning_rate": 3.2223844438392583e-06, "loss": 0.4446, "step": 4693 }, { "epoch": 3.156594099352778, "grad_norm": 0.30269250195619035, "learning_rate": 3.220326911579571e-06, "loss": 0.432, "step": 4694 }, { "epoch": 3.1572665377826343, "grad_norm": 0.424479235474068, "learning_rate": 3.218269724346037e-06, "loss": 0.4561, "step": 4695 }, { "epoch": 3.1579389762124905, "grad_norm": 0.33574116245111074, "learning_rate": 3.216212882537484e-06, "loss": 0.4264, "step": 4696 }, { "epoch": 3.1586114146423467, "grad_norm": 0.3498294739538529, "learning_rate": 3.214156386552674e-06, "loss": 0.4368, "step": 4697 }, { "epoch": 3.159283853072203, "grad_norm": 0.37375534804380817, "learning_rate": 3.2121002367903005e-06, "loss": 0.4257, "step": 4698 }, { "epoch": 3.159956291502059, "grad_norm": 0.3090641749528765, "learning_rate": 3.2100444336489923e-06, "loss": 0.4339, "step": 4699 }, { "epoch": 3.160628729931916, "grad_norm": 0.9572396401829548, "learning_rate": 3.2079889775273053e-06, "loss": 0.4307, "step": 4700 }, { "epoch": 3.161301168361772, "grad_norm": 0.45192994900712563, "learning_rate": 3.205933868823734e-06, "loss": 0.4384, "step": 4701 }, { "epoch": 3.161973606791628, "grad_norm": 0.36941191503540555, "learning_rate": 3.2038791079367025e-06, "loss": 0.4322, "step": 4702 }, { "epoch": 3.1626460452214844, "grad_norm": 4.38470789270265, "learning_rate": 3.20182469526457e-06, "loss": 0.4199, "step": 4703 }, { "epoch": 3.1633184836513406, "grad_norm": 0.34675448007122195, "learning_rate": 3.1997706312056254e-06, "loss": 0.4402, "step": 4704 }, { "epoch": 3.163990922081197, "grad_norm": 0.40977690757777413, "learning_rate": 3.1977169161580905e-06, "loss": 0.4452, "step": 4705 }, { "epoch": 3.164663360511053, "grad_norm": 0.4220151900113677, "learning_rate": 3.1956635505201217e-06, "loss": 0.4345, "step": 4706 }, { "epoch": 3.1653357989409097, "grad_norm": 0.478450870024991, "learning_rate": 3.193610534689805e-06, "loss": 0.4352, "step": 4707 }, { "epoch": 3.166008237370766, "grad_norm": 0.3188804493495357, "learning_rate": 3.1915578690651614e-06, "loss": 0.4395, "step": 4708 }, { "epoch": 3.166680675800622, "grad_norm": 0.3183761201737437, "learning_rate": 3.189505554044139e-06, "loss": 0.4423, "step": 4709 }, { "epoch": 3.1673531142304783, "grad_norm": 0.30600258196367147, "learning_rate": 3.1874535900246232e-06, "loss": 0.4316, "step": 4710 }, { "epoch": 3.1680255526603345, "grad_norm": 0.3562444091319478, "learning_rate": 3.1854019774044293e-06, "loss": 0.4238, "step": 4711 }, { "epoch": 3.1686979910901907, "grad_norm": 0.3773529419732106, "learning_rate": 3.183350716581305e-06, "loss": 0.4271, "step": 4712 }, { "epoch": 3.169370429520047, "grad_norm": 0.4118248790288195, "learning_rate": 3.181299807952928e-06, "loss": 0.4305, "step": 4713 }, { "epoch": 3.1700428679499035, "grad_norm": 0.4317878872718693, "learning_rate": 3.1792492519169094e-06, "loss": 0.4404, "step": 4714 }, { "epoch": 3.1707153063797597, "grad_norm": 0.3565325816330687, "learning_rate": 3.177199048870792e-06, "loss": 0.4442, "step": 4715 }, { "epoch": 3.171387744809616, "grad_norm": 0.5234862327575597, "learning_rate": 3.175149199212052e-06, "loss": 0.428, "step": 4716 }, { "epoch": 3.172060183239472, "grad_norm": 0.38414172568977173, "learning_rate": 3.173099703338089e-06, "loss": 0.4614, "step": 4717 }, { "epoch": 3.1727326216693283, "grad_norm": 0.43272101090757875, "learning_rate": 3.171050561646244e-06, "loss": 0.4336, "step": 4718 }, { "epoch": 3.1734050600991845, "grad_norm": 0.3643160602771628, "learning_rate": 3.169001774533785e-06, "loss": 0.4373, "step": 4719 }, { "epoch": 3.1740774985290408, "grad_norm": 0.388990393612831, "learning_rate": 3.1669533423979105e-06, "loss": 0.4231, "step": 4720 }, { "epoch": 3.1747499369588974, "grad_norm": 0.38034827680550565, "learning_rate": 3.1649052656357536e-06, "loss": 0.4433, "step": 4721 }, { "epoch": 3.1754223753887536, "grad_norm": 0.3678383641889589, "learning_rate": 3.1628575446443722e-06, "loss": 0.4442, "step": 4722 }, { "epoch": 3.17609481381861, "grad_norm": 0.3981919440636663, "learning_rate": 3.16081017982076e-06, "loss": 0.4163, "step": 4723 }, { "epoch": 3.176767252248466, "grad_norm": 0.38403716118646863, "learning_rate": 3.158763171561842e-06, "loss": 0.4341, "step": 4724 }, { "epoch": 3.177439690678322, "grad_norm": 0.3583236464931998, "learning_rate": 3.1567165202644733e-06, "loss": 0.441, "step": 4725 }, { "epoch": 3.1781121291081784, "grad_norm": 0.36116918921683977, "learning_rate": 3.154670226325437e-06, "loss": 0.4566, "step": 4726 }, { "epoch": 3.1787845675380346, "grad_norm": 0.34424155111991867, "learning_rate": 3.15262429014145e-06, "loss": 0.4348, "step": 4727 }, { "epoch": 3.1794570059678913, "grad_norm": 0.36997385256532983, "learning_rate": 3.1505787121091595e-06, "loss": 0.4195, "step": 4728 }, { "epoch": 3.1801294443977475, "grad_norm": 0.3275366678701998, "learning_rate": 3.1485334926251433e-06, "loss": 0.4444, "step": 4729 }, { "epoch": 3.1808018828276037, "grad_norm": 0.37805491290009186, "learning_rate": 3.1464886320859096e-06, "loss": 0.4262, "step": 4730 }, { "epoch": 3.18147432125746, "grad_norm": 0.30860103148911594, "learning_rate": 3.1444441308878935e-06, "loss": 0.4292, "step": 4731 }, { "epoch": 3.182146759687316, "grad_norm": 0.32504355552537767, "learning_rate": 3.142399989427466e-06, "loss": 0.4366, "step": 4732 }, { "epoch": 3.1828191981171723, "grad_norm": 0.3226580089600235, "learning_rate": 3.1403562081009252e-06, "loss": 0.4199, "step": 4733 }, { "epoch": 3.1834916365470285, "grad_norm": 0.4895344773329832, "learning_rate": 3.1383127873045016e-06, "loss": 0.4197, "step": 4734 }, { "epoch": 3.184164074976885, "grad_norm": 0.3564439182384866, "learning_rate": 3.1362697274343512e-06, "loss": 0.4396, "step": 4735 }, { "epoch": 3.1848365134067413, "grad_norm": 0.37691746552945693, "learning_rate": 3.1342270288865655e-06, "loss": 0.431, "step": 4736 }, { "epoch": 3.1855089518365975, "grad_norm": 0.4162265918187403, "learning_rate": 3.1321846920571627e-06, "loss": 0.4268, "step": 4737 }, { "epoch": 3.1861813902664538, "grad_norm": 0.39649204601421295, "learning_rate": 3.1301427173420935e-06, "loss": 0.4447, "step": 4738 }, { "epoch": 3.18685382869631, "grad_norm": 0.49702336290092375, "learning_rate": 3.1281011051372327e-06, "loss": 0.4637, "step": 4739 }, { "epoch": 3.187526267126166, "grad_norm": 0.3006983383812492, "learning_rate": 3.1260598558383913e-06, "loss": 0.4224, "step": 4740 }, { "epoch": 3.1881987055560224, "grad_norm": 0.40406229463083043, "learning_rate": 3.124018969841307e-06, "loss": 0.4494, "step": 4741 }, { "epoch": 3.188871143985879, "grad_norm": 0.3584595005860944, "learning_rate": 3.121978447541648e-06, "loss": 0.432, "step": 4742 }, { "epoch": 3.189543582415735, "grad_norm": 0.3686743773892617, "learning_rate": 3.1199382893350115e-06, "loss": 0.4333, "step": 4743 }, { "epoch": 3.1902160208455914, "grad_norm": 0.3359050197895751, "learning_rate": 3.1178984956169225e-06, "loss": 0.4175, "step": 4744 }, { "epoch": 3.1908884592754476, "grad_norm": 0.31833285001091, "learning_rate": 3.1158590667828376e-06, "loss": 0.4212, "step": 4745 }, { "epoch": 3.191560897705304, "grad_norm": 0.4891485403436755, "learning_rate": 3.113820003228142e-06, "loss": 0.4414, "step": 4746 }, { "epoch": 3.19223333613516, "grad_norm": 0.4269682563934399, "learning_rate": 3.111781305348153e-06, "loss": 0.4449, "step": 4747 }, { "epoch": 3.1929057745650162, "grad_norm": 0.3584339588087221, "learning_rate": 3.109742973538108e-06, "loss": 0.4255, "step": 4748 }, { "epoch": 3.193578212994873, "grad_norm": 0.40390970804608317, "learning_rate": 3.1077050081931835e-06, "loss": 0.4405, "step": 4749 }, { "epoch": 3.194250651424729, "grad_norm": 0.40806604809088715, "learning_rate": 3.10566740970848e-06, "loss": 0.4223, "step": 4750 }, { "epoch": 3.1949230898545853, "grad_norm": 0.3056085110647207, "learning_rate": 3.103630178479028e-06, "loss": 0.4376, "step": 4751 }, { "epoch": 3.1955955282844415, "grad_norm": 0.34709925682469184, "learning_rate": 3.1015933148997868e-06, "loss": 0.4374, "step": 4752 }, { "epoch": 3.1962679667142977, "grad_norm": 0.30168107092410623, "learning_rate": 3.0995568193656435e-06, "loss": 0.4348, "step": 4753 }, { "epoch": 3.196940405144154, "grad_norm": 0.45845343718385345, "learning_rate": 3.097520692271414e-06, "loss": 0.4438, "step": 4754 }, { "epoch": 3.19761284357401, "grad_norm": 0.33802532153175735, "learning_rate": 3.0954849340118454e-06, "loss": 0.4197, "step": 4755 }, { "epoch": 3.1982852820038667, "grad_norm": 0.4163329714506242, "learning_rate": 3.0934495449816117e-06, "loss": 0.439, "step": 4756 }, { "epoch": 3.198957720433723, "grad_norm": 0.3638694248396867, "learning_rate": 3.09141452557531e-06, "loss": 0.4208, "step": 4757 }, { "epoch": 3.199630158863579, "grad_norm": 0.36166532189611367, "learning_rate": 3.0893798761874754e-06, "loss": 0.4257, "step": 4758 }, { "epoch": 3.2003025972934354, "grad_norm": 0.35809717619963644, "learning_rate": 3.0873455972125644e-06, "loss": 0.4317, "step": 4759 }, { "epoch": 3.2009750357232916, "grad_norm": 0.35426721678748335, "learning_rate": 3.0853116890449646e-06, "loss": 0.4367, "step": 4760 }, { "epoch": 3.2016474741531478, "grad_norm": 0.3126887790627608, "learning_rate": 3.08327815207899e-06, "loss": 0.4274, "step": 4761 }, { "epoch": 3.202319912583004, "grad_norm": 0.39450856417405994, "learning_rate": 3.0812449867088833e-06, "loss": 0.4224, "step": 4762 }, { "epoch": 3.2029923510128606, "grad_norm": 0.32818810265606196, "learning_rate": 3.0792121933288162e-06, "loss": 0.4395, "step": 4763 }, { "epoch": 3.203664789442717, "grad_norm": 0.35599959312630275, "learning_rate": 3.0771797723328868e-06, "loss": 0.4312, "step": 4764 }, { "epoch": 3.204337227872573, "grad_norm": 0.4090587251102353, "learning_rate": 3.075147724115124e-06, "loss": 0.4372, "step": 4765 }, { "epoch": 3.2050096663024292, "grad_norm": 0.468838029036378, "learning_rate": 3.073116049069478e-06, "loss": 0.4244, "step": 4766 }, { "epoch": 3.2056821047322854, "grad_norm": 0.37953456910976113, "learning_rate": 3.071084747589832e-06, "loss": 0.4327, "step": 4767 }, { "epoch": 3.2063545431621416, "grad_norm": 0.3686688100535242, "learning_rate": 3.0690538200699973e-06, "loss": 0.4229, "step": 4768 }, { "epoch": 3.207026981591998, "grad_norm": 0.31807479799014965, "learning_rate": 3.0670232669037112e-06, "loss": 0.4385, "step": 4769 }, { "epoch": 3.207699420021854, "grad_norm": 0.3307981654169214, "learning_rate": 3.0649930884846348e-06, "loss": 0.4304, "step": 4770 }, { "epoch": 3.2083718584517107, "grad_norm": 0.4029112853490653, "learning_rate": 3.0629632852063616e-06, "loss": 0.4358, "step": 4771 }, { "epoch": 3.209044296881567, "grad_norm": 0.3732050035030929, "learning_rate": 3.060933857462411e-06, "loss": 0.421, "step": 4772 }, { "epoch": 3.209716735311423, "grad_norm": 0.3742535869075959, "learning_rate": 3.058904805646229e-06, "loss": 0.4622, "step": 4773 }, { "epoch": 3.2103891737412793, "grad_norm": 0.3659673176100718, "learning_rate": 3.0568761301511894e-06, "loss": 0.4347, "step": 4774 }, { "epoch": 3.2110616121711355, "grad_norm": 0.3358417737771693, "learning_rate": 3.0548478313705917e-06, "loss": 0.4521, "step": 4775 }, { "epoch": 3.2117340506009917, "grad_norm": 0.4766779055355382, "learning_rate": 3.052819909697663e-06, "loss": 0.4451, "step": 4776 }, { "epoch": 3.212406489030848, "grad_norm": 0.3932548693099987, "learning_rate": 3.0507923655255588e-06, "loss": 0.4352, "step": 4777 }, { "epoch": 3.2130789274607046, "grad_norm": 0.3385982906612951, "learning_rate": 3.048765199247361e-06, "loss": 0.431, "step": 4778 }, { "epoch": 3.2137513658905608, "grad_norm": 0.5054161825659945, "learning_rate": 3.046738411256074e-06, "loss": 0.4342, "step": 4779 }, { "epoch": 3.214423804320417, "grad_norm": 0.43428867622105366, "learning_rate": 3.044712001944634e-06, "loss": 0.4428, "step": 4780 }, { "epoch": 3.215096242750273, "grad_norm": 0.5603923248105427, "learning_rate": 3.042685971705903e-06, "loss": 0.4547, "step": 4781 }, { "epoch": 3.2157686811801294, "grad_norm": 0.35307316001332334, "learning_rate": 3.040660320932668e-06, "loss": 0.4331, "step": 4782 }, { "epoch": 3.2164411196099856, "grad_norm": 0.3614379883327843, "learning_rate": 3.038635050017642e-06, "loss": 0.4428, "step": 4783 }, { "epoch": 3.217113558039842, "grad_norm": 0.3314145010300879, "learning_rate": 3.036610159353466e-06, "loss": 0.4218, "step": 4784 }, { "epoch": 3.2177859964696984, "grad_norm": 0.31371322318633665, "learning_rate": 3.0345856493327066e-06, "loss": 0.4411, "step": 4785 }, { "epoch": 3.2184584348995546, "grad_norm": 0.3213531388938575, "learning_rate": 3.0325615203478563e-06, "loss": 0.4182, "step": 4786 }, { "epoch": 3.219130873329411, "grad_norm": 0.41555390009757326, "learning_rate": 3.0305377727913366e-06, "loss": 0.4341, "step": 4787 }, { "epoch": 3.219803311759267, "grad_norm": 0.5289558142311216, "learning_rate": 3.0285144070554884e-06, "loss": 0.4428, "step": 4788 }, { "epoch": 3.2204757501891232, "grad_norm": 0.39461029318470636, "learning_rate": 3.0264914235325847e-06, "loss": 0.4287, "step": 4789 }, { "epoch": 3.2211481886189794, "grad_norm": 0.4359852144425983, "learning_rate": 3.024468822614822e-06, "loss": 0.4289, "step": 4790 }, { "epoch": 3.2218206270488357, "grad_norm": 0.3223360264707932, "learning_rate": 3.0224466046943245e-06, "loss": 0.4259, "step": 4791 }, { "epoch": 3.2224930654786923, "grad_norm": 0.3298543675630738, "learning_rate": 3.020424770163138e-06, "loss": 0.4435, "step": 4792 }, { "epoch": 3.2231655039085485, "grad_norm": 0.3712648442771377, "learning_rate": 3.018403319413238e-06, "loss": 0.4399, "step": 4793 }, { "epoch": 3.2238379423384047, "grad_norm": 0.35017531257266965, "learning_rate": 3.016382252836525e-06, "loss": 0.4117, "step": 4794 }, { "epoch": 3.224510380768261, "grad_norm": 0.3906473263678258, "learning_rate": 3.014361570824823e-06, "loss": 0.4473, "step": 4795 }, { "epoch": 3.225182819198117, "grad_norm": 0.31963992523794227, "learning_rate": 3.012341273769885e-06, "loss": 0.4388, "step": 4796 }, { "epoch": 3.2258552576279733, "grad_norm": 0.3652391401530828, "learning_rate": 3.010321362063383e-06, "loss": 0.4234, "step": 4797 }, { "epoch": 3.2265276960578295, "grad_norm": 0.40334376458618504, "learning_rate": 3.0083018360969213e-06, "loss": 0.4531, "step": 4798 }, { "epoch": 3.227200134487686, "grad_norm": 0.3849107781967533, "learning_rate": 3.0062826962620252e-06, "loss": 0.4355, "step": 4799 }, { "epoch": 3.2278725729175424, "grad_norm": 0.41994305213491073, "learning_rate": 3.004263942950148e-06, "loss": 0.4298, "step": 4800 } ], "logging_steps": 1.0, "max_steps": 7435, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.434020777725788e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }