{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998560778331078, "eval_steps": 500, "global_step": 2171, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00046055093405486313, "grad_norm": 10.224479026613773, "learning_rate": 3.0303030303030305e-08, "loss": 1.084, "step": 1 }, { "epoch": 0.0009211018681097263, "grad_norm": 10.903636883871654, "learning_rate": 6.060606060606061e-08, "loss": 0.9824, "step": 2 }, { "epoch": 0.0013816528021645895, "grad_norm": 11.344940351126398, "learning_rate": 9.09090909090909e-08, "loss": 1.0421, "step": 3 }, { "epoch": 0.0018422037362194525, "grad_norm": 13.25793073962023, "learning_rate": 1.2121212121212122e-07, "loss": 0.9886, "step": 4 }, { "epoch": 0.002302754670274316, "grad_norm": 9.70140482967039, "learning_rate": 1.5151515151515152e-07, "loss": 0.9634, "step": 5 }, { "epoch": 0.002763305604329179, "grad_norm": 12.03838558755089, "learning_rate": 1.818181818181818e-07, "loss": 0.9483, "step": 6 }, { "epoch": 0.003223856538384042, "grad_norm": 12.588391333344033, "learning_rate": 2.121212121212121e-07, "loss": 0.9984, "step": 7 }, { "epoch": 0.003684407472438905, "grad_norm": 11.836562687027973, "learning_rate": 2.4242424242424244e-07, "loss": 1.0708, "step": 8 }, { "epoch": 0.004144958406493768, "grad_norm": 12.706113691995517, "learning_rate": 2.727272727272727e-07, "loss": 1.0863, "step": 9 }, { "epoch": 0.004605509340548632, "grad_norm": 11.407622369949724, "learning_rate": 3.0303030303030305e-07, "loss": 1.2087, "step": 10 }, { "epoch": 0.005066060274603494, "grad_norm": 12.237310594074764, "learning_rate": 3.333333333333333e-07, "loss": 1.0967, "step": 11 }, { "epoch": 0.005526611208658358, "grad_norm": 10.99654178909918, "learning_rate": 3.636363636363636e-07, "loss": 1.1732, "step": 12 }, { "epoch": 0.0059871621427132204, "grad_norm": 11.042210513697741, "learning_rate": 3.939393939393939e-07, "loss": 1.1777, "step": 13 }, { "epoch": 0.006447713076768084, "grad_norm": 11.496495867822293, "learning_rate": 4.242424242424242e-07, "loss": 1.0627, "step": 14 }, { "epoch": 0.006908264010822947, "grad_norm": 11.882894204754962, "learning_rate": 4.545454545454545e-07, "loss": 0.9844, "step": 15 }, { "epoch": 0.00736881494487781, "grad_norm": 11.794166405407287, "learning_rate": 4.848484848484849e-07, "loss": 1.0168, "step": 16 }, { "epoch": 0.007829365878932673, "grad_norm": 10.01774185514084, "learning_rate": 5.151515151515151e-07, "loss": 1.2505, "step": 17 }, { "epoch": 0.008289916812987536, "grad_norm": 9.567696400885666, "learning_rate": 5.454545454545454e-07, "loss": 1.1344, "step": 18 }, { "epoch": 0.0087504677470424, "grad_norm": 11.67769227316676, "learning_rate": 5.757575757575758e-07, "loss": 0.9009, "step": 19 }, { "epoch": 0.009211018681097263, "grad_norm": 9.530138294227273, "learning_rate": 6.060606060606061e-07, "loss": 1.0289, "step": 20 }, { "epoch": 0.009671569615152125, "grad_norm": 8.071657974205257, "learning_rate": 6.363636363636363e-07, "loss": 1.2151, "step": 21 }, { "epoch": 0.010132120549206989, "grad_norm": 11.50961336906488, "learning_rate": 6.666666666666666e-07, "loss": 0.8917, "step": 22 }, { "epoch": 0.010592671483261852, "grad_norm": 9.46367673420139, "learning_rate": 6.96969696969697e-07, "loss": 1.1334, "step": 23 }, { "epoch": 0.011053222417316716, "grad_norm": 7.980897491793901, "learning_rate": 7.272727272727272e-07, "loss": 0.968, "step": 24 }, { "epoch": 0.011513773351371579, "grad_norm": 6.49691041072656, "learning_rate": 7.575757575757575e-07, "loss": 1.0277, "step": 25 }, { "epoch": 0.011974324285426441, "grad_norm": 6.041354612689945, "learning_rate": 7.878787878787878e-07, "loss": 0.9477, "step": 26 }, { "epoch": 0.012434875219481304, "grad_norm": 6.34623067352923, "learning_rate": 8.181818181818182e-07, "loss": 0.9635, "step": 27 }, { "epoch": 0.012895426153536168, "grad_norm": 5.768779273607869, "learning_rate": 8.484848484848484e-07, "loss": 0.9567, "step": 28 }, { "epoch": 0.013355977087591031, "grad_norm": 5.159941543576065, "learning_rate": 8.787878787878787e-07, "loss": 0.8742, "step": 29 }, { "epoch": 0.013816528021645893, "grad_norm": 4.459623755013343, "learning_rate": 9.09090909090909e-07, "loss": 0.905, "step": 30 }, { "epoch": 0.014277078955700757, "grad_norm": 4.578386981237651, "learning_rate": 9.393939393939395e-07, "loss": 0.9856, "step": 31 }, { "epoch": 0.01473762988975562, "grad_norm": 4.599340605830885, "learning_rate": 9.696969696969698e-07, "loss": 1.0623, "step": 32 }, { "epoch": 0.015198180823810484, "grad_norm": 4.715495998419273, "learning_rate": 1e-06, "loss": 0.9893, "step": 33 }, { "epoch": 0.015658731757865346, "grad_norm": 3.9725156691925405, "learning_rate": 1.0303030303030302e-06, "loss": 0.9039, "step": 34 }, { "epoch": 0.01611928269192021, "grad_norm": 4.01091093069899, "learning_rate": 1.0606060606060606e-06, "loss": 0.7272, "step": 35 }, { "epoch": 0.016579833625975073, "grad_norm": 4.427996895865591, "learning_rate": 1.0909090909090908e-06, "loss": 0.8846, "step": 36 }, { "epoch": 0.017040384560029934, "grad_norm": 3.545754336884005, "learning_rate": 1.121212121212121e-06, "loss": 0.829, "step": 37 }, { "epoch": 0.0175009354940848, "grad_norm": 5.525053440753492, "learning_rate": 1.1515151515151516e-06, "loss": 0.6791, "step": 38 }, { "epoch": 0.01796148642813966, "grad_norm": 3.383199059764453, "learning_rate": 1.1818181818181818e-06, "loss": 0.8036, "step": 39 }, { "epoch": 0.018422037362194527, "grad_norm": 4.1135930428353, "learning_rate": 1.2121212121212122e-06, "loss": 0.89, "step": 40 }, { "epoch": 0.01888258829624939, "grad_norm": 4.200860213314929, "learning_rate": 1.2424242424242424e-06, "loss": 0.8742, "step": 41 }, { "epoch": 0.01934313923030425, "grad_norm": 4.621032204910162, "learning_rate": 1.2727272727272726e-06, "loss": 0.9061, "step": 42 }, { "epoch": 0.019803690164359115, "grad_norm": 4.05937719343738, "learning_rate": 1.303030303030303e-06, "loss": 0.8869, "step": 43 }, { "epoch": 0.020264241098413977, "grad_norm": 3.908268562451142, "learning_rate": 1.3333333333333332e-06, "loss": 0.8652, "step": 44 }, { "epoch": 0.020724792032468842, "grad_norm": 3.8127642183113104, "learning_rate": 1.3636363636363634e-06, "loss": 0.8547, "step": 45 }, { "epoch": 0.021185342966523704, "grad_norm": 3.7006255903086016, "learning_rate": 1.393939393939394e-06, "loss": 0.8816, "step": 46 }, { "epoch": 0.021645893900578566, "grad_norm": 4.278643366823466, "learning_rate": 1.4242424242424242e-06, "loss": 0.9507, "step": 47 }, { "epoch": 0.02210644483463343, "grad_norm": 5.121316020450439, "learning_rate": 1.4545454545454544e-06, "loss": 0.8144, "step": 48 }, { "epoch": 0.022566995768688293, "grad_norm": 3.048465483366566, "learning_rate": 1.4848484848484848e-06, "loss": 0.654, "step": 49 }, { "epoch": 0.023027546702743158, "grad_norm": 3.9812784832535133, "learning_rate": 1.515151515151515e-06, "loss": 0.6679, "step": 50 }, { "epoch": 0.02348809763679802, "grad_norm": 3.787536459284705, "learning_rate": 1.5454545454545454e-06, "loss": 0.9465, "step": 51 }, { "epoch": 0.023948648570852882, "grad_norm": 3.6018385428584123, "learning_rate": 1.5757575757575756e-06, "loss": 0.7828, "step": 52 }, { "epoch": 0.024409199504907747, "grad_norm": 3.5109451677027614, "learning_rate": 1.6060606060606058e-06, "loss": 0.8131, "step": 53 }, { "epoch": 0.02486975043896261, "grad_norm": 3.0290570716615903, "learning_rate": 1.6363636363636365e-06, "loss": 0.6437, "step": 54 }, { "epoch": 0.02533030137301747, "grad_norm": 3.682837755403324, "learning_rate": 1.6666666666666667e-06, "loss": 0.7215, "step": 55 }, { "epoch": 0.025790852307072336, "grad_norm": 3.476439822446147, "learning_rate": 1.6969696969696969e-06, "loss": 0.7884, "step": 56 }, { "epoch": 0.026251403241127198, "grad_norm": 3.9813216927132475, "learning_rate": 1.7272727272727273e-06, "loss": 0.7466, "step": 57 }, { "epoch": 0.026711954175182063, "grad_norm": 3.6806123465375116, "learning_rate": 1.7575757575757575e-06, "loss": 0.7043, "step": 58 }, { "epoch": 0.027172505109236925, "grad_norm": 3.1611663708183344, "learning_rate": 1.7878787878787877e-06, "loss": 0.7341, "step": 59 }, { "epoch": 0.027633056043291786, "grad_norm": 3.444488689830893, "learning_rate": 1.818181818181818e-06, "loss": 0.8762, "step": 60 }, { "epoch": 0.02809360697734665, "grad_norm": 3.549558394232681, "learning_rate": 1.8484848484848483e-06, "loss": 0.8806, "step": 61 }, { "epoch": 0.028554157911401513, "grad_norm": 3.9711382750959747, "learning_rate": 1.878787878787879e-06, "loss": 0.8029, "step": 62 }, { "epoch": 0.02901470884545638, "grad_norm": 3.5959357584947615, "learning_rate": 1.909090909090909e-06, "loss": 0.7292, "step": 63 }, { "epoch": 0.02947525977951124, "grad_norm": 3.9415569403625024, "learning_rate": 1.9393939393939395e-06, "loss": 0.856, "step": 64 }, { "epoch": 0.029935810713566102, "grad_norm": 3.406751239525263, "learning_rate": 1.9696969696969695e-06, "loss": 0.7986, "step": 65 }, { "epoch": 0.030396361647620967, "grad_norm": 3.6931502231787183, "learning_rate": 2e-06, "loss": 0.6504, "step": 66 }, { "epoch": 0.03085691258167583, "grad_norm": 3.940678039571864, "learning_rate": 1.9999988863070544e-06, "loss": 0.6272, "step": 67 }, { "epoch": 0.03131746351573069, "grad_norm": 3.361818700666645, "learning_rate": 1.999995545230698e-06, "loss": 0.8018, "step": 68 }, { "epoch": 0.031778014449785556, "grad_norm": 4.745930001970848, "learning_rate": 1.9999899767783724e-06, "loss": 0.7793, "step": 69 }, { "epoch": 0.03223856538384042, "grad_norm": 4.566864649641224, "learning_rate": 1.999982180962482e-06, "loss": 0.7536, "step": 70 }, { "epoch": 0.03269911631789528, "grad_norm": 3.4766274202922123, "learning_rate": 1.999972157800389e-06, "loss": 0.6682, "step": 71 }, { "epoch": 0.033159667251950145, "grad_norm": 3.2407423987046564, "learning_rate": 1.999959907314421e-06, "loss": 0.5877, "step": 72 }, { "epoch": 0.03362021818600501, "grad_norm": 3.854719893893145, "learning_rate": 1.999945429531863e-06, "loss": 0.8257, "step": 73 }, { "epoch": 0.03408076912005987, "grad_norm": 3.384850197913963, "learning_rate": 1.9999287244849633e-06, "loss": 0.7341, "step": 74 }, { "epoch": 0.034541320054114734, "grad_norm": 3.826326260538933, "learning_rate": 1.9999097922109303e-06, "loss": 0.7884, "step": 75 }, { "epoch": 0.0350018709881696, "grad_norm": 3.5685218916656805, "learning_rate": 1.9998886327519336e-06, "loss": 0.7556, "step": 76 }, { "epoch": 0.035462421922224464, "grad_norm": 3.314138350796672, "learning_rate": 1.999865246155103e-06, "loss": 0.7505, "step": 77 }, { "epoch": 0.03592297285627932, "grad_norm": 3.218790293838154, "learning_rate": 1.9998396324725305e-06, "loss": 0.6178, "step": 78 }, { "epoch": 0.03638352379033419, "grad_norm": 3.5441600733921055, "learning_rate": 1.999811791761267e-06, "loss": 0.711, "step": 79 }, { "epoch": 0.03684407472438905, "grad_norm": 3.2797802004892773, "learning_rate": 1.999781724083324e-06, "loss": 0.6754, "step": 80 }, { "epoch": 0.03730462565844391, "grad_norm": 3.514532124195952, "learning_rate": 1.9997494295056746e-06, "loss": 0.7056, "step": 81 }, { "epoch": 0.03776517659249878, "grad_norm": 3.4677096105337433, "learning_rate": 1.9997149081002514e-06, "loss": 0.6394, "step": 82 }, { "epoch": 0.03822572752655364, "grad_norm": 2.932267254260553, "learning_rate": 1.9996781599439464e-06, "loss": 0.7235, "step": 83 }, { "epoch": 0.0386862784606085, "grad_norm": 3.096434205139558, "learning_rate": 1.9996391851186118e-06, "loss": 0.674, "step": 84 }, { "epoch": 0.039146829394663366, "grad_norm": 3.858101988445306, "learning_rate": 1.99959798371106e-06, "loss": 0.6127, "step": 85 }, { "epoch": 0.03960738032871823, "grad_norm": 3.361207098481145, "learning_rate": 1.999554555813062e-06, "loss": 0.6981, "step": 86 }, { "epoch": 0.04006793126277309, "grad_norm": 3.4015641056770503, "learning_rate": 1.9995089015213493e-06, "loss": 0.8552, "step": 87 }, { "epoch": 0.040528482196827954, "grad_norm": 3.1305710457612004, "learning_rate": 1.999461020937611e-06, "loss": 0.6014, "step": 88 }, { "epoch": 0.04098903313088282, "grad_norm": 3.820781969106205, "learning_rate": 1.999410914168495e-06, "loss": 0.8311, "step": 89 }, { "epoch": 0.041449584064937685, "grad_norm": 3.2079786714114906, "learning_rate": 1.99935858132561e-06, "loss": 0.682, "step": 90 }, { "epoch": 0.04191013499899254, "grad_norm": 3.643489327427482, "learning_rate": 1.99930402252552e-06, "loss": 0.6605, "step": 91 }, { "epoch": 0.04237068593304741, "grad_norm": 3.3738758807758766, "learning_rate": 1.9992472378897497e-06, "loss": 0.6143, "step": 92 }, { "epoch": 0.042831236867102274, "grad_norm": 3.077610620836017, "learning_rate": 1.9991882275447794e-06, "loss": 0.6217, "step": 93 }, { "epoch": 0.04329178780115713, "grad_norm": 3.162135055961725, "learning_rate": 1.9991269916220485e-06, "loss": 0.7265, "step": 94 }, { "epoch": 0.043752338735212, "grad_norm": 2.9823928585506647, "learning_rate": 1.999063530257952e-06, "loss": 0.6289, "step": 95 }, { "epoch": 0.04421288966926686, "grad_norm": 3.511247382327224, "learning_rate": 1.998997843593845e-06, "loss": 0.6145, "step": 96 }, { "epoch": 0.04467344060332172, "grad_norm": 3.6517354310292265, "learning_rate": 1.9989299317760345e-06, "loss": 0.6997, "step": 97 }, { "epoch": 0.045133991537376586, "grad_norm": 3.5728547634334746, "learning_rate": 1.9988597949557883e-06, "loss": 0.7399, "step": 98 }, { "epoch": 0.04559454247143145, "grad_norm": 3.4027410732295653, "learning_rate": 1.998787433289327e-06, "loss": 0.695, "step": 99 }, { "epoch": 0.046055093405486316, "grad_norm": 3.2197558124034984, "learning_rate": 1.9987128469378284e-06, "loss": 0.6144, "step": 100 }, { "epoch": 0.046515644339541175, "grad_norm": 3.1601430739183245, "learning_rate": 1.998636036067425e-06, "loss": 0.6233, "step": 101 }, { "epoch": 0.04697619527359604, "grad_norm": 3.0419954341335087, "learning_rate": 1.9985570008492044e-06, "loss": 0.5879, "step": 102 }, { "epoch": 0.047436746207650905, "grad_norm": 3.4126431282945027, "learning_rate": 1.9984757414592083e-06, "loss": 0.7941, "step": 103 }, { "epoch": 0.047897297141705764, "grad_norm": 2.973595724909931, "learning_rate": 1.998392258078433e-06, "loss": 0.6119, "step": 104 }, { "epoch": 0.04835784807576063, "grad_norm": 4.503321929117909, "learning_rate": 1.998306550892828e-06, "loss": 0.6559, "step": 105 }, { "epoch": 0.048818399009815494, "grad_norm": 3.615113444334991, "learning_rate": 1.9982186200932964e-06, "loss": 0.6638, "step": 106 }, { "epoch": 0.04927894994387035, "grad_norm": 3.4410984720501094, "learning_rate": 1.998128465875694e-06, "loss": 0.6764, "step": 107 }, { "epoch": 0.04973950087792522, "grad_norm": 3.1749314754075617, "learning_rate": 1.9980360884408288e-06, "loss": 0.701, "step": 108 }, { "epoch": 0.05020005181198008, "grad_norm": 3.2473102824540083, "learning_rate": 1.997941487994461e-06, "loss": 0.6957, "step": 109 }, { "epoch": 0.05066060274603494, "grad_norm": 3.6947631666546843, "learning_rate": 1.9978446647473024e-06, "loss": 0.7448, "step": 110 }, { "epoch": 0.051121153680089806, "grad_norm": 3.261951870816807, "learning_rate": 1.9977456189150163e-06, "loss": 0.7727, "step": 111 }, { "epoch": 0.05158170461414467, "grad_norm": 3.3464119040387925, "learning_rate": 1.9976443507182152e-06, "loss": 0.6483, "step": 112 }, { "epoch": 0.05204225554819954, "grad_norm": 3.533060942648228, "learning_rate": 1.997540860382463e-06, "loss": 0.8126, "step": 113 }, { "epoch": 0.052502806482254395, "grad_norm": 3.2761042459948877, "learning_rate": 1.997435148138272e-06, "loss": 0.5982, "step": 114 }, { "epoch": 0.05296335741630926, "grad_norm": 3.1082981914368473, "learning_rate": 1.9973272142211046e-06, "loss": 0.6504, "step": 115 }, { "epoch": 0.053423908350364126, "grad_norm": 3.3969089614282444, "learning_rate": 1.997217058871371e-06, "loss": 0.5928, "step": 116 }, { "epoch": 0.053884459284418984, "grad_norm": 3.1930029822538173, "learning_rate": 1.9971046823344304e-06, "loss": 0.5868, "step": 117 }, { "epoch": 0.05434501021847385, "grad_norm": 3.6974861112788457, "learning_rate": 1.9969900848605877e-06, "loss": 0.7963, "step": 118 }, { "epoch": 0.054805561152528715, "grad_norm": 3.315165825861059, "learning_rate": 1.9968732667050966e-06, "loss": 0.6313, "step": 119 }, { "epoch": 0.05526611208658357, "grad_norm": 3.539748098126486, "learning_rate": 1.9967542281281557e-06, "loss": 0.7429, "step": 120 }, { "epoch": 0.05572666302063844, "grad_norm": 3.338078973425553, "learning_rate": 1.9966329693949093e-06, "loss": 0.7662, "step": 121 }, { "epoch": 0.0561872139546933, "grad_norm": 3.472065054117364, "learning_rate": 1.996509490775449e-06, "loss": 0.6625, "step": 122 }, { "epoch": 0.05664776488874816, "grad_norm": 3.1792157328292974, "learning_rate": 1.996383792544808e-06, "loss": 0.6792, "step": 123 }, { "epoch": 0.05710831582280303, "grad_norm": 3.5210682757899145, "learning_rate": 1.996255874982965e-06, "loss": 0.6079, "step": 124 }, { "epoch": 0.05756886675685789, "grad_norm": 3.2138238654727416, "learning_rate": 1.996125738374842e-06, "loss": 0.5784, "step": 125 }, { "epoch": 0.05802941769091276, "grad_norm": 3.3633304567565196, "learning_rate": 1.995993383010303e-06, "loss": 0.6367, "step": 126 }, { "epoch": 0.058489968624967616, "grad_norm": 3.781021254907743, "learning_rate": 1.9958588091841553e-06, "loss": 0.6771, "step": 127 }, { "epoch": 0.05895051955902248, "grad_norm": 3.6201021154476516, "learning_rate": 1.9957220171961465e-06, "loss": 0.6707, "step": 128 }, { "epoch": 0.059411070493077346, "grad_norm": 3.6881373018752885, "learning_rate": 1.995583007350964e-06, "loss": 0.7512, "step": 129 }, { "epoch": 0.059871621427132204, "grad_norm": 3.3800092959402566, "learning_rate": 1.9954417799582382e-06, "loss": 0.5795, "step": 130 }, { "epoch": 0.06033217236118707, "grad_norm": 3.6812949356112936, "learning_rate": 1.9952983353325356e-06, "loss": 0.59, "step": 131 }, { "epoch": 0.060792723295241935, "grad_norm": 3.730872974671056, "learning_rate": 1.9951526737933634e-06, "loss": 0.6077, "step": 132 }, { "epoch": 0.06125327422929679, "grad_norm": 3.5504209377161757, "learning_rate": 1.9950047956651657e-06, "loss": 0.606, "step": 133 }, { "epoch": 0.06171382516335166, "grad_norm": 3.2843333588119727, "learning_rate": 1.9948547012773246e-06, "loss": 0.6067, "step": 134 }, { "epoch": 0.062174376097406524, "grad_norm": 3.3876731358864403, "learning_rate": 1.9947023909641574e-06, "loss": 0.7097, "step": 135 }, { "epoch": 0.06263492703146138, "grad_norm": 4.024239315384627, "learning_rate": 1.994547865064919e-06, "loss": 0.8154, "step": 136 }, { "epoch": 0.06309547796551625, "grad_norm": 3.47895486959038, "learning_rate": 1.9943911239237974e-06, "loss": 0.5583, "step": 137 }, { "epoch": 0.06355602889957111, "grad_norm": 3.409716745725618, "learning_rate": 1.9942321678899163e-06, "loss": 0.5774, "step": 138 }, { "epoch": 0.06401657983362598, "grad_norm": 3.32970506539607, "learning_rate": 1.9940709973173314e-06, "loss": 0.6011, "step": 139 }, { "epoch": 0.06447713076768084, "grad_norm": 3.1166430692289246, "learning_rate": 1.993907612565032e-06, "loss": 0.5851, "step": 140 }, { "epoch": 0.0649376817017357, "grad_norm": 3.5411479372126426, "learning_rate": 1.9937420139969395e-06, "loss": 0.7496, "step": 141 }, { "epoch": 0.06539823263579056, "grad_norm": 3.421729369489728, "learning_rate": 1.993574201981905e-06, "loss": 0.6842, "step": 142 }, { "epoch": 0.06585878356984542, "grad_norm": 3.1449216041214205, "learning_rate": 1.9934041768937114e-06, "loss": 0.6461, "step": 143 }, { "epoch": 0.06631933450390029, "grad_norm": 3.6494550010926328, "learning_rate": 1.9932319391110695e-06, "loss": 0.7231, "step": 144 }, { "epoch": 0.06677988543795516, "grad_norm": 3.728480459329745, "learning_rate": 1.99305748901762e-06, "loss": 0.7552, "step": 145 }, { "epoch": 0.06724043637201002, "grad_norm": 3.2822173129999137, "learning_rate": 1.9928808270019296e-06, "loss": 0.6228, "step": 146 }, { "epoch": 0.06770098730606489, "grad_norm": 3.2867458531135196, "learning_rate": 1.9927019534574937e-06, "loss": 0.7294, "step": 147 }, { "epoch": 0.06816153824011974, "grad_norm": 3.1013936687675714, "learning_rate": 1.992520868782732e-06, "loss": 0.5613, "step": 148 }, { "epoch": 0.0686220891741746, "grad_norm": 3.159174332303596, "learning_rate": 1.9923375733809905e-06, "loss": 0.7149, "step": 149 }, { "epoch": 0.06908264010822947, "grad_norm": 3.516091730035019, "learning_rate": 1.992152067660539e-06, "loss": 0.6315, "step": 150 }, { "epoch": 0.06954319104228433, "grad_norm": 3.165146365156602, "learning_rate": 1.9919643520345695e-06, "loss": 0.5459, "step": 151 }, { "epoch": 0.0700037419763392, "grad_norm": 4.0062232007227685, "learning_rate": 1.991774426921198e-06, "loss": 0.6454, "step": 152 }, { "epoch": 0.07046429291039406, "grad_norm": 3.548301168807787, "learning_rate": 1.99158229274346e-06, "loss": 0.676, "step": 153 }, { "epoch": 0.07092484384444893, "grad_norm": 3.4249899072112027, "learning_rate": 1.9913879499293136e-06, "loss": 0.6644, "step": 154 }, { "epoch": 0.07138539477850378, "grad_norm": 3.707615974526274, "learning_rate": 1.9911913989116345e-06, "loss": 0.6739, "step": 155 }, { "epoch": 0.07184594571255865, "grad_norm": 3.086896742538257, "learning_rate": 1.990992640128218e-06, "loss": 0.5981, "step": 156 }, { "epoch": 0.07230649664661351, "grad_norm": 3.1043886733514983, "learning_rate": 1.990791674021776e-06, "loss": 0.5621, "step": 157 }, { "epoch": 0.07276704758066838, "grad_norm": 2.7518305668279432, "learning_rate": 1.9905885010399386e-06, "loss": 0.5827, "step": 158 }, { "epoch": 0.07322759851472324, "grad_norm": 3.1688929903728904, "learning_rate": 1.9903831216352494e-06, "loss": 0.5834, "step": 159 }, { "epoch": 0.0736881494487781, "grad_norm": 3.3606584053832695, "learning_rate": 1.9901755362651685e-06, "loss": 0.6374, "step": 160 }, { "epoch": 0.07414870038283296, "grad_norm": 3.8896133343206922, "learning_rate": 1.9899657453920676e-06, "loss": 0.7499, "step": 161 }, { "epoch": 0.07460925131688782, "grad_norm": 3.18752926424928, "learning_rate": 1.989753749483233e-06, "loss": 0.686, "step": 162 }, { "epoch": 0.07506980225094269, "grad_norm": 3.4277096993868557, "learning_rate": 1.989539549010861e-06, "loss": 0.6334, "step": 163 }, { "epoch": 0.07553035318499755, "grad_norm": 3.1363256404631876, "learning_rate": 1.9893231444520584e-06, "loss": 0.7184, "step": 164 }, { "epoch": 0.07599090411905242, "grad_norm": 3.773031496303488, "learning_rate": 1.9891045362888413e-06, "loss": 0.7071, "step": 165 }, { "epoch": 0.07645145505310728, "grad_norm": 3.4894250417343957, "learning_rate": 1.988883725008136e-06, "loss": 0.8024, "step": 166 }, { "epoch": 0.07691200598716215, "grad_norm": 3.5237596679440126, "learning_rate": 1.9886607111017727e-06, "loss": 0.5565, "step": 167 }, { "epoch": 0.077372556921217, "grad_norm": 3.956330390896236, "learning_rate": 1.988435495066491e-06, "loss": 0.6834, "step": 168 }, { "epoch": 0.07783310785527187, "grad_norm": 3.0581620476086027, "learning_rate": 1.988208077403932e-06, "loss": 0.5355, "step": 169 }, { "epoch": 0.07829365878932673, "grad_norm": 3.359700225843598, "learning_rate": 1.9879784586206446e-06, "loss": 0.6266, "step": 170 }, { "epoch": 0.0787542097233816, "grad_norm": 3.160066872012096, "learning_rate": 1.987746639228077e-06, "loss": 0.4693, "step": 171 }, { "epoch": 0.07921476065743646, "grad_norm": 2.8930357386693037, "learning_rate": 1.9875126197425812e-06, "loss": 0.543, "step": 172 }, { "epoch": 0.07967531159149133, "grad_norm": 3.2160039035976538, "learning_rate": 1.987276400685409e-06, "loss": 0.5285, "step": 173 }, { "epoch": 0.08013586252554618, "grad_norm": 3.5154065293512837, "learning_rate": 1.9870379825827105e-06, "loss": 0.7303, "step": 174 }, { "epoch": 0.08059641345960104, "grad_norm": 3.272929698816058, "learning_rate": 1.9867973659655357e-06, "loss": 0.596, "step": 175 }, { "epoch": 0.08105696439365591, "grad_norm": 3.2230741681038837, "learning_rate": 1.9865545513698304e-06, "loss": 0.7758, "step": 176 }, { "epoch": 0.08151751532771077, "grad_norm": 2.88861925930577, "learning_rate": 1.9863095393364363e-06, "loss": 0.5791, "step": 177 }, { "epoch": 0.08197806626176564, "grad_norm": 3.6487253551379166, "learning_rate": 1.9860623304110895e-06, "loss": 0.8919, "step": 178 }, { "epoch": 0.0824386171958205, "grad_norm": 3.3372588648412513, "learning_rate": 1.9858129251444203e-06, "loss": 0.6433, "step": 179 }, { "epoch": 0.08289916812987537, "grad_norm": 3.069467629265819, "learning_rate": 1.9855613240919496e-06, "loss": 0.617, "step": 180 }, { "epoch": 0.08335971906393022, "grad_norm": 3.489373026628576, "learning_rate": 1.985307527814091e-06, "loss": 0.768, "step": 181 }, { "epoch": 0.08382026999798509, "grad_norm": 3.459352404519042, "learning_rate": 1.9850515368761465e-06, "loss": 0.7647, "step": 182 }, { "epoch": 0.08428082093203995, "grad_norm": 3.4396136216484012, "learning_rate": 1.9847933518483066e-06, "loss": 0.6323, "step": 183 }, { "epoch": 0.08474137186609482, "grad_norm": 3.369315529955375, "learning_rate": 1.9845329733056488e-06, "loss": 0.6724, "step": 184 }, { "epoch": 0.08520192280014968, "grad_norm": 3.144962754808177, "learning_rate": 1.9842704018281364e-06, "loss": 0.6974, "step": 185 }, { "epoch": 0.08566247373420455, "grad_norm": 3.3041054108917143, "learning_rate": 1.984005638000618e-06, "loss": 0.6231, "step": 186 }, { "epoch": 0.0861230246682594, "grad_norm": 4.170198999419159, "learning_rate": 1.983738682412824e-06, "loss": 0.644, "step": 187 }, { "epoch": 0.08658357560231426, "grad_norm": 3.22338168544018, "learning_rate": 1.983469535659369e-06, "loss": 0.5809, "step": 188 }, { "epoch": 0.08704412653636913, "grad_norm": 3.87679312890683, "learning_rate": 1.983198198339745e-06, "loss": 0.6326, "step": 189 }, { "epoch": 0.087504677470424, "grad_norm": 2.8932671924853026, "learning_rate": 1.9829246710583258e-06, "loss": 0.694, "step": 190 }, { "epoch": 0.08796522840447886, "grad_norm": 3.0104693995609293, "learning_rate": 1.982648954424362e-06, "loss": 0.5702, "step": 191 }, { "epoch": 0.08842577933853372, "grad_norm": 3.6696726695949597, "learning_rate": 1.982371049051981e-06, "loss": 0.6627, "step": 192 }, { "epoch": 0.08888633027258859, "grad_norm": 3.483251606814318, "learning_rate": 1.982090955560185e-06, "loss": 0.6148, "step": 193 }, { "epoch": 0.08934688120664344, "grad_norm": 3.059134143200745, "learning_rate": 1.981808674572851e-06, "loss": 0.641, "step": 194 }, { "epoch": 0.0898074321406983, "grad_norm": 3.471348679245806, "learning_rate": 1.9815242067187273e-06, "loss": 0.7375, "step": 195 }, { "epoch": 0.09026798307475317, "grad_norm": 3.2356019265585423, "learning_rate": 1.9812375526314335e-06, "loss": 0.6559, "step": 196 }, { "epoch": 0.09072853400880804, "grad_norm": 3.5553418134929737, "learning_rate": 1.9809487129494588e-06, "loss": 0.6052, "step": 197 }, { "epoch": 0.0911890849428629, "grad_norm": 3.5207928333701126, "learning_rate": 1.9806576883161607e-06, "loss": 0.7502, "step": 198 }, { "epoch": 0.09164963587691777, "grad_norm": 3.6571981250699808, "learning_rate": 1.9803644793797635e-06, "loss": 0.6423, "step": 199 }, { "epoch": 0.09211018681097263, "grad_norm": 3.3310862469732236, "learning_rate": 1.9800690867933567e-06, "loss": 0.6166, "step": 200 }, { "epoch": 0.09257073774502748, "grad_norm": 3.599428672561491, "learning_rate": 1.9797715112148933e-06, "loss": 0.6751, "step": 201 }, { "epoch": 0.09303128867908235, "grad_norm": 3.111228152420326, "learning_rate": 1.979471753307189e-06, "loss": 0.5873, "step": 202 }, { "epoch": 0.09349183961313721, "grad_norm": 2.7796406722600433, "learning_rate": 1.979169813737921e-06, "loss": 0.5509, "step": 203 }, { "epoch": 0.09395239054719208, "grad_norm": 3.3190228535283586, "learning_rate": 1.9788656931796237e-06, "loss": 0.5311, "step": 204 }, { "epoch": 0.09441294148124695, "grad_norm": 3.4901850657341678, "learning_rate": 1.9785593923096927e-06, "loss": 0.6073, "step": 205 }, { "epoch": 0.09487349241530181, "grad_norm": 3.8594657707637836, "learning_rate": 1.978250911810377e-06, "loss": 0.7903, "step": 206 }, { "epoch": 0.09533404334935666, "grad_norm": 3.783029519442971, "learning_rate": 1.9779402523687825e-06, "loss": 0.7535, "step": 207 }, { "epoch": 0.09579459428341153, "grad_norm": 3.541223829083605, "learning_rate": 1.977627414676867e-06, "loss": 0.6787, "step": 208 }, { "epoch": 0.09625514521746639, "grad_norm": 3.300314419811215, "learning_rate": 1.977312399431441e-06, "loss": 0.688, "step": 209 }, { "epoch": 0.09671569615152126, "grad_norm": 3.021536775472106, "learning_rate": 1.9769952073341655e-06, "loss": 0.5406, "step": 210 }, { "epoch": 0.09717624708557612, "grad_norm": 3.1063359917852016, "learning_rate": 1.976675839091549e-06, "loss": 0.7334, "step": 211 }, { "epoch": 0.09763679801963099, "grad_norm": 4.2418847264483555, "learning_rate": 1.976354295414948e-06, "loss": 0.7438, "step": 212 }, { "epoch": 0.09809734895368585, "grad_norm": 3.2746417894451927, "learning_rate": 1.9760305770205648e-06, "loss": 0.6335, "step": 213 }, { "epoch": 0.0985578998877407, "grad_norm": 3.217191492035501, "learning_rate": 1.9757046846294446e-06, "loss": 0.6167, "step": 214 }, { "epoch": 0.09901845082179557, "grad_norm": 3.202923015353876, "learning_rate": 1.975376618967476e-06, "loss": 0.6576, "step": 215 }, { "epoch": 0.09947900175585044, "grad_norm": 3.368536938836128, "learning_rate": 1.975046380765387e-06, "loss": 0.5377, "step": 216 }, { "epoch": 0.0999395526899053, "grad_norm": 3.371798575684341, "learning_rate": 1.9747139707587467e-06, "loss": 0.5324, "step": 217 }, { "epoch": 0.10040010362396017, "grad_norm": 2.7715577719122484, "learning_rate": 1.9743793896879595e-06, "loss": 0.6635, "step": 218 }, { "epoch": 0.10086065455801503, "grad_norm": 2.829524830044529, "learning_rate": 1.974042638298267e-06, "loss": 0.5327, "step": 219 }, { "epoch": 0.10132120549206988, "grad_norm": 2.891869373792989, "learning_rate": 1.9737037173397446e-06, "loss": 0.5856, "step": 220 }, { "epoch": 0.10178175642612475, "grad_norm": 3.062890989327354, "learning_rate": 1.9733626275672996e-06, "loss": 0.7426, "step": 221 }, { "epoch": 0.10224230736017961, "grad_norm": 3.116251286088085, "learning_rate": 1.973019369740671e-06, "loss": 0.5652, "step": 222 }, { "epoch": 0.10270285829423448, "grad_norm": 3.4890492450754502, "learning_rate": 1.972673944624426e-06, "loss": 0.6855, "step": 223 }, { "epoch": 0.10316340922828934, "grad_norm": 3.0888002182898444, "learning_rate": 1.9723263529879598e-06, "loss": 0.83, "step": 224 }, { "epoch": 0.10362396016234421, "grad_norm": 3.3556762378140896, "learning_rate": 1.9719765956054933e-06, "loss": 0.6933, "step": 225 }, { "epoch": 0.10408451109639907, "grad_norm": 3.195505438673579, "learning_rate": 1.971624673256071e-06, "loss": 0.5613, "step": 226 }, { "epoch": 0.10454506203045393, "grad_norm": 2.6074576346797143, "learning_rate": 1.9712705867235604e-06, "loss": 0.4869, "step": 227 }, { "epoch": 0.10500561296450879, "grad_norm": 3.421431138997975, "learning_rate": 1.970914336796648e-06, "loss": 0.5345, "step": 228 }, { "epoch": 0.10546616389856366, "grad_norm": 3.558425101140546, "learning_rate": 1.97055592426884e-06, "loss": 0.6834, "step": 229 }, { "epoch": 0.10592671483261852, "grad_norm": 3.662238091068311, "learning_rate": 1.9701953499384593e-06, "loss": 0.6788, "step": 230 }, { "epoch": 0.10638726576667339, "grad_norm": 2.9575097174697387, "learning_rate": 1.9698326146086445e-06, "loss": 0.6197, "step": 231 }, { "epoch": 0.10684781670072825, "grad_norm": 3.4475135509424293, "learning_rate": 1.9694677190873467e-06, "loss": 0.659, "step": 232 }, { "epoch": 0.1073083676347831, "grad_norm": 3.108410036960475, "learning_rate": 1.9691006641873296e-06, "loss": 0.6831, "step": 233 }, { "epoch": 0.10776891856883797, "grad_norm": 3.220407600024432, "learning_rate": 1.968731450726166e-06, "loss": 0.6389, "step": 234 }, { "epoch": 0.10822946950289283, "grad_norm": 3.534132987080869, "learning_rate": 1.9683600795262364e-06, "loss": 0.6332, "step": 235 }, { "epoch": 0.1086900204369477, "grad_norm": 3.5324339129255877, "learning_rate": 1.9679865514147277e-06, "loss": 0.7518, "step": 236 }, { "epoch": 0.10915057137100256, "grad_norm": 3.0421407458400003, "learning_rate": 1.9676108672236317e-06, "loss": 0.5808, "step": 237 }, { "epoch": 0.10961112230505743, "grad_norm": 3.5520947520869908, "learning_rate": 1.9672330277897414e-06, "loss": 0.5368, "step": 238 }, { "epoch": 0.1100716732391123, "grad_norm": 3.451159362431226, "learning_rate": 1.9668530339546514e-06, "loss": 0.6296, "step": 239 }, { "epoch": 0.11053222417316715, "grad_norm": 3.509910875733122, "learning_rate": 1.966470886564755e-06, "loss": 0.7006, "step": 240 }, { "epoch": 0.11099277510722201, "grad_norm": 2.596656303513613, "learning_rate": 1.9660865864712412e-06, "loss": 0.4933, "step": 241 }, { "epoch": 0.11145332604127688, "grad_norm": 3.4232091740027655, "learning_rate": 1.965700134530095e-06, "loss": 0.5211, "step": 242 }, { "epoch": 0.11191387697533174, "grad_norm": 3.040611124293651, "learning_rate": 1.9653115316020935e-06, "loss": 0.6377, "step": 243 }, { "epoch": 0.1123744279093866, "grad_norm": 3.4791503945160267, "learning_rate": 1.9649207785528065e-06, "loss": 0.6889, "step": 244 }, { "epoch": 0.11283497884344147, "grad_norm": 3.0895910555628032, "learning_rate": 1.96452787625259e-06, "loss": 0.6833, "step": 245 }, { "epoch": 0.11329552977749632, "grad_norm": 3.001038166095486, "learning_rate": 1.9641328255765913e-06, "loss": 0.5572, "step": 246 }, { "epoch": 0.11375608071155119, "grad_norm": 3.7375507199090596, "learning_rate": 1.963735627404739e-06, "loss": 0.6776, "step": 247 }, { "epoch": 0.11421663164560605, "grad_norm": 3.4861645782956723, "learning_rate": 1.963336282621747e-06, "loss": 0.6076, "step": 248 }, { "epoch": 0.11467718257966092, "grad_norm": 3.9356402228698313, "learning_rate": 1.962934792117111e-06, "loss": 0.5698, "step": 249 }, { "epoch": 0.11513773351371578, "grad_norm": 3.4680226962421496, "learning_rate": 1.9625311567851045e-06, "loss": 0.6332, "step": 250 }, { "epoch": 0.11559828444777065, "grad_norm": 4.110638488505918, "learning_rate": 1.9621253775247795e-06, "loss": 0.7351, "step": 251 }, { "epoch": 0.11605883538182551, "grad_norm": 3.736146712062541, "learning_rate": 1.9617174552399633e-06, "loss": 0.7494, "step": 252 }, { "epoch": 0.11651938631588037, "grad_norm": 2.938611398017176, "learning_rate": 1.961307390839255e-06, "loss": 0.6439, "step": 253 }, { "epoch": 0.11697993724993523, "grad_norm": 3.267375637056784, "learning_rate": 1.960895185236028e-06, "loss": 0.624, "step": 254 }, { "epoch": 0.1174404881839901, "grad_norm": 3.394791456038148, "learning_rate": 1.9604808393484217e-06, "loss": 0.605, "step": 255 }, { "epoch": 0.11790103911804496, "grad_norm": 3.1028647299114174, "learning_rate": 1.960064354099345e-06, "loss": 0.6428, "step": 256 }, { "epoch": 0.11836159005209983, "grad_norm": 3.0414855513121357, "learning_rate": 1.959645730416471e-06, "loss": 0.4093, "step": 257 }, { "epoch": 0.11882214098615469, "grad_norm": 3.4120022845796525, "learning_rate": 1.959224969232237e-06, "loss": 0.6576, "step": 258 }, { "epoch": 0.11928269192020956, "grad_norm": 2.972667933535389, "learning_rate": 1.9588020714838394e-06, "loss": 0.5633, "step": 259 }, { "epoch": 0.11974324285426441, "grad_norm": 3.2887632380841842, "learning_rate": 1.9583770381132357e-06, "loss": 0.6012, "step": 260 }, { "epoch": 0.12020379378831927, "grad_norm": 3.5081309566331527, "learning_rate": 1.9579498700671386e-06, "loss": 0.6691, "step": 261 }, { "epoch": 0.12066434472237414, "grad_norm": 3.3208918064597936, "learning_rate": 1.9575205682970163e-06, "loss": 0.6513, "step": 262 }, { "epoch": 0.121124895656429, "grad_norm": 3.2712864716169943, "learning_rate": 1.9570891337590895e-06, "loss": 0.5478, "step": 263 }, { "epoch": 0.12158544659048387, "grad_norm": 3.282868754227743, "learning_rate": 1.956655567414329e-06, "loss": 0.6925, "step": 264 }, { "epoch": 0.12204599752453874, "grad_norm": 3.482367964725982, "learning_rate": 1.9562198702284552e-06, "loss": 0.7356, "step": 265 }, { "epoch": 0.12250654845859359, "grad_norm": 3.084010149386528, "learning_rate": 1.955782043171933e-06, "loss": 0.5609, "step": 266 }, { "epoch": 0.12296709939264845, "grad_norm": 3.0585298128518765, "learning_rate": 1.9553420872199732e-06, "loss": 0.6027, "step": 267 }, { "epoch": 0.12342765032670332, "grad_norm": 3.386973557288646, "learning_rate": 1.954900003352527e-06, "loss": 0.5398, "step": 268 }, { "epoch": 0.12388820126075818, "grad_norm": 3.2451705841602396, "learning_rate": 1.954455792554285e-06, "loss": 0.7089, "step": 269 }, { "epoch": 0.12434875219481305, "grad_norm": 2.9403279279548, "learning_rate": 1.9540094558146775e-06, "loss": 0.4651, "step": 270 }, { "epoch": 0.12480930312886791, "grad_norm": 3.422975954193148, "learning_rate": 1.9535609941278677e-06, "loss": 0.6637, "step": 271 }, { "epoch": 0.12526985406292276, "grad_norm": 2.8483103524833906, "learning_rate": 1.9531104084927526e-06, "loss": 0.5443, "step": 272 }, { "epoch": 0.12573040499697763, "grad_norm": 3.0046696813378477, "learning_rate": 1.9526576999129613e-06, "loss": 0.6132, "step": 273 }, { "epoch": 0.1261909559310325, "grad_norm": 3.030506496717706, "learning_rate": 1.9522028693968496e-06, "loss": 0.6696, "step": 274 }, { "epoch": 0.12665150686508736, "grad_norm": 3.4189036410010076, "learning_rate": 1.951745917957501e-06, "loss": 0.6398, "step": 275 }, { "epoch": 0.12711205779914223, "grad_norm": 3.4281845642915854, "learning_rate": 1.951286846612723e-06, "loss": 0.613, "step": 276 }, { "epoch": 0.1275726087331971, "grad_norm": 3.5114510461027573, "learning_rate": 1.9508256563850437e-06, "loss": 0.5248, "step": 277 }, { "epoch": 0.12803315966725196, "grad_norm": 3.2175363479882853, "learning_rate": 1.9503623483017125e-06, "loss": 0.6153, "step": 278 }, { "epoch": 0.12849371060130682, "grad_norm": 3.257507884907778, "learning_rate": 1.949896923394695e-06, "loss": 0.6326, "step": 279 }, { "epoch": 0.12895426153536169, "grad_norm": 3.4234091810352894, "learning_rate": 1.9494293827006724e-06, "loss": 0.6533, "step": 280 }, { "epoch": 0.12941481246941655, "grad_norm": 2.9859571949464803, "learning_rate": 1.9489597272610374e-06, "loss": 0.7496, "step": 281 }, { "epoch": 0.1298753634034714, "grad_norm": 3.6355830400808125, "learning_rate": 1.948487958121895e-06, "loss": 0.6794, "step": 282 }, { "epoch": 0.13033591433752625, "grad_norm": 3.2842113677863733, "learning_rate": 1.9480140763340563e-06, "loss": 0.6123, "step": 283 }, { "epoch": 0.13079646527158112, "grad_norm": 3.094397720174347, "learning_rate": 1.9475380829530394e-06, "loss": 0.5419, "step": 284 }, { "epoch": 0.13125701620563598, "grad_norm": 3.527829614510662, "learning_rate": 1.947059979039065e-06, "loss": 0.7257, "step": 285 }, { "epoch": 0.13171756713969085, "grad_norm": 3.4106857915169475, "learning_rate": 1.9465797656570544e-06, "loss": 0.6017, "step": 286 }, { "epoch": 0.13217811807374572, "grad_norm": 3.319543434532599, "learning_rate": 1.946097443876629e-06, "loss": 0.6015, "step": 287 }, { "epoch": 0.13263866900780058, "grad_norm": 3.141575156484543, "learning_rate": 1.9456130147721057e-06, "loss": 0.6703, "step": 288 }, { "epoch": 0.13309921994185545, "grad_norm": 2.8399129741558635, "learning_rate": 1.9451264794224948e-06, "loss": 0.4491, "step": 289 }, { "epoch": 0.1335597708759103, "grad_norm": 3.0162883821205484, "learning_rate": 1.944637838911498e-06, "loss": 0.4653, "step": 290 }, { "epoch": 0.13402032180996518, "grad_norm": 3.0158920954224593, "learning_rate": 1.944147094327506e-06, "loss": 0.5747, "step": 291 }, { "epoch": 0.13448087274402004, "grad_norm": 2.8892050309237054, "learning_rate": 1.9436542467635968e-06, "loss": 0.4266, "step": 292 }, { "epoch": 0.1349414236780749, "grad_norm": 3.643489464124624, "learning_rate": 1.943159297317532e-06, "loss": 0.6408, "step": 293 }, { "epoch": 0.13540197461212977, "grad_norm": 3.1781209929973815, "learning_rate": 1.9426622470917553e-06, "loss": 0.588, "step": 294 }, { "epoch": 0.13586252554618464, "grad_norm": 3.2919873800040245, "learning_rate": 1.942163097193389e-06, "loss": 0.6894, "step": 295 }, { "epoch": 0.13632307648023947, "grad_norm": 2.858947646506684, "learning_rate": 1.941661848734233e-06, "loss": 0.6467, "step": 296 }, { "epoch": 0.13678362741429434, "grad_norm": 3.246171280951807, "learning_rate": 1.9411585028307604e-06, "loss": 0.5943, "step": 297 }, { "epoch": 0.1372441783483492, "grad_norm": 2.7358113053763953, "learning_rate": 1.9406530606041173e-06, "loss": 0.524, "step": 298 }, { "epoch": 0.13770472928240407, "grad_norm": 3.136480105540605, "learning_rate": 1.940145523180118e-06, "loss": 0.6102, "step": 299 }, { "epoch": 0.13816528021645894, "grad_norm": 3.155533214531188, "learning_rate": 1.939635891689245e-06, "loss": 0.5248, "step": 300 }, { "epoch": 0.1386258311505138, "grad_norm": 3.647288084061138, "learning_rate": 1.9391241672666437e-06, "loss": 0.7047, "step": 301 }, { "epoch": 0.13908638208456867, "grad_norm": 2.979265708987498, "learning_rate": 1.938610351052122e-06, "loss": 0.5515, "step": 302 }, { "epoch": 0.13954693301862353, "grad_norm": 3.119300289126859, "learning_rate": 1.938094444190147e-06, "loss": 0.6248, "step": 303 }, { "epoch": 0.1400074839526784, "grad_norm": 3.663446793597039, "learning_rate": 1.937576447829842e-06, "loss": 0.5661, "step": 304 }, { "epoch": 0.14046803488673326, "grad_norm": 3.5292734921724236, "learning_rate": 1.937056363124985e-06, "loss": 0.5915, "step": 305 }, { "epoch": 0.14092858582078813, "grad_norm": 3.037749619620941, "learning_rate": 1.936534191234006e-06, "loss": 0.676, "step": 306 }, { "epoch": 0.141389136754843, "grad_norm": 3.387669461542685, "learning_rate": 1.9360099333199825e-06, "loss": 0.6448, "step": 307 }, { "epoch": 0.14184968768889786, "grad_norm": 3.5739322009118166, "learning_rate": 1.935483590550639e-06, "loss": 0.7227, "step": 308 }, { "epoch": 0.1423102386229527, "grad_norm": 3.1735367015087115, "learning_rate": 1.9349551640983444e-06, "loss": 0.5428, "step": 309 }, { "epoch": 0.14277078955700756, "grad_norm": 3.048510928639603, "learning_rate": 1.934424655140109e-06, "loss": 0.5981, "step": 310 }, { "epoch": 0.14323134049106243, "grad_norm": 3.586272313938004, "learning_rate": 1.933892064857579e-06, "loss": 0.5659, "step": 311 }, { "epoch": 0.1436918914251173, "grad_norm": 3.3251317073628393, "learning_rate": 1.933357394437041e-06, "loss": 0.6554, "step": 312 }, { "epoch": 0.14415244235917216, "grad_norm": 3.2915172640337915, "learning_rate": 1.93282064506941e-06, "loss": 0.5574, "step": 313 }, { "epoch": 0.14461299329322702, "grad_norm": 3.1215547918258952, "learning_rate": 1.9322818179502356e-06, "loss": 0.5984, "step": 314 }, { "epoch": 0.1450735442272819, "grad_norm": 3.1067732908414336, "learning_rate": 1.931740914279693e-06, "loss": 0.5418, "step": 315 }, { "epoch": 0.14553409516133675, "grad_norm": 3.0231982850922363, "learning_rate": 1.9311979352625832e-06, "loss": 0.6471, "step": 316 }, { "epoch": 0.14599464609539162, "grad_norm": 3.2286421391211326, "learning_rate": 1.930652882108331e-06, "loss": 0.667, "step": 317 }, { "epoch": 0.14645519702944648, "grad_norm": 3.051273886454387, "learning_rate": 1.930105756030979e-06, "loss": 0.6372, "step": 318 }, { "epoch": 0.14691574796350135, "grad_norm": 3.357978841575257, "learning_rate": 1.929556558249189e-06, "loss": 0.5968, "step": 319 }, { "epoch": 0.1473762988975562, "grad_norm": 3.408657581264013, "learning_rate": 1.9290052899862353e-06, "loss": 0.6864, "step": 320 }, { "epoch": 0.14783684983161108, "grad_norm": 2.965828703883621, "learning_rate": 1.9284519524700063e-06, "loss": 0.4714, "step": 321 }, { "epoch": 0.14829740076566592, "grad_norm": 2.936230948042832, "learning_rate": 1.9278965469329976e-06, "loss": 0.5051, "step": 322 }, { "epoch": 0.14875795169972078, "grad_norm": 2.9304142245949727, "learning_rate": 1.9273390746123115e-06, "loss": 0.5798, "step": 323 }, { "epoch": 0.14921850263377565, "grad_norm": 2.6044770470699885, "learning_rate": 1.926779536749654e-06, "loss": 0.4478, "step": 324 }, { "epoch": 0.1496790535678305, "grad_norm": 3.346102851664222, "learning_rate": 1.9262179345913323e-06, "loss": 0.6081, "step": 325 }, { "epoch": 0.15013960450188538, "grad_norm": 3.599494713042751, "learning_rate": 1.9256542693882503e-06, "loss": 0.66, "step": 326 }, { "epoch": 0.15060015543594024, "grad_norm": 3.3559078451878883, "learning_rate": 1.925088542395909e-06, "loss": 0.5762, "step": 327 }, { "epoch": 0.1510607063699951, "grad_norm": 3.305256183172175, "learning_rate": 1.9245207548743994e-06, "loss": 0.5205, "step": 328 }, { "epoch": 0.15152125730404997, "grad_norm": 3.189694287544651, "learning_rate": 1.9239509080884043e-06, "loss": 0.5483, "step": 329 }, { "epoch": 0.15198180823810484, "grad_norm": 3.8482914338019802, "learning_rate": 1.923379003307193e-06, "loss": 0.5602, "step": 330 }, { "epoch": 0.1524423591721597, "grad_norm": 2.804743136804608, "learning_rate": 1.9228050418046165e-06, "loss": 0.4328, "step": 331 }, { "epoch": 0.15290291010621457, "grad_norm": 3.675849703736383, "learning_rate": 1.92222902485911e-06, "loss": 0.5611, "step": 332 }, { "epoch": 0.15336346104026943, "grad_norm": 3.145151001423423, "learning_rate": 1.921650953753685e-06, "loss": 0.44, "step": 333 }, { "epoch": 0.1538240119743243, "grad_norm": 3.580270585290245, "learning_rate": 1.9210708297759284e-06, "loss": 0.6488, "step": 334 }, { "epoch": 0.15428456290837914, "grad_norm": 3.003304167462429, "learning_rate": 1.9204886542180007e-06, "loss": 0.5877, "step": 335 }, { "epoch": 0.154745113842434, "grad_norm": 2.8636522642130275, "learning_rate": 1.9199044283766315e-06, "loss": 0.5932, "step": 336 }, { "epoch": 0.15520566477648887, "grad_norm": 3.3796119616806743, "learning_rate": 1.9193181535531177e-06, "loss": 0.6361, "step": 337 }, { "epoch": 0.15566621571054373, "grad_norm": 3.185129118132095, "learning_rate": 1.9187298310533184e-06, "loss": 0.5942, "step": 338 }, { "epoch": 0.1561267666445986, "grad_norm": 3.337715563794658, "learning_rate": 1.9181394621876556e-06, "loss": 0.6108, "step": 339 }, { "epoch": 0.15658731757865346, "grad_norm": 2.8640792419997907, "learning_rate": 1.917547048271109e-06, "loss": 0.633, "step": 340 }, { "epoch": 0.15704786851270833, "grad_norm": 2.7926946129227943, "learning_rate": 1.916952590623212e-06, "loss": 0.4712, "step": 341 }, { "epoch": 0.1575084194467632, "grad_norm": 3.4005962235241203, "learning_rate": 1.9163560905680514e-06, "loss": 0.5317, "step": 342 }, { "epoch": 0.15796897038081806, "grad_norm": 3.1869189227339416, "learning_rate": 1.9157575494342636e-06, "loss": 0.5394, "step": 343 }, { "epoch": 0.15842952131487292, "grad_norm": 3.4152466358703713, "learning_rate": 1.91515696855503e-06, "loss": 0.6375, "step": 344 }, { "epoch": 0.1588900722489278, "grad_norm": 3.0926055262112615, "learning_rate": 1.9145543492680763e-06, "loss": 0.6227, "step": 345 }, { "epoch": 0.15935062318298265, "grad_norm": 3.22498936596375, "learning_rate": 1.9139496929156683e-06, "loss": 0.6416, "step": 346 }, { "epoch": 0.15981117411703752, "grad_norm": 2.9706888314628466, "learning_rate": 1.913343000844609e-06, "loss": 0.5891, "step": 347 }, { "epoch": 0.16027172505109236, "grad_norm": 3.19859477403754, "learning_rate": 1.9127342744062357e-06, "loss": 0.5765, "step": 348 }, { "epoch": 0.16073227598514722, "grad_norm": 3.115476706969415, "learning_rate": 1.912123514956417e-06, "loss": 0.5554, "step": 349 }, { "epoch": 0.1611928269192021, "grad_norm": 2.961966146622805, "learning_rate": 1.9115107238555497e-06, "loss": 0.5382, "step": 350 }, { "epoch": 0.16165337785325695, "grad_norm": 3.671673821035005, "learning_rate": 1.9108959024685566e-06, "loss": 0.6642, "step": 351 }, { "epoch": 0.16211392878731182, "grad_norm": 3.3302938456428106, "learning_rate": 1.9102790521648817e-06, "loss": 0.5453, "step": 352 }, { "epoch": 0.16257447972136668, "grad_norm": 3.4915603642383752, "learning_rate": 1.909660174318489e-06, "loss": 0.5795, "step": 353 }, { "epoch": 0.16303503065542155, "grad_norm": 3.211148339907401, "learning_rate": 1.909039270307858e-06, "loss": 0.5538, "step": 354 }, { "epoch": 0.1634955815894764, "grad_norm": 2.998522836200678, "learning_rate": 1.9084163415159817e-06, "loss": 0.6143, "step": 355 }, { "epoch": 0.16395613252353128, "grad_norm": 3.088249307549935, "learning_rate": 1.907791389330363e-06, "loss": 0.5506, "step": 356 }, { "epoch": 0.16441668345758614, "grad_norm": 3.0463921020716604, "learning_rate": 1.9071644151430108e-06, "loss": 0.5874, "step": 357 }, { "epoch": 0.164877234391641, "grad_norm": 3.345306076824923, "learning_rate": 1.9065354203504398e-06, "loss": 0.7046, "step": 358 }, { "epoch": 0.16533778532569587, "grad_norm": 3.228814822006655, "learning_rate": 1.9059044063536633e-06, "loss": 0.6248, "step": 359 }, { "epoch": 0.16579833625975074, "grad_norm": 3.1464755508670637, "learning_rate": 1.9052713745581931e-06, "loss": 0.7453, "step": 360 }, { "epoch": 0.16625888719380558, "grad_norm": 3.2839840109926697, "learning_rate": 1.9046363263740358e-06, "loss": 0.5185, "step": 361 }, { "epoch": 0.16671943812786044, "grad_norm": 3.2693787327619046, "learning_rate": 1.9039992632156881e-06, "loss": 0.6118, "step": 362 }, { "epoch": 0.1671799890619153, "grad_norm": 2.666213429284101, "learning_rate": 1.9033601865021356e-06, "loss": 0.6548, "step": 363 }, { "epoch": 0.16764053999597017, "grad_norm": 3.814240106647521, "learning_rate": 1.902719097656849e-06, "loss": 0.6145, "step": 364 }, { "epoch": 0.16810109093002504, "grad_norm": 3.5057882856050515, "learning_rate": 1.9020759981077804e-06, "loss": 0.5805, "step": 365 }, { "epoch": 0.1685616418640799, "grad_norm": 3.039639804053503, "learning_rate": 1.9014308892873608e-06, "loss": 0.5629, "step": 366 }, { "epoch": 0.16902219279813477, "grad_norm": 3.492100318826316, "learning_rate": 1.9007837726324965e-06, "loss": 0.5946, "step": 367 }, { "epoch": 0.16948274373218963, "grad_norm": 3.5705400131693494, "learning_rate": 1.9001346495845656e-06, "loss": 0.6548, "step": 368 }, { "epoch": 0.1699432946662445, "grad_norm": 4.12268491301927, "learning_rate": 1.899483521589416e-06, "loss": 0.7214, "step": 369 }, { "epoch": 0.17040384560029936, "grad_norm": 3.4249008502834934, "learning_rate": 1.8988303900973612e-06, "loss": 0.5911, "step": 370 }, { "epoch": 0.17086439653435423, "grad_norm": 3.656574957105641, "learning_rate": 1.8981752565631767e-06, "loss": 0.6093, "step": 371 }, { "epoch": 0.1713249474684091, "grad_norm": 3.079763893362344, "learning_rate": 1.8975181224460974e-06, "loss": 0.6065, "step": 372 }, { "epoch": 0.17178549840246396, "grad_norm": 3.2800007239600197, "learning_rate": 1.8968589892098153e-06, "loss": 0.4789, "step": 373 }, { "epoch": 0.1722460493365188, "grad_norm": 3.2117733850824313, "learning_rate": 1.8961978583224743e-06, "loss": 0.5964, "step": 374 }, { "epoch": 0.17270660027057366, "grad_norm": 3.4590302152792876, "learning_rate": 1.8955347312566675e-06, "loss": 0.6101, "step": 375 }, { "epoch": 0.17316715120462853, "grad_norm": 4.060553328835282, "learning_rate": 1.8948696094894352e-06, "loss": 0.5648, "step": 376 }, { "epoch": 0.1736277021386834, "grad_norm": 3.567361103730285, "learning_rate": 1.8942024945022598e-06, "loss": 0.7206, "step": 377 }, { "epoch": 0.17408825307273826, "grad_norm": 3.11756724456868, "learning_rate": 1.8935333877810646e-06, "loss": 0.5549, "step": 378 }, { "epoch": 0.17454880400679312, "grad_norm": 3.0493230161140357, "learning_rate": 1.892862290816208e-06, "loss": 0.5972, "step": 379 }, { "epoch": 0.175009354940848, "grad_norm": 3.0362303456982156, "learning_rate": 1.8921892051024816e-06, "loss": 0.5893, "step": 380 }, { "epoch": 0.17546990587490285, "grad_norm": 3.2964073384518082, "learning_rate": 1.8915141321391078e-06, "loss": 0.6622, "step": 381 }, { "epoch": 0.17593045680895772, "grad_norm": 3.177252523691639, "learning_rate": 1.8908370734297338e-06, "loss": 0.7608, "step": 382 }, { "epoch": 0.17639100774301258, "grad_norm": 2.9299509906121077, "learning_rate": 1.8901580304824311e-06, "loss": 0.4753, "step": 383 }, { "epoch": 0.17685155867706745, "grad_norm": 3.66473331747743, "learning_rate": 1.8894770048096903e-06, "loss": 0.551, "step": 384 }, { "epoch": 0.17731210961112231, "grad_norm": 2.857955898303891, "learning_rate": 1.8887939979284182e-06, "loss": 0.6346, "step": 385 }, { "epoch": 0.17777266054517718, "grad_norm": 3.1162479244718666, "learning_rate": 1.8881090113599352e-06, "loss": 0.5842, "step": 386 }, { "epoch": 0.17823321147923202, "grad_norm": 3.329339301935554, "learning_rate": 1.88742204662997e-06, "loss": 0.6312, "step": 387 }, { "epoch": 0.17869376241328688, "grad_norm": 3.334334327924235, "learning_rate": 1.8867331052686583e-06, "loss": 0.625, "step": 388 }, { "epoch": 0.17915431334734175, "grad_norm": 3.2174795958697033, "learning_rate": 1.886042188810539e-06, "loss": 0.5223, "step": 389 }, { "epoch": 0.1796148642813966, "grad_norm": 3.185576064014635, "learning_rate": 1.8853492987945487e-06, "loss": 0.5531, "step": 390 }, { "epoch": 0.18007541521545148, "grad_norm": 3.463451373745552, "learning_rate": 1.8846544367640216e-06, "loss": 0.6159, "step": 391 }, { "epoch": 0.18053596614950634, "grad_norm": 3.2172789875643604, "learning_rate": 1.8839576042666833e-06, "loss": 0.545, "step": 392 }, { "epoch": 0.1809965170835612, "grad_norm": 3.050074228085725, "learning_rate": 1.883258802854649e-06, "loss": 0.6598, "step": 393 }, { "epoch": 0.18145706801761607, "grad_norm": 3.3861305570759974, "learning_rate": 1.8825580340844187e-06, "loss": 0.6044, "step": 394 }, { "epoch": 0.18191761895167094, "grad_norm": 3.4136999849562772, "learning_rate": 1.8818552995168748e-06, "loss": 0.6544, "step": 395 }, { "epoch": 0.1823781698857258, "grad_norm": 3.3360169966873547, "learning_rate": 1.8811506007172788e-06, "loss": 0.6911, "step": 396 }, { "epoch": 0.18283872081978067, "grad_norm": 3.0761653456275875, "learning_rate": 1.8804439392552664e-06, "loss": 0.5872, "step": 397 }, { "epoch": 0.18329927175383554, "grad_norm": 3.466796092903531, "learning_rate": 1.8797353167048457e-06, "loss": 0.545, "step": 398 }, { "epoch": 0.1837598226878904, "grad_norm": 3.0584194265671636, "learning_rate": 1.8790247346443927e-06, "loss": 0.5631, "step": 399 }, { "epoch": 0.18422037362194527, "grad_norm": 3.149641541487599, "learning_rate": 1.8783121946566473e-06, "loss": 0.6112, "step": 400 }, { "epoch": 0.1846809245560001, "grad_norm": 3.555627278211036, "learning_rate": 1.8775976983287114e-06, "loss": 0.571, "step": 401 }, { "epoch": 0.18514147549005497, "grad_norm": 3.1940216915097412, "learning_rate": 1.876881247252044e-06, "loss": 0.6587, "step": 402 }, { "epoch": 0.18560202642410983, "grad_norm": 3.488393433022656, "learning_rate": 1.8761628430224582e-06, "loss": 0.6844, "step": 403 }, { "epoch": 0.1860625773581647, "grad_norm": 3.1438026269472075, "learning_rate": 1.875442487240117e-06, "loss": 0.5829, "step": 404 }, { "epoch": 0.18652312829221956, "grad_norm": 3.441265497700973, "learning_rate": 1.8747201815095313e-06, "loss": 0.5813, "step": 405 }, { "epoch": 0.18698367922627443, "grad_norm": 3.125275659245325, "learning_rate": 1.8739959274395547e-06, "loss": 0.5402, "step": 406 }, { "epoch": 0.1874442301603293, "grad_norm": 3.182859255843592, "learning_rate": 1.87326972664338e-06, "loss": 0.5338, "step": 407 }, { "epoch": 0.18790478109438416, "grad_norm": 3.3508341677793867, "learning_rate": 1.8725415807385368e-06, "loss": 0.596, "step": 408 }, { "epoch": 0.18836533202843903, "grad_norm": 3.5167971779519616, "learning_rate": 1.871811491346887e-06, "loss": 0.738, "step": 409 }, { "epoch": 0.1888258829624939, "grad_norm": 3.2498092239498844, "learning_rate": 1.8710794600946216e-06, "loss": 0.5168, "step": 410 }, { "epoch": 0.18928643389654876, "grad_norm": 3.260212416401363, "learning_rate": 1.8703454886122565e-06, "loss": 0.5984, "step": 411 }, { "epoch": 0.18974698483060362, "grad_norm": 3.507545427256196, "learning_rate": 1.8696095785346295e-06, "loss": 0.5627, "step": 412 }, { "epoch": 0.1902075357646585, "grad_norm": 3.3634449621572466, "learning_rate": 1.8688717315008962e-06, "loss": 0.541, "step": 413 }, { "epoch": 0.19066808669871332, "grad_norm": 3.2112093920249105, "learning_rate": 1.8681319491545269e-06, "loss": 0.6084, "step": 414 }, { "epoch": 0.1911286376327682, "grad_norm": 3.5404353894665532, "learning_rate": 1.8673902331433022e-06, "loss": 0.5919, "step": 415 }, { "epoch": 0.19158918856682305, "grad_norm": 3.104412575312322, "learning_rate": 1.86664658511931e-06, "loss": 0.6024, "step": 416 }, { "epoch": 0.19204973950087792, "grad_norm": 3.1299398856732945, "learning_rate": 1.8659010067389414e-06, "loss": 0.55, "step": 417 }, { "epoch": 0.19251029043493278, "grad_norm": 3.289621978009311, "learning_rate": 1.8651534996628869e-06, "loss": 0.6006, "step": 418 }, { "epoch": 0.19297084136898765, "grad_norm": 3.388227682628728, "learning_rate": 1.8644040655561334e-06, "loss": 0.6185, "step": 419 }, { "epoch": 0.19343139230304252, "grad_norm": 3.2963874382804432, "learning_rate": 1.8636527060879601e-06, "loss": 0.5446, "step": 420 }, { "epoch": 0.19389194323709738, "grad_norm": 2.951087777995391, "learning_rate": 1.8628994229319338e-06, "loss": 0.5546, "step": 421 }, { "epoch": 0.19435249417115225, "grad_norm": 2.970629687314148, "learning_rate": 1.8621442177659076e-06, "loss": 0.7266, "step": 422 }, { "epoch": 0.1948130451052071, "grad_norm": 3.3425017993739696, "learning_rate": 1.8613870922720145e-06, "loss": 0.6919, "step": 423 }, { "epoch": 0.19527359603926198, "grad_norm": 3.138471966309496, "learning_rate": 1.8606280481366649e-06, "loss": 0.6644, "step": 424 }, { "epoch": 0.19573414697331684, "grad_norm": 2.931479609326468, "learning_rate": 1.8598670870505434e-06, "loss": 0.5852, "step": 425 }, { "epoch": 0.1961946979073717, "grad_norm": 3.1399142652071563, "learning_rate": 1.8591042107086038e-06, "loss": 0.6798, "step": 426 }, { "epoch": 0.19665524884142654, "grad_norm": 3.31831713584121, "learning_rate": 1.8583394208100658e-06, "loss": 0.6821, "step": 427 }, { "epoch": 0.1971157997754814, "grad_norm": 3.570916313275249, "learning_rate": 1.857572719058412e-06, "loss": 0.6067, "step": 428 }, { "epoch": 0.19757635070953627, "grad_norm": 3.3374646506424983, "learning_rate": 1.8568041071613832e-06, "loss": 0.5824, "step": 429 }, { "epoch": 0.19803690164359114, "grad_norm": 3.45107101342791, "learning_rate": 1.8560335868309742e-06, "loss": 0.6396, "step": 430 }, { "epoch": 0.198497452577646, "grad_norm": 2.832158757011958, "learning_rate": 1.8552611597834317e-06, "loss": 0.4881, "step": 431 }, { "epoch": 0.19895800351170087, "grad_norm": 3.526958026283652, "learning_rate": 1.8544868277392482e-06, "loss": 0.5955, "step": 432 }, { "epoch": 0.19941855444575574, "grad_norm": 3.368802966721466, "learning_rate": 1.8537105924231601e-06, "loss": 0.568, "step": 433 }, { "epoch": 0.1998791053798106, "grad_norm": 2.9552863120111, "learning_rate": 1.8529324555641436e-06, "loss": 0.6433, "step": 434 }, { "epoch": 0.20033965631386547, "grad_norm": 2.6981629403620615, "learning_rate": 1.8521524188954091e-06, "loss": 0.5556, "step": 435 }, { "epoch": 0.20080020724792033, "grad_norm": 2.817878440174924, "learning_rate": 1.8513704841543995e-06, "loss": 0.5547, "step": 436 }, { "epoch": 0.2012607581819752, "grad_norm": 3.2229669697907553, "learning_rate": 1.8505866530827855e-06, "loss": 0.5965, "step": 437 }, { "epoch": 0.20172130911603006, "grad_norm": 3.1101407413877538, "learning_rate": 1.8498009274264605e-06, "loss": 0.5596, "step": 438 }, { "epoch": 0.20218186005008493, "grad_norm": 3.3912236595059975, "learning_rate": 1.8490133089355398e-06, "loss": 0.7065, "step": 439 }, { "epoch": 0.20264241098413976, "grad_norm": 3.0636194517862965, "learning_rate": 1.848223799364353e-06, "loss": 0.6438, "step": 440 }, { "epoch": 0.20310296191819463, "grad_norm": 3.0364026600968277, "learning_rate": 1.8474324004714428e-06, "loss": 0.6763, "step": 441 }, { "epoch": 0.2035635128522495, "grad_norm": 3.401042051162378, "learning_rate": 1.8466391140195601e-06, "loss": 0.6028, "step": 442 }, { "epoch": 0.20402406378630436, "grad_norm": 3.324755759973617, "learning_rate": 1.8458439417756594e-06, "loss": 0.75, "step": 443 }, { "epoch": 0.20448461472035923, "grad_norm": 3.452759888109864, "learning_rate": 1.8450468855108969e-06, "loss": 0.7129, "step": 444 }, { "epoch": 0.2049451656544141, "grad_norm": 3.567128370695001, "learning_rate": 1.8442479470006239e-06, "loss": 0.6197, "step": 445 }, { "epoch": 0.20540571658846896, "grad_norm": 3.3653450254819237, "learning_rate": 1.843447128024385e-06, "loss": 0.663, "step": 446 }, { "epoch": 0.20586626752252382, "grad_norm": 3.3553923088948117, "learning_rate": 1.842644430365913e-06, "loss": 0.5946, "step": 447 }, { "epoch": 0.2063268184565787, "grad_norm": 3.0905391763237806, "learning_rate": 1.8418398558131257e-06, "loss": 0.5987, "step": 448 }, { "epoch": 0.20678736939063355, "grad_norm": 2.8613755616545875, "learning_rate": 1.8410334061581206e-06, "loss": 0.5291, "step": 449 }, { "epoch": 0.20724792032468842, "grad_norm": 3.510631784297277, "learning_rate": 1.8402250831971723e-06, "loss": 0.5732, "step": 450 }, { "epoch": 0.20770847125874328, "grad_norm": 3.5041502693832203, "learning_rate": 1.8394148887307285e-06, "loss": 0.585, "step": 451 }, { "epoch": 0.20816902219279815, "grad_norm": 3.1445431808415556, "learning_rate": 1.8386028245634041e-06, "loss": 0.5904, "step": 452 }, { "epoch": 0.20862957312685299, "grad_norm": 3.257773233991117, "learning_rate": 1.83778889250398e-06, "loss": 0.6259, "step": 453 }, { "epoch": 0.20909012406090785, "grad_norm": 2.7341574429872764, "learning_rate": 1.836973094365397e-06, "loss": 0.4938, "step": 454 }, { "epoch": 0.20955067499496272, "grad_norm": 3.6461025551874497, "learning_rate": 1.8361554319647522e-06, "loss": 0.6574, "step": 455 }, { "epoch": 0.21001122592901758, "grad_norm": 3.278582866514977, "learning_rate": 1.8353359071232951e-06, "loss": 0.5728, "step": 456 }, { "epoch": 0.21047177686307245, "grad_norm": 3.9462317493517025, "learning_rate": 1.8345145216664242e-06, "loss": 0.755, "step": 457 }, { "epoch": 0.2109323277971273, "grad_norm": 3.481064251792427, "learning_rate": 1.8336912774236818e-06, "loss": 0.6502, "step": 458 }, { "epoch": 0.21139287873118218, "grad_norm": 3.248277112137531, "learning_rate": 1.83286617622875e-06, "loss": 0.5746, "step": 459 }, { "epoch": 0.21185342966523704, "grad_norm": 3.3146346341451918, "learning_rate": 1.8320392199194476e-06, "loss": 0.605, "step": 460 }, { "epoch": 0.2123139805992919, "grad_norm": 3.441495638998992, "learning_rate": 1.8312104103377262e-06, "loss": 0.6416, "step": 461 }, { "epoch": 0.21277453153334677, "grad_norm": 2.9904476105113065, "learning_rate": 1.8303797493296637e-06, "loss": 0.6347, "step": 462 }, { "epoch": 0.21323508246740164, "grad_norm": 3.63776364142898, "learning_rate": 1.8295472387454636e-06, "loss": 0.4796, "step": 463 }, { "epoch": 0.2136956334014565, "grad_norm": 3.0013856019668763, "learning_rate": 1.8287128804394474e-06, "loss": 0.6319, "step": 464 }, { "epoch": 0.21415618433551137, "grad_norm": 3.0880145601287783, "learning_rate": 1.8278766762700534e-06, "loss": 0.5423, "step": 465 }, { "epoch": 0.2146167352695662, "grad_norm": 3.133296021110922, "learning_rate": 1.8270386280998309e-06, "loss": 0.5982, "step": 466 }, { "epoch": 0.21507728620362107, "grad_norm": 3.4704322121668914, "learning_rate": 1.8261987377954365e-06, "loss": 0.6409, "step": 467 }, { "epoch": 0.21553783713767594, "grad_norm": 3.0683920370476203, "learning_rate": 1.8253570072276303e-06, "loss": 0.6111, "step": 468 }, { "epoch": 0.2159983880717308, "grad_norm": 3.2983986131430663, "learning_rate": 1.8245134382712709e-06, "loss": 0.6993, "step": 469 }, { "epoch": 0.21645893900578567, "grad_norm": 3.2695193504256754, "learning_rate": 1.8236680328053116e-06, "loss": 0.5836, "step": 470 }, { "epoch": 0.21691948993984053, "grad_norm": 3.1255673883544612, "learning_rate": 1.822820792712797e-06, "loss": 0.5577, "step": 471 }, { "epoch": 0.2173800408738954, "grad_norm": 3.6062462823524735, "learning_rate": 1.8219717198808578e-06, "loss": 0.6389, "step": 472 }, { "epoch": 0.21784059180795026, "grad_norm": 3.393965634932339, "learning_rate": 1.8211208162007065e-06, "loss": 0.5364, "step": 473 }, { "epoch": 0.21830114274200513, "grad_norm": 3.244656999193985, "learning_rate": 1.820268083567634e-06, "loss": 0.525, "step": 474 }, { "epoch": 0.21876169367606, "grad_norm": 3.2505550753334664, "learning_rate": 1.819413523881005e-06, "loss": 0.6237, "step": 475 }, { "epoch": 0.21922224461011486, "grad_norm": 3.0423831974765894, "learning_rate": 1.818557139044254e-06, "loss": 0.5472, "step": 476 }, { "epoch": 0.21968279554416972, "grad_norm": 2.9954662337432656, "learning_rate": 1.8176989309648803e-06, "loss": 0.5291, "step": 477 }, { "epoch": 0.2201433464782246, "grad_norm": 2.918815185530408, "learning_rate": 1.8168389015544444e-06, "loss": 0.5379, "step": 478 }, { "epoch": 0.22060389741227943, "grad_norm": 3.265440208483845, "learning_rate": 1.8159770527285634e-06, "loss": 0.5796, "step": 479 }, { "epoch": 0.2210644483463343, "grad_norm": 3.3450099140811806, "learning_rate": 1.815113386406908e-06, "loss": 0.5663, "step": 480 }, { "epoch": 0.22152499928038916, "grad_norm": 2.8483938675266547, "learning_rate": 1.8142479045131953e-06, "loss": 0.6591, "step": 481 }, { "epoch": 0.22198555021444402, "grad_norm": 3.1336266713837744, "learning_rate": 1.8133806089751884e-06, "loss": 0.4984, "step": 482 }, { "epoch": 0.2224461011484989, "grad_norm": 3.080985904456354, "learning_rate": 1.8125115017246887e-06, "loss": 0.4414, "step": 483 }, { "epoch": 0.22290665208255375, "grad_norm": 3.371599831924596, "learning_rate": 1.8116405846975335e-06, "loss": 0.5807, "step": 484 }, { "epoch": 0.22336720301660862, "grad_norm": 3.078985566820636, "learning_rate": 1.8107678598335912e-06, "loss": 0.6484, "step": 485 }, { "epoch": 0.22382775395066348, "grad_norm": 2.9841159659031526, "learning_rate": 1.8098933290767567e-06, "loss": 0.569, "step": 486 }, { "epoch": 0.22428830488471835, "grad_norm": 3.110058890834877, "learning_rate": 1.8090169943749474e-06, "loss": 0.6511, "step": 487 }, { "epoch": 0.2247488558187732, "grad_norm": 3.153179145491399, "learning_rate": 1.808138857680099e-06, "loss": 0.4959, "step": 488 }, { "epoch": 0.22520940675282808, "grad_norm": 3.004599141952458, "learning_rate": 1.8072589209481607e-06, "loss": 0.5327, "step": 489 }, { "epoch": 0.22566995768688294, "grad_norm": 3.3051175185768273, "learning_rate": 1.8063771861390915e-06, "loss": 0.6466, "step": 490 }, { "epoch": 0.2261305086209378, "grad_norm": 3.096130029682352, "learning_rate": 1.8054936552168547e-06, "loss": 0.4745, "step": 491 }, { "epoch": 0.22659105955499265, "grad_norm": 3.756055051745282, "learning_rate": 1.804608330149415e-06, "loss": 0.6128, "step": 492 }, { "epoch": 0.2270516104890475, "grad_norm": 3.342218840047437, "learning_rate": 1.8037212129087322e-06, "loss": 0.561, "step": 493 }, { "epoch": 0.22751216142310238, "grad_norm": 3.244488624559804, "learning_rate": 1.8028323054707592e-06, "loss": 0.6411, "step": 494 }, { "epoch": 0.22797271235715724, "grad_norm": 3.3116315535429055, "learning_rate": 1.801941609815436e-06, "loss": 0.6216, "step": 495 }, { "epoch": 0.2284332632912121, "grad_norm": 3.3075500193801775, "learning_rate": 1.8010491279266858e-06, "loss": 0.6051, "step": 496 }, { "epoch": 0.22889381422526697, "grad_norm": 3.0538650883239, "learning_rate": 1.8001548617924095e-06, "loss": 0.4452, "step": 497 }, { "epoch": 0.22935436515932184, "grad_norm": 3.5424554842795835, "learning_rate": 1.799258813404483e-06, "loss": 0.6483, "step": 498 }, { "epoch": 0.2298149160933767, "grad_norm": 3.3078686719248593, "learning_rate": 1.7983609847587521e-06, "loss": 0.585, "step": 499 }, { "epoch": 0.23027546702743157, "grad_norm": 3.3972513361686185, "learning_rate": 1.7974613778550278e-06, "loss": 0.6655, "step": 500 }, { "epoch": 0.23073601796148643, "grad_norm": 3.465463703248316, "learning_rate": 1.7965599946970812e-06, "loss": 0.6034, "step": 501 }, { "epoch": 0.2311965688955413, "grad_norm": 3.325451562972118, "learning_rate": 1.795656837292641e-06, "loss": 0.6398, "step": 502 }, { "epoch": 0.23165711982959616, "grad_norm": 3.5458965409990593, "learning_rate": 1.7947519076533873e-06, "loss": 0.5217, "step": 503 }, { "epoch": 0.23211767076365103, "grad_norm": 3.3055864844064904, "learning_rate": 1.793845207794947e-06, "loss": 0.633, "step": 504 }, { "epoch": 0.23257822169770587, "grad_norm": 3.353646102717693, "learning_rate": 1.7929367397368913e-06, "loss": 0.5797, "step": 505 }, { "epoch": 0.23303877263176073, "grad_norm": 3.254837375875168, "learning_rate": 1.7920265055027283e-06, "loss": 0.5433, "step": 506 }, { "epoch": 0.2334993235658156, "grad_norm": 3.5539641885768214, "learning_rate": 1.7911145071199018e-06, "loss": 0.6674, "step": 507 }, { "epoch": 0.23395987449987046, "grad_norm": 3.4968687641517584, "learning_rate": 1.7902007466197837e-06, "loss": 0.5679, "step": 508 }, { "epoch": 0.23442042543392533, "grad_norm": 3.316075449684041, "learning_rate": 1.789285226037671e-06, "loss": 0.6063, "step": 509 }, { "epoch": 0.2348809763679802, "grad_norm": 3.4021632966156696, "learning_rate": 1.788367947412782e-06, "loss": 0.5539, "step": 510 }, { "epoch": 0.23534152730203506, "grad_norm": 3.1412284614498103, "learning_rate": 1.7874489127882493e-06, "loss": 0.5244, "step": 511 }, { "epoch": 0.23580207823608992, "grad_norm": 3.6411365029508143, "learning_rate": 1.7865281242111182e-06, "loss": 0.6874, "step": 512 }, { "epoch": 0.2362626291701448, "grad_norm": 2.939080873360146, "learning_rate": 1.7856055837323406e-06, "loss": 0.6462, "step": 513 }, { "epoch": 0.23672318010419965, "grad_norm": 3.5060804854877334, "learning_rate": 1.784681293406769e-06, "loss": 0.6256, "step": 514 }, { "epoch": 0.23718373103825452, "grad_norm": 3.454373638780033, "learning_rate": 1.7837552552931555e-06, "loss": 0.5419, "step": 515 }, { "epoch": 0.23764428197230938, "grad_norm": 2.981684278208563, "learning_rate": 1.7828274714541443e-06, "loss": 0.6395, "step": 516 }, { "epoch": 0.23810483290636425, "grad_norm": 2.6776702286309413, "learning_rate": 1.7818979439562677e-06, "loss": 0.4575, "step": 517 }, { "epoch": 0.23856538384041912, "grad_norm": 3.0713454976431342, "learning_rate": 1.7809666748699424e-06, "loss": 0.6557, "step": 518 }, { "epoch": 0.23902593477447395, "grad_norm": 3.332095530691328, "learning_rate": 1.7800336662694635e-06, "loss": 0.5726, "step": 519 }, { "epoch": 0.23948648570852882, "grad_norm": 3.3759244307287375, "learning_rate": 1.7790989202330018e-06, "loss": 0.5942, "step": 520 }, { "epoch": 0.23994703664258368, "grad_norm": 3.3882812703718583, "learning_rate": 1.7781624388425973e-06, "loss": 0.6916, "step": 521 }, { "epoch": 0.24040758757663855, "grad_norm": 3.4802417994319, "learning_rate": 1.7772242241841552e-06, "loss": 0.5549, "step": 522 }, { "epoch": 0.2408681385106934, "grad_norm": 2.8321037056813925, "learning_rate": 1.7762842783474419e-06, "loss": 0.5675, "step": 523 }, { "epoch": 0.24132868944474828, "grad_norm": 3.0286366892013024, "learning_rate": 1.7753426034260794e-06, "loss": 0.6203, "step": 524 }, { "epoch": 0.24178924037880314, "grad_norm": 3.2252778331423255, "learning_rate": 1.774399201517541e-06, "loss": 0.6389, "step": 525 }, { "epoch": 0.242249791312858, "grad_norm": 3.0971473717588087, "learning_rate": 1.7734540747231469e-06, "loss": 0.6017, "step": 526 }, { "epoch": 0.24271034224691287, "grad_norm": 3.4994719951042104, "learning_rate": 1.772507225148059e-06, "loss": 0.4603, "step": 527 }, { "epoch": 0.24317089318096774, "grad_norm": 2.713673764054212, "learning_rate": 1.7715586549012768e-06, "loss": 0.5765, "step": 528 }, { "epoch": 0.2436314441150226, "grad_norm": 3.221284406276941, "learning_rate": 1.7706083660956327e-06, "loss": 0.6266, "step": 529 }, { "epoch": 0.24409199504907747, "grad_norm": 3.306672434466321, "learning_rate": 1.7696563608477862e-06, "loss": 0.6451, "step": 530 }, { "epoch": 0.24455254598313234, "grad_norm": 3.065434289834053, "learning_rate": 1.76870264127822e-06, "loss": 0.6552, "step": 531 }, { "epoch": 0.24501309691718717, "grad_norm": 3.356746881862158, "learning_rate": 1.7677472095112363e-06, "loss": 0.4904, "step": 532 }, { "epoch": 0.24547364785124204, "grad_norm": 3.253784280023295, "learning_rate": 1.7667900676749498e-06, "loss": 0.5361, "step": 533 }, { "epoch": 0.2459341987852969, "grad_norm": 3.0192332980381344, "learning_rate": 1.7658312179012854e-06, "loss": 0.5099, "step": 534 }, { "epoch": 0.24639474971935177, "grad_norm": 3.0809296396824384, "learning_rate": 1.7648706623259706e-06, "loss": 0.6658, "step": 535 }, { "epoch": 0.24685530065340663, "grad_norm": 3.6830695514949077, "learning_rate": 1.7639084030885338e-06, "loss": 0.5973, "step": 536 }, { "epoch": 0.2473158515874615, "grad_norm": 3.330551105763686, "learning_rate": 1.7629444423322982e-06, "loss": 0.7364, "step": 537 }, { "epoch": 0.24777640252151636, "grad_norm": 2.9911270084180366, "learning_rate": 1.7619787822043754e-06, "loss": 0.462, "step": 538 }, { "epoch": 0.24823695345557123, "grad_norm": 3.149958771441102, "learning_rate": 1.7610114248556639e-06, "loss": 0.611, "step": 539 }, { "epoch": 0.2486975043896261, "grad_norm": 3.30756958264156, "learning_rate": 1.7600423724408415e-06, "loss": 0.5285, "step": 540 }, { "epoch": 0.24915805532368096, "grad_norm": 3.3029379864351, "learning_rate": 1.759071627118362e-06, "loss": 0.5643, "step": 541 }, { "epoch": 0.24961860625773583, "grad_norm": 3.454862601842869, "learning_rate": 1.7580991910504494e-06, "loss": 0.6229, "step": 542 }, { "epoch": 0.2500791571917907, "grad_norm": 3.2773777585364616, "learning_rate": 1.7571250664030947e-06, "loss": 0.6066, "step": 543 }, { "epoch": 0.25053970812584553, "grad_norm": 2.955729624984104, "learning_rate": 1.7561492553460488e-06, "loss": 0.5717, "step": 544 }, { "epoch": 0.2510002590599004, "grad_norm": 3.103695274249137, "learning_rate": 1.7551717600528203e-06, "loss": 0.5664, "step": 545 }, { "epoch": 0.25146080999395526, "grad_norm": 3.1965303115665034, "learning_rate": 1.7541925827006678e-06, "loss": 0.5587, "step": 546 }, { "epoch": 0.25192136092801015, "grad_norm": 2.9103203193249545, "learning_rate": 1.7532117254705972e-06, "loss": 0.6035, "step": 547 }, { "epoch": 0.252381911862065, "grad_norm": 2.9173381955892976, "learning_rate": 1.7522291905473558e-06, "loss": 0.4366, "step": 548 }, { "epoch": 0.2528424627961199, "grad_norm": 3.222972546291656, "learning_rate": 1.7512449801194286e-06, "loss": 0.5781, "step": 549 }, { "epoch": 0.2533030137301747, "grad_norm": 3.1103988583748663, "learning_rate": 1.7502590963790316e-06, "loss": 0.5862, "step": 550 }, { "epoch": 0.25376356466422956, "grad_norm": 3.4396946239347908, "learning_rate": 1.7492715415221087e-06, "loss": 0.6496, "step": 551 }, { "epoch": 0.25422411559828445, "grad_norm": 2.9802260714638398, "learning_rate": 1.7482823177483252e-06, "loss": 0.6773, "step": 552 }, { "epoch": 0.2546846665323393, "grad_norm": 3.082234293267444, "learning_rate": 1.7472914272610646e-06, "loss": 0.5246, "step": 553 }, { "epoch": 0.2551452174663942, "grad_norm": 3.3567221598735992, "learning_rate": 1.7462988722674221e-06, "loss": 0.6232, "step": 554 }, { "epoch": 0.255605768400449, "grad_norm": 3.2626603235826264, "learning_rate": 1.7453046549782009e-06, "loss": 0.5204, "step": 555 }, { "epoch": 0.2560663193345039, "grad_norm": 3.2339294619977434, "learning_rate": 1.7443087776079064e-06, "loss": 0.5647, "step": 556 }, { "epoch": 0.25652687026855875, "grad_norm": 3.3723328031579722, "learning_rate": 1.7433112423747418e-06, "loss": 0.6539, "step": 557 }, { "epoch": 0.25698742120261364, "grad_norm": 3.184022192143854, "learning_rate": 1.742312051500603e-06, "loss": 0.6068, "step": 558 }, { "epoch": 0.2574479721366685, "grad_norm": 3.5355428533513695, "learning_rate": 1.741311207211074e-06, "loss": 0.6395, "step": 559 }, { "epoch": 0.25790852307072337, "grad_norm": 3.303032162182857, "learning_rate": 1.740308711735421e-06, "loss": 0.5797, "step": 560 }, { "epoch": 0.2583690740047782, "grad_norm": 3.2151549157821235, "learning_rate": 1.7393045673065878e-06, "loss": 0.5189, "step": 561 }, { "epoch": 0.2588296249388331, "grad_norm": 3.2889581937560513, "learning_rate": 1.7382987761611924e-06, "loss": 0.5853, "step": 562 }, { "epoch": 0.25929017587288794, "grad_norm": 3.589638793664337, "learning_rate": 1.7372913405395191e-06, "loss": 0.591, "step": 563 }, { "epoch": 0.2597507268069428, "grad_norm": 3.1711968406048734, "learning_rate": 1.7362822626855165e-06, "loss": 0.5466, "step": 564 }, { "epoch": 0.26021127774099767, "grad_norm": 2.816516792416395, "learning_rate": 1.7352715448467895e-06, "loss": 0.5831, "step": 565 }, { "epoch": 0.2606718286750525, "grad_norm": 3.154750601104403, "learning_rate": 1.7342591892745974e-06, "loss": 0.6726, "step": 566 }, { "epoch": 0.2611323796091074, "grad_norm": 3.3840134771829637, "learning_rate": 1.7332451982238465e-06, "loss": 0.5483, "step": 567 }, { "epoch": 0.26159293054316224, "grad_norm": 3.1406500671773645, "learning_rate": 1.732229573953086e-06, "loss": 0.6034, "step": 568 }, { "epoch": 0.26205348147721713, "grad_norm": 2.862334788278051, "learning_rate": 1.7312123187245037e-06, "loss": 0.6242, "step": 569 }, { "epoch": 0.26251403241127197, "grad_norm": 2.8647709222697095, "learning_rate": 1.7301934348039188e-06, "loss": 0.5623, "step": 570 }, { "epoch": 0.26297458334532686, "grad_norm": 2.8749482072040458, "learning_rate": 1.7291729244607795e-06, "loss": 0.568, "step": 571 }, { "epoch": 0.2634351342793817, "grad_norm": 3.1262185261082935, "learning_rate": 1.7281507899681556e-06, "loss": 0.5939, "step": 572 }, { "epoch": 0.2638956852134366, "grad_norm": 3.2312398719111606, "learning_rate": 1.7271270336027358e-06, "loss": 0.5729, "step": 573 }, { "epoch": 0.26435623614749143, "grad_norm": 3.035067327482627, "learning_rate": 1.7261016576448198e-06, "loss": 0.5952, "step": 574 }, { "epoch": 0.2648167870815463, "grad_norm": 3.386614804112364, "learning_rate": 1.7250746643783158e-06, "loss": 0.7062, "step": 575 }, { "epoch": 0.26527733801560116, "grad_norm": 3.3687551890454315, "learning_rate": 1.7240460560907343e-06, "loss": 0.4614, "step": 576 }, { "epoch": 0.26573788894965605, "grad_norm": 2.9333951618662675, "learning_rate": 1.7230158350731832e-06, "loss": 0.5462, "step": 577 }, { "epoch": 0.2661984398837109, "grad_norm": 3.2883977513315714, "learning_rate": 1.7219840036203613e-06, "loss": 0.668, "step": 578 }, { "epoch": 0.26665899081776573, "grad_norm": 3.319840681654177, "learning_rate": 1.7209505640305562e-06, "loss": 0.5502, "step": 579 }, { "epoch": 0.2671195417518206, "grad_norm": 3.344576294088726, "learning_rate": 1.7199155186056364e-06, "loss": 0.7453, "step": 580 }, { "epoch": 0.26758009268587546, "grad_norm": 3.3507561207198266, "learning_rate": 1.7188788696510474e-06, "loss": 0.5591, "step": 581 }, { "epoch": 0.26804064361993035, "grad_norm": 3.6763295819772, "learning_rate": 1.7178406194758067e-06, "loss": 0.5894, "step": 582 }, { "epoch": 0.2685011945539852, "grad_norm": 2.7982715860658836, "learning_rate": 1.7168007703924978e-06, "loss": 0.6436, "step": 583 }, { "epoch": 0.2689617454880401, "grad_norm": 3.1119980909697724, "learning_rate": 1.7157593247172664e-06, "loss": 0.557, "step": 584 }, { "epoch": 0.2694222964220949, "grad_norm": 3.69421493980708, "learning_rate": 1.714716284769813e-06, "loss": 0.6445, "step": 585 }, { "epoch": 0.2698828473561498, "grad_norm": 3.786080820523942, "learning_rate": 1.7136716528733912e-06, "loss": 0.417, "step": 586 }, { "epoch": 0.27034339829020465, "grad_norm": 4.270736109571739, "learning_rate": 1.7126254313547985e-06, "loss": 0.6734, "step": 587 }, { "epoch": 0.27080394922425954, "grad_norm": 3.600772785847931, "learning_rate": 1.7115776225443739e-06, "loss": 0.8145, "step": 588 }, { "epoch": 0.2712645001583144, "grad_norm": 2.8943620464394293, "learning_rate": 1.7105282287759926e-06, "loss": 0.528, "step": 589 }, { "epoch": 0.2717250510923693, "grad_norm": 3.0063394751214094, "learning_rate": 1.7094772523870589e-06, "loss": 0.5868, "step": 590 }, { "epoch": 0.2721856020264241, "grad_norm": 3.3735726594180613, "learning_rate": 1.7084246957185032e-06, "loss": 0.6339, "step": 591 }, { "epoch": 0.27264615296047895, "grad_norm": 3.3141049773629283, "learning_rate": 1.707370561114775e-06, "loss": 0.6053, "step": 592 }, { "epoch": 0.27310670389453384, "grad_norm": 2.80960222995646, "learning_rate": 1.7063148509238393e-06, "loss": 0.4687, "step": 593 }, { "epoch": 0.2735672548285887, "grad_norm": 3.313745439020705, "learning_rate": 1.70525756749717e-06, "loss": 0.6093, "step": 594 }, { "epoch": 0.2740278057626436, "grad_norm": 3.7937269183858318, "learning_rate": 1.7041987131897445e-06, "loss": 0.5746, "step": 595 }, { "epoch": 0.2744883566966984, "grad_norm": 2.9125080429436365, "learning_rate": 1.703138290360041e-06, "loss": 0.525, "step": 596 }, { "epoch": 0.2749489076307533, "grad_norm": 3.2588529493155285, "learning_rate": 1.7020763013700296e-06, "loss": 0.5566, "step": 597 }, { "epoch": 0.27540945856480814, "grad_norm": 3.0202214315810396, "learning_rate": 1.70101274858517e-06, "loss": 0.5711, "step": 598 }, { "epoch": 0.27587000949886303, "grad_norm": 3.3568365962069664, "learning_rate": 1.6999476343744047e-06, "loss": 0.5359, "step": 599 }, { "epoch": 0.27633056043291787, "grad_norm": 3.0103484901671638, "learning_rate": 1.6988809611101535e-06, "loss": 0.6112, "step": 600 }, { "epoch": 0.27679111136697276, "grad_norm": 3.4029649629281673, "learning_rate": 1.69781273116831e-06, "loss": 0.5336, "step": 601 }, { "epoch": 0.2772516623010276, "grad_norm": 2.8767133982019377, "learning_rate": 1.6967429469282345e-06, "loss": 0.6359, "step": 602 }, { "epoch": 0.2777122132350825, "grad_norm": 3.2835587382748024, "learning_rate": 1.695671610772749e-06, "loss": 0.5618, "step": 603 }, { "epoch": 0.27817276416913733, "grad_norm": 3.5991470633371283, "learning_rate": 1.694598725088133e-06, "loss": 0.5286, "step": 604 }, { "epoch": 0.27863331510319217, "grad_norm": 3.1120545309975958, "learning_rate": 1.6935242922641159e-06, "loss": 0.6178, "step": 605 }, { "epoch": 0.27909386603724706, "grad_norm": 3.1244820282575674, "learning_rate": 1.6924483146938754e-06, "loss": 0.5822, "step": 606 }, { "epoch": 0.2795544169713019, "grad_norm": 2.714150777824839, "learning_rate": 1.6913707947740284e-06, "loss": 0.5298, "step": 607 }, { "epoch": 0.2800149679053568, "grad_norm": 3.3837453901982486, "learning_rate": 1.690291734904627e-06, "loss": 0.5653, "step": 608 }, { "epoch": 0.28047551883941163, "grad_norm": 3.1917062613137785, "learning_rate": 1.6892111374891547e-06, "loss": 0.5825, "step": 609 }, { "epoch": 0.2809360697734665, "grad_norm": 3.3940845730675226, "learning_rate": 1.6881290049345185e-06, "loss": 0.5283, "step": 610 }, { "epoch": 0.28139662070752136, "grad_norm": 3.3197149723223127, "learning_rate": 1.6870453396510453e-06, "loss": 0.5643, "step": 611 }, { "epoch": 0.28185717164157625, "grad_norm": 3.368272451697648, "learning_rate": 1.6859601440524757e-06, "loss": 0.5448, "step": 612 }, { "epoch": 0.2823177225756311, "grad_norm": 3.66743262346132, "learning_rate": 1.6848734205559593e-06, "loss": 0.5931, "step": 613 }, { "epoch": 0.282778273509686, "grad_norm": 3.399278333660865, "learning_rate": 1.6837851715820488e-06, "loss": 0.6151, "step": 614 }, { "epoch": 0.2832388244437408, "grad_norm": 3.57422302010664, "learning_rate": 1.6826953995546945e-06, "loss": 0.5642, "step": 615 }, { "epoch": 0.2836993753777957, "grad_norm": 3.0447865419845286, "learning_rate": 1.6816041069012388e-06, "loss": 0.5781, "step": 616 }, { "epoch": 0.28415992631185055, "grad_norm": 3.072556146750414, "learning_rate": 1.680511296052412e-06, "loss": 0.5839, "step": 617 }, { "epoch": 0.2846204772459054, "grad_norm": 3.7455049180528484, "learning_rate": 1.6794169694423257e-06, "loss": 0.6444, "step": 618 }, { "epoch": 0.2850810281799603, "grad_norm": 3.0929934631543574, "learning_rate": 1.6783211295084669e-06, "loss": 0.6054, "step": 619 }, { "epoch": 0.2855415791140151, "grad_norm": 2.7963129880952415, "learning_rate": 1.677223778691695e-06, "loss": 0.586, "step": 620 }, { "epoch": 0.28600213004807, "grad_norm": 3.0409334266150263, "learning_rate": 1.6761249194362328e-06, "loss": 0.6534, "step": 621 }, { "epoch": 0.28646268098212485, "grad_norm": 3.541465069260458, "learning_rate": 1.6750245541896644e-06, "loss": 0.7103, "step": 622 }, { "epoch": 0.28692323191617974, "grad_norm": 3.2927125901227012, "learning_rate": 1.6739226854029276e-06, "loss": 0.493, "step": 623 }, { "epoch": 0.2873837828502346, "grad_norm": 2.761658471122107, "learning_rate": 1.6728193155303097e-06, "loss": 0.537, "step": 624 }, { "epoch": 0.2878443337842895, "grad_norm": 3.362612929819794, "learning_rate": 1.6717144470294406e-06, "loss": 0.6486, "step": 625 }, { "epoch": 0.2883048847183443, "grad_norm": 3.2064609899615877, "learning_rate": 1.6706080823612894e-06, "loss": 0.5923, "step": 626 }, { "epoch": 0.2887654356523992, "grad_norm": 2.9733446787329227, "learning_rate": 1.6695002239901569e-06, "loss": 0.6514, "step": 627 }, { "epoch": 0.28922598658645404, "grad_norm": 3.272788188514796, "learning_rate": 1.6683908743836711e-06, "loss": 0.599, "step": 628 }, { "epoch": 0.28968653752050894, "grad_norm": 3.073673873179751, "learning_rate": 1.6672800360127823e-06, "loss": 0.5548, "step": 629 }, { "epoch": 0.2901470884545638, "grad_norm": 3.1359157628557353, "learning_rate": 1.6661677113517553e-06, "loss": 0.6053, "step": 630 }, { "epoch": 0.2906076393886186, "grad_norm": 3.2988300051982056, "learning_rate": 1.6650539028781667e-06, "loss": 0.7002, "step": 631 }, { "epoch": 0.2910681903226735, "grad_norm": 3.025853139733737, "learning_rate": 1.663938613072898e-06, "loss": 0.5597, "step": 632 }, { "epoch": 0.29152874125672834, "grad_norm": 2.797667473613527, "learning_rate": 1.6628218444201299e-06, "loss": 0.579, "step": 633 }, { "epoch": 0.29198929219078323, "grad_norm": 2.482842591113322, "learning_rate": 1.6617035994073372e-06, "loss": 0.4768, "step": 634 }, { "epoch": 0.29244984312483807, "grad_norm": 3.2875064889505423, "learning_rate": 1.6605838805252828e-06, "loss": 0.6799, "step": 635 }, { "epoch": 0.29291039405889296, "grad_norm": 3.160691937520528, "learning_rate": 1.6594626902680126e-06, "loss": 0.4986, "step": 636 }, { "epoch": 0.2933709449929478, "grad_norm": 3.2855289808671384, "learning_rate": 1.6583400311328505e-06, "loss": 0.5377, "step": 637 }, { "epoch": 0.2938314959270027, "grad_norm": 2.9638072454998725, "learning_rate": 1.6572159056203915e-06, "loss": 0.6512, "step": 638 }, { "epoch": 0.29429204686105753, "grad_norm": 3.371604241327082, "learning_rate": 1.6560903162344966e-06, "loss": 0.5783, "step": 639 }, { "epoch": 0.2947525977951124, "grad_norm": 3.628939173234319, "learning_rate": 1.6549632654822875e-06, "loss": 0.6427, "step": 640 }, { "epoch": 0.29521314872916726, "grad_norm": 2.945921755545379, "learning_rate": 1.6538347558741422e-06, "loss": 0.4827, "step": 641 }, { "epoch": 0.29567369966322216, "grad_norm": 2.9596989147777513, "learning_rate": 1.652704789923686e-06, "loss": 0.7239, "step": 642 }, { "epoch": 0.296134250597277, "grad_norm": 3.597968564106779, "learning_rate": 1.6515733701477896e-06, "loss": 0.666, "step": 643 }, { "epoch": 0.29659480153133183, "grad_norm": 3.1426064389176087, "learning_rate": 1.6504404990665615e-06, "loss": 0.5744, "step": 644 }, { "epoch": 0.2970553524653867, "grad_norm": 3.120168720987635, "learning_rate": 1.6493061792033424e-06, "loss": 0.5737, "step": 645 }, { "epoch": 0.29751590339944156, "grad_norm": 2.7520653399387904, "learning_rate": 1.648170413084701e-06, "loss": 0.6586, "step": 646 }, { "epoch": 0.29797645433349645, "grad_norm": 2.7598009601994575, "learning_rate": 1.6470332032404258e-06, "loss": 0.5887, "step": 647 }, { "epoch": 0.2984370052675513, "grad_norm": 2.9219392147630456, "learning_rate": 1.6458945522035227e-06, "loss": 0.4776, "step": 648 }, { "epoch": 0.2988975562016062, "grad_norm": 2.918413331344107, "learning_rate": 1.6447544625102068e-06, "loss": 0.5586, "step": 649 }, { "epoch": 0.299358107135661, "grad_norm": 3.0539033522988275, "learning_rate": 1.6436129366998973e-06, "loss": 0.6355, "step": 650 }, { "epoch": 0.2998186580697159, "grad_norm": 3.0936916830976475, "learning_rate": 1.6424699773152138e-06, "loss": 0.6304, "step": 651 }, { "epoch": 0.30027920900377075, "grad_norm": 3.3790034098133384, "learning_rate": 1.6413255869019666e-06, "loss": 0.6053, "step": 652 }, { "epoch": 0.30073975993782565, "grad_norm": 3.244406880170614, "learning_rate": 1.640179768009156e-06, "loss": 0.5883, "step": 653 }, { "epoch": 0.3012003108718805, "grad_norm": 3.145131450305741, "learning_rate": 1.6390325231889616e-06, "loss": 0.4706, "step": 654 }, { "epoch": 0.3016608618059354, "grad_norm": 2.7110601957589404, "learning_rate": 1.6378838549967415e-06, "loss": 0.6071, "step": 655 }, { "epoch": 0.3021214127399902, "grad_norm": 3.343566724286675, "learning_rate": 1.6367337659910221e-06, "loss": 0.6244, "step": 656 }, { "epoch": 0.30258196367404505, "grad_norm": 3.209028090230382, "learning_rate": 1.6355822587334959e-06, "loss": 0.543, "step": 657 }, { "epoch": 0.30304251460809994, "grad_norm": 3.5185896956330422, "learning_rate": 1.6344293357890137e-06, "loss": 0.5821, "step": 658 }, { "epoch": 0.3035030655421548, "grad_norm": 2.8601115189735604, "learning_rate": 1.6332749997255804e-06, "loss": 0.6433, "step": 659 }, { "epoch": 0.3039636164762097, "grad_norm": 3.476073663868734, "learning_rate": 1.632119253114347e-06, "loss": 0.4601, "step": 660 }, { "epoch": 0.3044241674102645, "grad_norm": 3.2412899564729005, "learning_rate": 1.6309620985296072e-06, "loss": 0.7235, "step": 661 }, { "epoch": 0.3048847183443194, "grad_norm": 2.713918449445168, "learning_rate": 1.6298035385487918e-06, "loss": 0.6167, "step": 662 }, { "epoch": 0.30534526927837424, "grad_norm": 3.4033868200489623, "learning_rate": 1.6286435757524602e-06, "loss": 0.5718, "step": 663 }, { "epoch": 0.30580582021242914, "grad_norm": 3.0202609458048038, "learning_rate": 1.6274822127242974e-06, "loss": 0.4536, "step": 664 }, { "epoch": 0.306266371146484, "grad_norm": 3.001878947353794, "learning_rate": 1.6263194520511064e-06, "loss": 0.57, "step": 665 }, { "epoch": 0.30672692208053887, "grad_norm": 2.998809917811107, "learning_rate": 1.6251552963228048e-06, "loss": 0.5362, "step": 666 }, { "epoch": 0.3071874730145937, "grad_norm": 2.977451183975403, "learning_rate": 1.6239897481324164e-06, "loss": 0.4483, "step": 667 }, { "epoch": 0.3076480239486486, "grad_norm": 3.0423908095090733, "learning_rate": 1.6228228100760664e-06, "loss": 0.5557, "step": 668 }, { "epoch": 0.30810857488270343, "grad_norm": 3.043152280285196, "learning_rate": 1.6216544847529764e-06, "loss": 0.6267, "step": 669 }, { "epoch": 0.30856912581675827, "grad_norm": 2.742916110544047, "learning_rate": 1.620484774765458e-06, "loss": 0.5141, "step": 670 }, { "epoch": 0.30902967675081316, "grad_norm": 3.535942753468968, "learning_rate": 1.6193136827189065e-06, "loss": 0.7185, "step": 671 }, { "epoch": 0.309490227684868, "grad_norm": 3.4294982638238163, "learning_rate": 1.6181412112217957e-06, "loss": 0.6178, "step": 672 }, { "epoch": 0.3099507786189229, "grad_norm": 3.3208662012013104, "learning_rate": 1.6169673628856722e-06, "loss": 0.5777, "step": 673 }, { "epoch": 0.31041132955297773, "grad_norm": 3.6764674298301214, "learning_rate": 1.6157921403251492e-06, "loss": 0.5752, "step": 674 }, { "epoch": 0.3108718804870326, "grad_norm": 3.252440774762675, "learning_rate": 1.6146155461579007e-06, "loss": 0.7095, "step": 675 }, { "epoch": 0.31133243142108746, "grad_norm": 3.2165238232059084, "learning_rate": 1.6134375830046563e-06, "loss": 0.6169, "step": 676 }, { "epoch": 0.31179298235514236, "grad_norm": 3.1854273045665287, "learning_rate": 1.6122582534891942e-06, "loss": 0.6611, "step": 677 }, { "epoch": 0.3122535332891972, "grad_norm": 3.4248301426471297, "learning_rate": 1.6110775602383365e-06, "loss": 0.6427, "step": 678 }, { "epoch": 0.3127140842232521, "grad_norm": 3.113032011879882, "learning_rate": 1.6098955058819423e-06, "loss": 0.5213, "step": 679 }, { "epoch": 0.3131746351573069, "grad_norm": 3.3490627264511175, "learning_rate": 1.6087120930529036e-06, "loss": 0.5633, "step": 680 }, { "epoch": 0.3136351860913618, "grad_norm": 3.2500116220033375, "learning_rate": 1.6075273243871367e-06, "loss": 0.5387, "step": 681 }, { "epoch": 0.31409573702541665, "grad_norm": 3.6728777135144837, "learning_rate": 1.606341202523579e-06, "loss": 0.5113, "step": 682 }, { "epoch": 0.3145562879594715, "grad_norm": 2.683705848328992, "learning_rate": 1.6051537301041812e-06, "loss": 0.5355, "step": 683 }, { "epoch": 0.3150168388935264, "grad_norm": 3.4297858809617363, "learning_rate": 1.6039649097739032e-06, "loss": 0.6449, "step": 684 }, { "epoch": 0.3154773898275812, "grad_norm": 3.292570386362054, "learning_rate": 1.602774744180706e-06, "loss": 0.6552, "step": 685 }, { "epoch": 0.3159379407616361, "grad_norm": 3.123629844208179, "learning_rate": 1.601583235975548e-06, "loss": 0.4843, "step": 686 }, { "epoch": 0.31639849169569095, "grad_norm": 3.1402599959393473, "learning_rate": 1.6003903878123782e-06, "loss": 0.5134, "step": 687 }, { "epoch": 0.31685904262974585, "grad_norm": 3.3270904677521873, "learning_rate": 1.599196202348129e-06, "loss": 0.5501, "step": 688 }, { "epoch": 0.3173195935638007, "grad_norm": 3.5015088481238577, "learning_rate": 1.5980006822427123e-06, "loss": 0.5634, "step": 689 }, { "epoch": 0.3177801444978556, "grad_norm": 3.08180457972679, "learning_rate": 1.5968038301590133e-06, "loss": 0.6374, "step": 690 }, { "epoch": 0.3182406954319104, "grad_norm": 3.2445020121060715, "learning_rate": 1.5956056487628827e-06, "loss": 0.6407, "step": 691 }, { "epoch": 0.3187012463659653, "grad_norm": 3.233197428352453, "learning_rate": 1.5944061407231338e-06, "loss": 0.706, "step": 692 }, { "epoch": 0.31916179730002014, "grad_norm": 3.280658309577868, "learning_rate": 1.593205308711533e-06, "loss": 0.5266, "step": 693 }, { "epoch": 0.31962234823407504, "grad_norm": 3.5140387469250736, "learning_rate": 1.5920031554027969e-06, "loss": 0.5574, "step": 694 }, { "epoch": 0.3200828991681299, "grad_norm": 3.42036786677391, "learning_rate": 1.590799683474585e-06, "loss": 0.6599, "step": 695 }, { "epoch": 0.3205434501021847, "grad_norm": 3.1846836901173075, "learning_rate": 1.5895948956074933e-06, "loss": 0.668, "step": 696 }, { "epoch": 0.3210040010362396, "grad_norm": 3.3566033229272967, "learning_rate": 1.5883887944850495e-06, "loss": 0.6669, "step": 697 }, { "epoch": 0.32146455197029444, "grad_norm": 3.2045770323594382, "learning_rate": 1.5871813827937063e-06, "loss": 0.5538, "step": 698 }, { "epoch": 0.32192510290434934, "grad_norm": 2.9902993069464565, "learning_rate": 1.5859726632228357e-06, "loss": 0.5938, "step": 699 }, { "epoch": 0.3223856538384042, "grad_norm": 3.0057175558399485, "learning_rate": 1.5847626384647221e-06, "loss": 0.5181, "step": 700 }, { "epoch": 0.32284620477245907, "grad_norm": 3.5222478856728254, "learning_rate": 1.583551311214558e-06, "loss": 0.5791, "step": 701 }, { "epoch": 0.3233067557065139, "grad_norm": 3.998127982109179, "learning_rate": 1.5823386841704362e-06, "loss": 0.4773, "step": 702 }, { "epoch": 0.3237673066405688, "grad_norm": 2.995949094547311, "learning_rate": 1.5811247600333456e-06, "loss": 0.5852, "step": 703 }, { "epoch": 0.32422785757462363, "grad_norm": 2.9705371769732425, "learning_rate": 1.5799095415071628e-06, "loss": 0.5957, "step": 704 }, { "epoch": 0.3246884085086785, "grad_norm": 2.6316896525372537, "learning_rate": 1.5786930312986495e-06, "loss": 0.4241, "step": 705 }, { "epoch": 0.32514895944273337, "grad_norm": 3.200708018638179, "learning_rate": 1.5774752321174427e-06, "loss": 0.5434, "step": 706 }, { "epoch": 0.32560951037678826, "grad_norm": 3.452466534810513, "learning_rate": 1.576256146676051e-06, "loss": 0.5766, "step": 707 }, { "epoch": 0.3260700613108431, "grad_norm": 2.8456835798060816, "learning_rate": 1.575035777689849e-06, "loss": 0.4522, "step": 708 }, { "epoch": 0.32653061224489793, "grad_norm": 3.593887134081324, "learning_rate": 1.5738141278770685e-06, "loss": 0.628, "step": 709 }, { "epoch": 0.3269911631789528, "grad_norm": 3.217566834694963, "learning_rate": 1.5725911999587953e-06, "loss": 0.594, "step": 710 }, { "epoch": 0.32745171411300766, "grad_norm": 3.0506131303862234, "learning_rate": 1.5713669966589618e-06, "loss": 0.6114, "step": 711 }, { "epoch": 0.32791226504706256, "grad_norm": 3.2672150522195254, "learning_rate": 1.5701415207043414e-06, "loss": 0.5329, "step": 712 }, { "epoch": 0.3283728159811174, "grad_norm": 3.0741601299943957, "learning_rate": 1.5689147748245412e-06, "loss": 0.5823, "step": 713 }, { "epoch": 0.3288333669151723, "grad_norm": 2.7741610627962974, "learning_rate": 1.5676867617519986e-06, "loss": 0.5868, "step": 714 }, { "epoch": 0.3292939178492271, "grad_norm": 3.3153530179766766, "learning_rate": 1.566457484221972e-06, "loss": 0.5851, "step": 715 }, { "epoch": 0.329754468783282, "grad_norm": 3.317026645580635, "learning_rate": 1.5652269449725374e-06, "loss": 0.6181, "step": 716 }, { "epoch": 0.33021501971733686, "grad_norm": 3.404975554795291, "learning_rate": 1.5639951467445798e-06, "loss": 0.6734, "step": 717 }, { "epoch": 0.33067557065139175, "grad_norm": 2.8122628566375933, "learning_rate": 1.5627620922817895e-06, "loss": 0.5022, "step": 718 }, { "epoch": 0.3311361215854466, "grad_norm": 2.8689456040300954, "learning_rate": 1.561527784330655e-06, "loss": 0.4525, "step": 719 }, { "epoch": 0.3315966725195015, "grad_norm": 3.1909349124276734, "learning_rate": 1.5602922256404556e-06, "loss": 0.6127, "step": 720 }, { "epoch": 0.3320572234535563, "grad_norm": 3.0736929753426776, "learning_rate": 1.559055418963258e-06, "loss": 0.5143, "step": 721 }, { "epoch": 0.33251777438761115, "grad_norm": 2.9374948837179518, "learning_rate": 1.557817367053908e-06, "loss": 0.4802, "step": 722 }, { "epoch": 0.33297832532166605, "grad_norm": 2.8350541529140316, "learning_rate": 1.5565780726700244e-06, "loss": 0.4546, "step": 723 }, { "epoch": 0.3334388762557209, "grad_norm": 2.9599261081932027, "learning_rate": 1.5553375385719943e-06, "loss": 0.5198, "step": 724 }, { "epoch": 0.3338994271897758, "grad_norm": 3.2317683594863715, "learning_rate": 1.5540957675229663e-06, "loss": 0.5195, "step": 725 }, { "epoch": 0.3343599781238306, "grad_norm": 3.296064700074342, "learning_rate": 1.5528527622888428e-06, "loss": 0.6044, "step": 726 }, { "epoch": 0.3348205290578855, "grad_norm": 2.863524627128321, "learning_rate": 1.5516085256382764e-06, "loss": 0.6144, "step": 727 }, { "epoch": 0.33528107999194035, "grad_norm": 3.0821132303925594, "learning_rate": 1.5503630603426628e-06, "loss": 0.576, "step": 728 }, { "epoch": 0.33574163092599524, "grad_norm": 2.9688429474981026, "learning_rate": 1.5491163691761334e-06, "loss": 0.5294, "step": 729 }, { "epoch": 0.3362021818600501, "grad_norm": 3.268176435044111, "learning_rate": 1.5478684549155507e-06, "loss": 0.523, "step": 730 }, { "epoch": 0.33666273279410497, "grad_norm": 3.3700516164655374, "learning_rate": 1.5466193203405015e-06, "loss": 0.6837, "step": 731 }, { "epoch": 0.3371232837281598, "grad_norm": 3.2319186451474646, "learning_rate": 1.5453689682332898e-06, "loss": 0.6385, "step": 732 }, { "epoch": 0.3375838346622147, "grad_norm": 3.4936427230146987, "learning_rate": 1.5441174013789326e-06, "loss": 0.6258, "step": 733 }, { "epoch": 0.33804438559626954, "grad_norm": 3.305223919072284, "learning_rate": 1.5428646225651525e-06, "loss": 0.5428, "step": 734 }, { "epoch": 0.3385049365303244, "grad_norm": 2.9963569062928777, "learning_rate": 1.5416106345823714e-06, "loss": 0.5679, "step": 735 }, { "epoch": 0.33896548746437927, "grad_norm": 3.3209457502261026, "learning_rate": 1.540355440223704e-06, "loss": 0.6125, "step": 736 }, { "epoch": 0.3394260383984341, "grad_norm": 3.304460716728442, "learning_rate": 1.5390990422849528e-06, "loss": 0.5128, "step": 737 }, { "epoch": 0.339886589332489, "grad_norm": 3.177691233283076, "learning_rate": 1.5378414435646008e-06, "loss": 0.6157, "step": 738 }, { "epoch": 0.34034714026654384, "grad_norm": 3.2484901797414687, "learning_rate": 1.5365826468638057e-06, "loss": 0.4804, "step": 739 }, { "epoch": 0.34080769120059873, "grad_norm": 2.97003236725624, "learning_rate": 1.5353226549863933e-06, "loss": 0.5174, "step": 740 }, { "epoch": 0.34126824213465357, "grad_norm": 3.0081534150970985, "learning_rate": 1.5340614707388516e-06, "loss": 0.5972, "step": 741 }, { "epoch": 0.34172879306870846, "grad_norm": 3.0537773804963044, "learning_rate": 1.5327990969303256e-06, "loss": 0.6268, "step": 742 }, { "epoch": 0.3421893440027633, "grad_norm": 2.4616414555847537, "learning_rate": 1.531535536372608e-06, "loss": 0.5428, "step": 743 }, { "epoch": 0.3426498949368182, "grad_norm": 3.459066350919476, "learning_rate": 1.5302707918801354e-06, "loss": 0.6306, "step": 744 }, { "epoch": 0.343110445870873, "grad_norm": 3.352692171370741, "learning_rate": 1.5290048662699828e-06, "loss": 0.5567, "step": 745 }, { "epoch": 0.3435709968049279, "grad_norm": 3.444376949244577, "learning_rate": 1.5277377623618546e-06, "loss": 0.587, "step": 746 }, { "epoch": 0.34403154773898276, "grad_norm": 3.018488954664052, "learning_rate": 1.5264694829780801e-06, "loss": 0.6234, "step": 747 }, { "epoch": 0.3444920986730376, "grad_norm": 4.059892549594848, "learning_rate": 1.5252000309436071e-06, "loss": 0.5284, "step": 748 }, { "epoch": 0.3449526496070925, "grad_norm": 3.1529595103252848, "learning_rate": 1.5239294090859948e-06, "loss": 0.5514, "step": 749 }, { "epoch": 0.3454132005411473, "grad_norm": 3.2294187071291938, "learning_rate": 1.522657620235409e-06, "loss": 0.5978, "step": 750 }, { "epoch": 0.3458737514752022, "grad_norm": 3.1490886609776685, "learning_rate": 1.5213846672246138e-06, "loss": 0.596, "step": 751 }, { "epoch": 0.34633430240925706, "grad_norm": 3.1168755025387362, "learning_rate": 1.5201105528889666e-06, "loss": 0.5168, "step": 752 }, { "epoch": 0.34679485334331195, "grad_norm": 3.054962204132803, "learning_rate": 1.5188352800664119e-06, "loss": 0.5459, "step": 753 }, { "epoch": 0.3472554042773668, "grad_norm": 3.575951938973164, "learning_rate": 1.5175588515974748e-06, "loss": 0.6461, "step": 754 }, { "epoch": 0.3477159552114217, "grad_norm": 2.8785648294557085, "learning_rate": 1.5162812703252537e-06, "loss": 0.4715, "step": 755 }, { "epoch": 0.3481765061454765, "grad_norm": 3.2534706235609585, "learning_rate": 1.5150025390954152e-06, "loss": 0.5632, "step": 756 }, { "epoch": 0.3486370570795314, "grad_norm": 3.0206088523549974, "learning_rate": 1.513722660756187e-06, "loss": 0.4888, "step": 757 }, { "epoch": 0.34909760801358625, "grad_norm": 2.7017314694335086, "learning_rate": 1.5124416381583517e-06, "loss": 0.4697, "step": 758 }, { "epoch": 0.34955815894764114, "grad_norm": 3.3514313878368625, "learning_rate": 1.5111594741552423e-06, "loss": 0.5044, "step": 759 }, { "epoch": 0.350018709881696, "grad_norm": 3.338106474247453, "learning_rate": 1.5098761716027315e-06, "loss": 0.5476, "step": 760 }, { "epoch": 0.3504792608157508, "grad_norm": 3.3207826162715666, "learning_rate": 1.5085917333592297e-06, "loss": 0.6685, "step": 761 }, { "epoch": 0.3509398117498057, "grad_norm": 2.882204620095301, "learning_rate": 1.5073061622856765e-06, "loss": 0.4214, "step": 762 }, { "epoch": 0.35140036268386055, "grad_norm": 3.2702426268023856, "learning_rate": 1.506019461245535e-06, "loss": 0.4785, "step": 763 }, { "epoch": 0.35186091361791544, "grad_norm": 3.393774716059176, "learning_rate": 1.5047316331047846e-06, "loss": 0.5249, "step": 764 }, { "epoch": 0.3523214645519703, "grad_norm": 2.626311070223108, "learning_rate": 1.5034426807319162e-06, "loss": 0.5107, "step": 765 }, { "epoch": 0.35278201548602517, "grad_norm": 3.0508858155806586, "learning_rate": 1.5021526069979232e-06, "loss": 0.6297, "step": 766 }, { "epoch": 0.35324256642008, "grad_norm": 3.2942945723575336, "learning_rate": 1.5008614147762982e-06, "loss": 0.6742, "step": 767 }, { "epoch": 0.3537031173541349, "grad_norm": 3.375703871690601, "learning_rate": 1.4995691069430244e-06, "loss": 0.6203, "step": 768 }, { "epoch": 0.35416366828818974, "grad_norm": 3.11233555593855, "learning_rate": 1.49827568637657e-06, "loss": 0.5992, "step": 769 }, { "epoch": 0.35462421922224463, "grad_norm": 2.8563952523571965, "learning_rate": 1.4969811559578818e-06, "loss": 0.4969, "step": 770 }, { "epoch": 0.35508477015629947, "grad_norm": 3.2356844981316235, "learning_rate": 1.4956855185703786e-06, "loss": 0.4214, "step": 771 }, { "epoch": 0.35554532109035436, "grad_norm": 2.8462493199493024, "learning_rate": 1.4943887770999447e-06, "loss": 0.5523, "step": 772 }, { "epoch": 0.3560058720244092, "grad_norm": 3.5665113209504105, "learning_rate": 1.493090934434924e-06, "loss": 0.5731, "step": 773 }, { "epoch": 0.35646642295846404, "grad_norm": 3.169353405800153, "learning_rate": 1.4917919934661128e-06, "loss": 0.5358, "step": 774 }, { "epoch": 0.35692697389251893, "grad_norm": 3.121133974442328, "learning_rate": 1.4904919570867539e-06, "loss": 0.5471, "step": 775 }, { "epoch": 0.35738752482657377, "grad_norm": 3.0928538679959243, "learning_rate": 1.4891908281925298e-06, "loss": 0.6502, "step": 776 }, { "epoch": 0.35784807576062866, "grad_norm": 3.2886205307157823, "learning_rate": 1.4878886096815569e-06, "loss": 0.5099, "step": 777 }, { "epoch": 0.3583086266946835, "grad_norm": 3.041026939521522, "learning_rate": 1.486585304454378e-06, "loss": 0.5957, "step": 778 }, { "epoch": 0.3587691776287384, "grad_norm": 2.8217805539112732, "learning_rate": 1.4852809154139576e-06, "loss": 0.5313, "step": 779 }, { "epoch": 0.3592297285627932, "grad_norm": 2.6440360065601145, "learning_rate": 1.4839754454656723e-06, "loss": 0.5048, "step": 780 }, { "epoch": 0.3596902794968481, "grad_norm": 3.4469745235083393, "learning_rate": 1.4826688975173084e-06, "loss": 0.6932, "step": 781 }, { "epoch": 0.36015083043090296, "grad_norm": 4.096492089448597, "learning_rate": 1.481361274479052e-06, "loss": 0.5383, "step": 782 }, { "epoch": 0.36061138136495785, "grad_norm": 2.9081019095353606, "learning_rate": 1.4800525792634838e-06, "loss": 0.5196, "step": 783 }, { "epoch": 0.3610719322990127, "grad_norm": 3.0363832332393073, "learning_rate": 1.4787428147855737e-06, "loss": 0.5606, "step": 784 }, { "epoch": 0.3615324832330676, "grad_norm": 3.404973081364874, "learning_rate": 1.4774319839626725e-06, "loss": 0.6067, "step": 785 }, { "epoch": 0.3619930341671224, "grad_norm": 2.9657594105232916, "learning_rate": 1.476120089714506e-06, "loss": 0.5427, "step": 786 }, { "epoch": 0.36245358510117726, "grad_norm": 3.6192624322683256, "learning_rate": 1.4748071349631693e-06, "loss": 0.6706, "step": 787 }, { "epoch": 0.36291413603523215, "grad_norm": 3.526460171368003, "learning_rate": 1.4734931226331188e-06, "loss": 0.5103, "step": 788 }, { "epoch": 0.363374686969287, "grad_norm": 2.8430807766055577, "learning_rate": 1.4721780556511674e-06, "loss": 0.4935, "step": 789 }, { "epoch": 0.3638352379033419, "grad_norm": 3.264151914199905, "learning_rate": 1.4708619369464765e-06, "loss": 0.6703, "step": 790 }, { "epoch": 0.3642957888373967, "grad_norm": 3.5948426136873843, "learning_rate": 1.469544769450551e-06, "loss": 0.5244, "step": 791 }, { "epoch": 0.3647563397714516, "grad_norm": 3.2156355781796018, "learning_rate": 1.46822655609723e-06, "loss": 0.5689, "step": 792 }, { "epoch": 0.36521689070550645, "grad_norm": 3.4654337540005002, "learning_rate": 1.4669072998226843e-06, "loss": 0.5605, "step": 793 }, { "epoch": 0.36567744163956134, "grad_norm": 3.0725623075599184, "learning_rate": 1.4655870035654065e-06, "loss": 0.6267, "step": 794 }, { "epoch": 0.3661379925736162, "grad_norm": 3.029931817827877, "learning_rate": 1.4642656702662058e-06, "loss": 0.6285, "step": 795 }, { "epoch": 0.36659854350767107, "grad_norm": 2.9315413167488558, "learning_rate": 1.4629433028682013e-06, "loss": 0.5949, "step": 796 }, { "epoch": 0.3670590944417259, "grad_norm": 3.151885439468786, "learning_rate": 1.4616199043168154e-06, "loss": 0.6758, "step": 797 }, { "epoch": 0.3675196453757808, "grad_norm": 3.0346115082411655, "learning_rate": 1.4602954775597673e-06, "loss": 0.5696, "step": 798 }, { "epoch": 0.36798019630983564, "grad_norm": 3.2157712771710827, "learning_rate": 1.458970025547067e-06, "loss": 0.6722, "step": 799 }, { "epoch": 0.36844074724389053, "grad_norm": 3.2513066677812748, "learning_rate": 1.457643551231007e-06, "loss": 0.6715, "step": 800 }, { "epoch": 0.36890129817794537, "grad_norm": 3.5891769122485653, "learning_rate": 1.456316057566158e-06, "loss": 0.6068, "step": 801 }, { "epoch": 0.3693618491120002, "grad_norm": 2.9833048436720655, "learning_rate": 1.45498754750936e-06, "loss": 0.5265, "step": 802 }, { "epoch": 0.3698224000460551, "grad_norm": 3.0143882511843754, "learning_rate": 1.453658024019718e-06, "loss": 0.6277, "step": 803 }, { "epoch": 0.37028295098010994, "grad_norm": 3.303125029561216, "learning_rate": 1.4523274900585942e-06, "loss": 0.5178, "step": 804 }, { "epoch": 0.37074350191416483, "grad_norm": 2.96600210169327, "learning_rate": 1.4509959485896004e-06, "loss": 0.6883, "step": 805 }, { "epoch": 0.37120405284821967, "grad_norm": 3.222533442523701, "learning_rate": 1.4496634025785937e-06, "loss": 0.5035, "step": 806 }, { "epoch": 0.37166460378227456, "grad_norm": 3.3343786199048027, "learning_rate": 1.4483298549936684e-06, "loss": 0.5683, "step": 807 }, { "epoch": 0.3721251547163294, "grad_norm": 3.194169003790268, "learning_rate": 1.4469953088051497e-06, "loss": 0.6087, "step": 808 }, { "epoch": 0.3725857056503843, "grad_norm": 3.1623966174805083, "learning_rate": 1.445659766985586e-06, "loss": 0.5334, "step": 809 }, { "epoch": 0.37304625658443913, "grad_norm": 2.9573679310798138, "learning_rate": 1.4443232325097454e-06, "loss": 0.4965, "step": 810 }, { "epoch": 0.373506807518494, "grad_norm": 3.1354387245216766, "learning_rate": 1.4429857083546053e-06, "loss": 0.4966, "step": 811 }, { "epoch": 0.37396735845254886, "grad_norm": 3.7718550261376045, "learning_rate": 1.4416471974993487e-06, "loss": 0.4793, "step": 812 }, { "epoch": 0.37442790938660375, "grad_norm": 3.0917131083646194, "learning_rate": 1.4403077029253553e-06, "loss": 0.6511, "step": 813 }, { "epoch": 0.3748884603206586, "grad_norm": 3.060066893764434, "learning_rate": 1.4389672276161963e-06, "loss": 0.4503, "step": 814 }, { "epoch": 0.3753490112547134, "grad_norm": 3.0880275565306006, "learning_rate": 1.4376257745576282e-06, "loss": 0.5234, "step": 815 }, { "epoch": 0.3758095621887683, "grad_norm": 3.3246972054582233, "learning_rate": 1.4362833467375836e-06, "loss": 0.5849, "step": 816 }, { "epoch": 0.37627011312282316, "grad_norm": 3.5727318566464263, "learning_rate": 1.4349399471461684e-06, "loss": 0.6084, "step": 817 }, { "epoch": 0.37673066405687805, "grad_norm": 3.688093227741573, "learning_rate": 1.4335955787756513e-06, "loss": 0.6864, "step": 818 }, { "epoch": 0.3771912149909329, "grad_norm": 3.6380575782207702, "learning_rate": 1.4322502446204592e-06, "loss": 0.6678, "step": 819 }, { "epoch": 0.3776517659249878, "grad_norm": 3.1559208653988633, "learning_rate": 1.4309039476771706e-06, "loss": 0.4839, "step": 820 }, { "epoch": 0.3781123168590426, "grad_norm": 3.7163228600713745, "learning_rate": 1.429556690944509e-06, "loss": 0.6348, "step": 821 }, { "epoch": 0.3785728677930975, "grad_norm": 2.9842451520879876, "learning_rate": 1.4282084774233338e-06, "loss": 0.4819, "step": 822 }, { "epoch": 0.37903341872715235, "grad_norm": 3.069211559831523, "learning_rate": 1.4268593101166378e-06, "loss": 0.6175, "step": 823 }, { "epoch": 0.37949396966120724, "grad_norm": 3.115770022996151, "learning_rate": 1.4255091920295367e-06, "loss": 0.6113, "step": 824 }, { "epoch": 0.3799545205952621, "grad_norm": 3.2472503472001484, "learning_rate": 1.4241581261692647e-06, "loss": 0.6686, "step": 825 }, { "epoch": 0.380415071529317, "grad_norm": 3.1742136266716314, "learning_rate": 1.422806115545167e-06, "loss": 0.6194, "step": 826 }, { "epoch": 0.3808756224633718, "grad_norm": 2.7416191430113788, "learning_rate": 1.4214531631686929e-06, "loss": 0.575, "step": 827 }, { "epoch": 0.38133617339742665, "grad_norm": 2.991715260469191, "learning_rate": 1.4200992720533886e-06, "loss": 0.6348, "step": 828 }, { "epoch": 0.38179672433148154, "grad_norm": 3.3536018044430143, "learning_rate": 1.4187444452148934e-06, "loss": 0.5707, "step": 829 }, { "epoch": 0.3822572752655364, "grad_norm": 2.9504178057835957, "learning_rate": 1.4173886856709288e-06, "loss": 0.5539, "step": 830 }, { "epoch": 0.38271782619959127, "grad_norm": 3.4467632526859826, "learning_rate": 1.416031996441294e-06, "loss": 0.5473, "step": 831 }, { "epoch": 0.3831783771336461, "grad_norm": 3.1022606765939877, "learning_rate": 1.4146743805478605e-06, "loss": 0.5181, "step": 832 }, { "epoch": 0.383638928067701, "grad_norm": 2.8177854685524575, "learning_rate": 1.413315841014562e-06, "loss": 0.5749, "step": 833 }, { "epoch": 0.38409947900175584, "grad_norm": 3.6263375137296614, "learning_rate": 1.4119563808673905e-06, "loss": 0.5913, "step": 834 }, { "epoch": 0.38456002993581073, "grad_norm": 3.0015961938916806, "learning_rate": 1.4105960031343889e-06, "loss": 0.5724, "step": 835 }, { "epoch": 0.38502058086986557, "grad_norm": 3.0678893744276357, "learning_rate": 1.4092347108456424e-06, "loss": 0.6605, "step": 836 }, { "epoch": 0.38548113180392046, "grad_norm": 2.899485221765189, "learning_rate": 1.4078725070332746e-06, "loss": 0.3908, "step": 837 }, { "epoch": 0.3859416827379753, "grad_norm": 3.085157334581907, "learning_rate": 1.4065093947314396e-06, "loss": 0.4573, "step": 838 }, { "epoch": 0.3864022336720302, "grad_norm": 3.4352911831634887, "learning_rate": 1.4051453769763143e-06, "loss": 0.6433, "step": 839 }, { "epoch": 0.38686278460608503, "grad_norm": 3.742434977201632, "learning_rate": 1.4037804568060919e-06, "loss": 0.5888, "step": 840 }, { "epoch": 0.38732333554013987, "grad_norm": 2.9975870813637586, "learning_rate": 1.402414637260977e-06, "loss": 0.5201, "step": 841 }, { "epoch": 0.38778388647419476, "grad_norm": 3.0225885866635367, "learning_rate": 1.4010479213831762e-06, "loss": 0.4794, "step": 842 }, { "epoch": 0.3882444374082496, "grad_norm": 3.2296706627603857, "learning_rate": 1.399680312216894e-06, "loss": 0.5355, "step": 843 }, { "epoch": 0.3887049883423045, "grad_norm": 3.0042503059260883, "learning_rate": 1.3983118128083234e-06, "loss": 0.5446, "step": 844 }, { "epoch": 0.38916553927635933, "grad_norm": 3.5187839319359684, "learning_rate": 1.3969424262056402e-06, "loss": 0.598, "step": 845 }, { "epoch": 0.3896260902104142, "grad_norm": 3.059136381267136, "learning_rate": 1.3955721554589975e-06, "loss": 0.621, "step": 846 }, { "epoch": 0.39008664114446906, "grad_norm": 3.186481313726005, "learning_rate": 1.3942010036205165e-06, "loss": 0.5483, "step": 847 }, { "epoch": 0.39054719207852395, "grad_norm": 3.3209014633626133, "learning_rate": 1.392828973744282e-06, "loss": 0.5836, "step": 848 }, { "epoch": 0.3910077430125788, "grad_norm": 3.3185866705706184, "learning_rate": 1.3914560688863336e-06, "loss": 0.4829, "step": 849 }, { "epoch": 0.3914682939466337, "grad_norm": 3.1927851100882862, "learning_rate": 1.39008229210466e-06, "loss": 0.5053, "step": 850 }, { "epoch": 0.3919288448806885, "grad_norm": 3.3595068103015473, "learning_rate": 1.3887076464591928e-06, "loss": 0.5338, "step": 851 }, { "epoch": 0.3923893958147434, "grad_norm": 2.7905012085301837, "learning_rate": 1.3873321350117981e-06, "loss": 0.4979, "step": 852 }, { "epoch": 0.39284994674879825, "grad_norm": 3.090347369854456, "learning_rate": 1.3859557608262705e-06, "loss": 0.5797, "step": 853 }, { "epoch": 0.3933104976828531, "grad_norm": 3.060994610172803, "learning_rate": 1.384578526968326e-06, "loss": 0.5184, "step": 854 }, { "epoch": 0.393771048616908, "grad_norm": 3.6607469509314914, "learning_rate": 1.3832004365055974e-06, "loss": 0.5485, "step": 855 }, { "epoch": 0.3942315995509628, "grad_norm": 2.7397062923925053, "learning_rate": 1.3818214925076223e-06, "loss": 0.4594, "step": 856 }, { "epoch": 0.3946921504850177, "grad_norm": 3.0413655649505262, "learning_rate": 1.380441698045842e-06, "loss": 0.495, "step": 857 }, { "epoch": 0.39515270141907255, "grad_norm": 3.4287955433322423, "learning_rate": 1.3790610561935911e-06, "loss": 0.645, "step": 858 }, { "epoch": 0.39561325235312744, "grad_norm": 3.3357893051930816, "learning_rate": 1.3776795700260915e-06, "loss": 0.6037, "step": 859 }, { "epoch": 0.3960738032871823, "grad_norm": 3.6164730160728413, "learning_rate": 1.3762972426204461e-06, "loss": 0.622, "step": 860 }, { "epoch": 0.3965343542212372, "grad_norm": 3.386533311875332, "learning_rate": 1.374914077055632e-06, "loss": 0.6871, "step": 861 }, { "epoch": 0.396994905155292, "grad_norm": 2.9410360813121352, "learning_rate": 1.3735300764124916e-06, "loss": 0.5102, "step": 862 }, { "epoch": 0.3974554560893469, "grad_norm": 3.103887613925395, "learning_rate": 1.3721452437737293e-06, "loss": 0.5432, "step": 863 }, { "epoch": 0.39791600702340174, "grad_norm": 3.517765292291276, "learning_rate": 1.3707595822239015e-06, "loss": 0.5159, "step": 864 }, { "epoch": 0.39837655795745663, "grad_norm": 3.043417914703796, "learning_rate": 1.3693730948494114e-06, "loss": 0.5317, "step": 865 }, { "epoch": 0.39883710889151147, "grad_norm": 3.338449707312485, "learning_rate": 1.3679857847385009e-06, "loss": 0.632, "step": 866 }, { "epoch": 0.3992976598255663, "grad_norm": 3.253152412711078, "learning_rate": 1.3665976549812452e-06, "loss": 0.5134, "step": 867 }, { "epoch": 0.3997582107596212, "grad_norm": 3.072410142653441, "learning_rate": 1.365208708669545e-06, "loss": 0.4643, "step": 868 }, { "epoch": 0.40021876169367604, "grad_norm": 3.525403495130597, "learning_rate": 1.36381894889712e-06, "loss": 0.5517, "step": 869 }, { "epoch": 0.40067931262773093, "grad_norm": 3.556527192054287, "learning_rate": 1.362428378759501e-06, "loss": 0.6111, "step": 870 }, { "epoch": 0.40113986356178577, "grad_norm": 3.1449463321315236, "learning_rate": 1.3610370013540247e-06, "loss": 0.5445, "step": 871 }, { "epoch": 0.40160041449584066, "grad_norm": 3.0202852656779577, "learning_rate": 1.3596448197798253e-06, "loss": 0.576, "step": 872 }, { "epoch": 0.4020609654298955, "grad_norm": 3.08760829256673, "learning_rate": 1.3582518371378282e-06, "loss": 0.6271, "step": 873 }, { "epoch": 0.4025215163639504, "grad_norm": 3.328960142744962, "learning_rate": 1.3568580565307436e-06, "loss": 0.5746, "step": 874 }, { "epoch": 0.40298206729800523, "grad_norm": 3.0333254368255136, "learning_rate": 1.355463481063059e-06, "loss": 0.5229, "step": 875 }, { "epoch": 0.4034426182320601, "grad_norm": 2.933573447144409, "learning_rate": 1.3540681138410314e-06, "loss": 0.5778, "step": 876 }, { "epoch": 0.40390316916611496, "grad_norm": 2.882788964082942, "learning_rate": 1.3526719579726829e-06, "loss": 0.5073, "step": 877 }, { "epoch": 0.40436372010016985, "grad_norm": 3.2365144628176568, "learning_rate": 1.3512750165677906e-06, "loss": 0.5211, "step": 878 }, { "epoch": 0.4048242710342247, "grad_norm": 3.38193990207538, "learning_rate": 1.3498772927378824e-06, "loss": 0.606, "step": 879 }, { "epoch": 0.40528482196827953, "grad_norm": 3.56597317135757, "learning_rate": 1.348478789596229e-06, "loss": 0.6701, "step": 880 }, { "epoch": 0.4057453729023344, "grad_norm": 3.324345046149201, "learning_rate": 1.3470795102578355e-06, "loss": 0.6522, "step": 881 }, { "epoch": 0.40620592383638926, "grad_norm": 3.340866287319074, "learning_rate": 1.3456794578394382e-06, "loss": 0.6136, "step": 882 }, { "epoch": 0.40666647477044415, "grad_norm": 3.131575960928237, "learning_rate": 1.3442786354594937e-06, "loss": 0.5138, "step": 883 }, { "epoch": 0.407127025704499, "grad_norm": 3.1323338349582306, "learning_rate": 1.3428770462381739e-06, "loss": 0.5023, "step": 884 }, { "epoch": 0.4075875766385539, "grad_norm": 2.8727768076877664, "learning_rate": 1.3414746932973583e-06, "loss": 0.5462, "step": 885 }, { "epoch": 0.4080481275726087, "grad_norm": 2.818312617541394, "learning_rate": 1.340071579760629e-06, "loss": 0.5003, "step": 886 }, { "epoch": 0.4085086785066636, "grad_norm": 2.8944571031714674, "learning_rate": 1.338667708753261e-06, "loss": 0.4793, "step": 887 }, { "epoch": 0.40896922944071845, "grad_norm": 3.0617046790599582, "learning_rate": 1.3372630834022165e-06, "loss": 0.5714, "step": 888 }, { "epoch": 0.40942978037477334, "grad_norm": 3.3240252994477477, "learning_rate": 1.3358577068361383e-06, "loss": 0.7543, "step": 889 }, { "epoch": 0.4098903313088282, "grad_norm": 3.387874616156958, "learning_rate": 1.3344515821853427e-06, "loss": 0.5331, "step": 890 }, { "epoch": 0.4103508822428831, "grad_norm": 2.9538858616900296, "learning_rate": 1.3330447125818114e-06, "loss": 0.5126, "step": 891 }, { "epoch": 0.4108114331769379, "grad_norm": 2.6905912340286045, "learning_rate": 1.331637101159186e-06, "loss": 0.6335, "step": 892 }, { "epoch": 0.41127198411099275, "grad_norm": 2.8234464497330842, "learning_rate": 1.3302287510527606e-06, "loss": 0.485, "step": 893 }, { "epoch": 0.41173253504504764, "grad_norm": 3.364736286801205, "learning_rate": 1.3288196653994742e-06, "loss": 0.6351, "step": 894 }, { "epoch": 0.4121930859791025, "grad_norm": 3.148255912278157, "learning_rate": 1.3274098473379041e-06, "loss": 0.501, "step": 895 }, { "epoch": 0.4126536369131574, "grad_norm": 2.9927281051396397, "learning_rate": 1.3259993000082597e-06, "loss": 0.6576, "step": 896 }, { "epoch": 0.4131141878472122, "grad_norm": 2.6304206915731387, "learning_rate": 1.3245880265523737e-06, "loss": 0.5005, "step": 897 }, { "epoch": 0.4135747387812671, "grad_norm": 3.033315201466835, "learning_rate": 1.3231760301136968e-06, "loss": 0.5032, "step": 898 }, { "epoch": 0.41403528971532194, "grad_norm": 3.153932562017399, "learning_rate": 1.32176331383729e-06, "loss": 0.6438, "step": 899 }, { "epoch": 0.41449584064937683, "grad_norm": 3.0569467140610564, "learning_rate": 1.3203498808698177e-06, "loss": 0.4582, "step": 900 }, { "epoch": 0.41495639158343167, "grad_norm": 3.4798136677368525, "learning_rate": 1.3189357343595405e-06, "loss": 0.4739, "step": 901 }, { "epoch": 0.41541694251748656, "grad_norm": 3.267066000558948, "learning_rate": 1.317520877456308e-06, "loss": 0.5333, "step": 902 }, { "epoch": 0.4158774934515414, "grad_norm": 3.0973720845505057, "learning_rate": 1.3161053133115534e-06, "loss": 0.6643, "step": 903 }, { "epoch": 0.4163380443855963, "grad_norm": 3.124046522546217, "learning_rate": 1.3146890450782833e-06, "loss": 0.6385, "step": 904 }, { "epoch": 0.41679859531965113, "grad_norm": 3.2564321361920348, "learning_rate": 1.3132720759110742e-06, "loss": 0.5914, "step": 905 }, { "epoch": 0.41725914625370597, "grad_norm": 3.5529506389797207, "learning_rate": 1.3118544089660632e-06, "loss": 0.5549, "step": 906 }, { "epoch": 0.41771969718776086, "grad_norm": 3.1088968115091458, "learning_rate": 1.3104360474009413e-06, "loss": 0.5016, "step": 907 }, { "epoch": 0.4181802481218157, "grad_norm": 2.961528786967664, "learning_rate": 1.3090169943749473e-06, "loss": 0.6016, "step": 908 }, { "epoch": 0.4186407990558706, "grad_norm": 2.9590563505493557, "learning_rate": 1.3075972530488601e-06, "loss": 0.6245, "step": 909 }, { "epoch": 0.41910134998992543, "grad_norm": 3.2817559385376502, "learning_rate": 1.306176826584991e-06, "loss": 0.5741, "step": 910 }, { "epoch": 0.4195619009239803, "grad_norm": 3.1851733205126456, "learning_rate": 1.3047557181471782e-06, "loss": 0.6102, "step": 911 }, { "epoch": 0.42002245185803516, "grad_norm": 3.3709380563412856, "learning_rate": 1.3033339309007782e-06, "loss": 0.6029, "step": 912 }, { "epoch": 0.42048300279209005, "grad_norm": 2.978439007428691, "learning_rate": 1.3019114680126607e-06, "loss": 0.6095, "step": 913 }, { "epoch": 0.4209435537261449, "grad_norm": 3.3731454043582545, "learning_rate": 1.3004883326511986e-06, "loss": 0.6838, "step": 914 }, { "epoch": 0.4214041046601998, "grad_norm": 3.3633712629594723, "learning_rate": 1.2990645279862637e-06, "loss": 0.637, "step": 915 }, { "epoch": 0.4218646555942546, "grad_norm": 3.3342371154036936, "learning_rate": 1.2976400571892187e-06, "loss": 0.5637, "step": 916 }, { "epoch": 0.4223252065283095, "grad_norm": 3.2152650834374885, "learning_rate": 1.2962149234329096e-06, "loss": 0.5651, "step": 917 }, { "epoch": 0.42278575746236435, "grad_norm": 3.728918503268583, "learning_rate": 1.2947891298916597e-06, "loss": 0.5591, "step": 918 }, { "epoch": 0.4232463083964192, "grad_norm": 3.4867704670014814, "learning_rate": 1.2933626797412601e-06, "loss": 0.4446, "step": 919 }, { "epoch": 0.4237068593304741, "grad_norm": 3.3645940418700717, "learning_rate": 1.2919355761589673e-06, "loss": 0.5977, "step": 920 }, { "epoch": 0.4241674102645289, "grad_norm": 3.165145021423808, "learning_rate": 1.2905078223234907e-06, "loss": 0.6094, "step": 921 }, { "epoch": 0.4246279611985838, "grad_norm": 3.1311304590118083, "learning_rate": 1.2890794214149895e-06, "loss": 0.5793, "step": 922 }, { "epoch": 0.42508851213263865, "grad_norm": 2.8231203010209462, "learning_rate": 1.2876503766150634e-06, "loss": 0.5225, "step": 923 }, { "epoch": 0.42554906306669354, "grad_norm": 3.268397607752156, "learning_rate": 1.2862206911067467e-06, "loss": 0.5686, "step": 924 }, { "epoch": 0.4260096140007484, "grad_norm": 3.305099301303672, "learning_rate": 1.2847903680745012e-06, "loss": 0.5445, "step": 925 }, { "epoch": 0.4264701649348033, "grad_norm": 3.3965795137188675, "learning_rate": 1.2833594107042075e-06, "loss": 0.6591, "step": 926 }, { "epoch": 0.4269307158688581, "grad_norm": 3.483867364296341, "learning_rate": 1.2819278221831604e-06, "loss": 0.6056, "step": 927 }, { "epoch": 0.427391266802913, "grad_norm": 3.4347389707625107, "learning_rate": 1.2804956057000597e-06, "loss": 0.5861, "step": 928 }, { "epoch": 0.42785181773696784, "grad_norm": 3.1912886523840545, "learning_rate": 1.2790627644450042e-06, "loss": 0.5413, "step": 929 }, { "epoch": 0.42831236867102274, "grad_norm": 3.277432951067786, "learning_rate": 1.2776293016094848e-06, "loss": 0.5728, "step": 930 }, { "epoch": 0.4287729196050776, "grad_norm": 3.1216436794666897, "learning_rate": 1.2761952203863758e-06, "loss": 0.5291, "step": 931 }, { "epoch": 0.4292334705391324, "grad_norm": 3.2701087953996315, "learning_rate": 1.2747605239699293e-06, "loss": 0.5777, "step": 932 }, { "epoch": 0.4296940214731873, "grad_norm": 2.703298534665295, "learning_rate": 1.2733252155557686e-06, "loss": 0.4752, "step": 933 }, { "epoch": 0.43015457240724214, "grad_norm": 3.5331916544052584, "learning_rate": 1.2718892983408787e-06, "loss": 0.5741, "step": 934 }, { "epoch": 0.43061512334129703, "grad_norm": 3.2776132030927196, "learning_rate": 1.270452775523602e-06, "loss": 0.6765, "step": 935 }, { "epoch": 0.43107567427535187, "grad_norm": 3.556572826939555, "learning_rate": 1.2690156503036288e-06, "loss": 0.5738, "step": 936 }, { "epoch": 0.43153622520940677, "grad_norm": 2.6845624221473003, "learning_rate": 1.2675779258819913e-06, "loss": 0.437, "step": 937 }, { "epoch": 0.4319967761434616, "grad_norm": 3.0987030708733965, "learning_rate": 1.2661396054610568e-06, "loss": 0.5322, "step": 938 }, { "epoch": 0.4324573270775165, "grad_norm": 2.69969210409678, "learning_rate": 1.2647006922445203e-06, "loss": 0.563, "step": 939 }, { "epoch": 0.43291787801157133, "grad_norm": 3.1443985858808112, "learning_rate": 1.2632611894373963e-06, "loss": 0.5224, "step": 940 }, { "epoch": 0.4333784289456262, "grad_norm": 3.041880247975588, "learning_rate": 1.2618211002460133e-06, "loss": 0.6286, "step": 941 }, { "epoch": 0.43383897987968106, "grad_norm": 3.1243004909346612, "learning_rate": 1.2603804278780054e-06, "loss": 0.4417, "step": 942 }, { "epoch": 0.43429953081373596, "grad_norm": 3.0394710034585044, "learning_rate": 1.2589391755423061e-06, "loss": 0.3974, "step": 943 }, { "epoch": 0.4347600817477908, "grad_norm": 2.9343561758404855, "learning_rate": 1.2574973464491406e-06, "loss": 0.5109, "step": 944 }, { "epoch": 0.43522063268184563, "grad_norm": 3.5301907527590837, "learning_rate": 1.2560549438100187e-06, "loss": 0.6818, "step": 945 }, { "epoch": 0.4356811836159005, "grad_norm": 3.098431035830371, "learning_rate": 1.2546119708377273e-06, "loss": 0.6054, "step": 946 }, { "epoch": 0.43614173454995536, "grad_norm": 3.0568020174713344, "learning_rate": 1.2531684307463243e-06, "loss": 0.5611, "step": 947 }, { "epoch": 0.43660228548401026, "grad_norm": 3.133249513429743, "learning_rate": 1.2517243267511308e-06, "loss": 0.5266, "step": 948 }, { "epoch": 0.4370628364180651, "grad_norm": 3.5046506132656656, "learning_rate": 1.2502796620687232e-06, "loss": 0.7523, "step": 949 }, { "epoch": 0.43752338735212, "grad_norm": 3.8415980756490913, "learning_rate": 1.2488344399169275e-06, "loss": 0.6561, "step": 950 }, { "epoch": 0.4379839382861748, "grad_norm": 3.04800030433528, "learning_rate": 1.2473886635148107e-06, "loss": 0.3969, "step": 951 }, { "epoch": 0.4384444892202297, "grad_norm": 2.9634130405670542, "learning_rate": 1.2459423360826753e-06, "loss": 0.5083, "step": 952 }, { "epoch": 0.43890504015428455, "grad_norm": 3.2286597169487496, "learning_rate": 1.2444954608420509e-06, "loss": 0.5204, "step": 953 }, { "epoch": 0.43936559108833945, "grad_norm": 3.072141673231512, "learning_rate": 1.2430480410156859e-06, "loss": 0.5266, "step": 954 }, { "epoch": 0.4398261420223943, "grad_norm": 3.1772864308017503, "learning_rate": 1.2416000798275434e-06, "loss": 0.5762, "step": 955 }, { "epoch": 0.4402866929564492, "grad_norm": 3.1044222947486766, "learning_rate": 1.2401515805027923e-06, "loss": 0.6046, "step": 956 }, { "epoch": 0.440747243890504, "grad_norm": 2.9714963515506674, "learning_rate": 1.2387025462677986e-06, "loss": 0.5226, "step": 957 }, { "epoch": 0.44120779482455885, "grad_norm": 3.6632065071396602, "learning_rate": 1.2372529803501212e-06, "loss": 0.6285, "step": 958 }, { "epoch": 0.44166834575861375, "grad_norm": 3.3335254391441236, "learning_rate": 1.2358028859785027e-06, "loss": 0.6491, "step": 959 }, { "epoch": 0.4421288966926686, "grad_norm": 3.2741227211236277, "learning_rate": 1.234352266382863e-06, "loss": 0.472, "step": 960 }, { "epoch": 0.4425894476267235, "grad_norm": 2.734950400129683, "learning_rate": 1.2329011247942913e-06, "loss": 0.4678, "step": 961 }, { "epoch": 0.4430499985607783, "grad_norm": 3.706836559033099, "learning_rate": 1.2314494644450405e-06, "loss": 0.572, "step": 962 }, { "epoch": 0.4435105494948332, "grad_norm": 3.191997067089114, "learning_rate": 1.2299972885685175e-06, "loss": 0.526, "step": 963 }, { "epoch": 0.44397110042888804, "grad_norm": 3.750204622573935, "learning_rate": 1.2285446003992794e-06, "loss": 0.6557, "step": 964 }, { "epoch": 0.44443165136294294, "grad_norm": 3.3390534313214086, "learning_rate": 1.2270914031730227e-06, "loss": 0.6844, "step": 965 }, { "epoch": 0.4448922022969978, "grad_norm": 3.4357388932326103, "learning_rate": 1.2256377001265782e-06, "loss": 0.589, "step": 966 }, { "epoch": 0.44535275323105267, "grad_norm": 2.9239348458649497, "learning_rate": 1.2241834944979043e-06, "loss": 0.5562, "step": 967 }, { "epoch": 0.4458133041651075, "grad_norm": 3.0247685003568656, "learning_rate": 1.2227287895260774e-06, "loss": 0.5027, "step": 968 }, { "epoch": 0.4462738550991624, "grad_norm": 3.4607078630570274, "learning_rate": 1.2212735884512873e-06, "loss": 0.5647, "step": 969 }, { "epoch": 0.44673440603321724, "grad_norm": 3.134258529274002, "learning_rate": 1.2198178945148284e-06, "loss": 0.6152, "step": 970 }, { "epoch": 0.4471949569672721, "grad_norm": 3.2437674513922143, "learning_rate": 1.2183617109590923e-06, "loss": 0.4777, "step": 971 }, { "epoch": 0.44765550790132697, "grad_norm": 2.9881905608566846, "learning_rate": 1.2169050410275617e-06, "loss": 0.5202, "step": 972 }, { "epoch": 0.4481160588353818, "grad_norm": 3.186446601937435, "learning_rate": 1.2154478879648034e-06, "loss": 0.5035, "step": 973 }, { "epoch": 0.4485766097694367, "grad_norm": 3.5679383322300495, "learning_rate": 1.213990255016459e-06, "loss": 0.6799, "step": 974 }, { "epoch": 0.44903716070349153, "grad_norm": 3.3889885780961526, "learning_rate": 1.2125321454292397e-06, "loss": 0.638, "step": 975 }, { "epoch": 0.4494977116375464, "grad_norm": 3.0589827390964848, "learning_rate": 1.2110735624509184e-06, "loss": 0.6329, "step": 976 }, { "epoch": 0.44995826257160126, "grad_norm": 3.2178815155136067, "learning_rate": 1.2096145093303215e-06, "loss": 0.5451, "step": 977 }, { "epoch": 0.45041881350565616, "grad_norm": 3.1650611316618398, "learning_rate": 1.2081549893173244e-06, "loss": 0.5188, "step": 978 }, { "epoch": 0.450879364439711, "grad_norm": 2.7477870679696226, "learning_rate": 1.206695005662841e-06, "loss": 0.4523, "step": 979 }, { "epoch": 0.4513399153737659, "grad_norm": 2.9102863855372747, "learning_rate": 1.2052345616188177e-06, "loss": 0.5282, "step": 980 }, { "epoch": 0.4518004663078207, "grad_norm": 3.1966574178250116, "learning_rate": 1.2037736604382277e-06, "loss": 0.5015, "step": 981 }, { "epoch": 0.4522610172418756, "grad_norm": 2.89336588474616, "learning_rate": 1.2023123053750613e-06, "loss": 0.5461, "step": 982 }, { "epoch": 0.45272156817593046, "grad_norm": 3.832342329803752, "learning_rate": 1.2008504996843206e-06, "loss": 0.6394, "step": 983 }, { "epoch": 0.4531821191099853, "grad_norm": 3.4482291994471086, "learning_rate": 1.1993882466220102e-06, "loss": 0.6259, "step": 984 }, { "epoch": 0.4536426700440402, "grad_norm": 3.611548614335293, "learning_rate": 1.1979255494451326e-06, "loss": 0.6066, "step": 985 }, { "epoch": 0.454103220978095, "grad_norm": 2.7453352434114713, "learning_rate": 1.1964624114116784e-06, "loss": 0.6955, "step": 986 }, { "epoch": 0.4545637719121499, "grad_norm": 3.075608198924997, "learning_rate": 1.194998835780621e-06, "loss": 0.6155, "step": 987 }, { "epoch": 0.45502432284620475, "grad_norm": 3.1080551036437654, "learning_rate": 1.1935348258119083e-06, "loss": 0.578, "step": 988 }, { "epoch": 0.45548487378025965, "grad_norm": 3.2505311952038234, "learning_rate": 1.1920703847664546e-06, "loss": 0.535, "step": 989 }, { "epoch": 0.4559454247143145, "grad_norm": 3.0544071466547336, "learning_rate": 1.190605515906136e-06, "loss": 0.4539, "step": 990 }, { "epoch": 0.4564059756483694, "grad_norm": 2.9834516516156366, "learning_rate": 1.1891402224937804e-06, "loss": 0.5145, "step": 991 }, { "epoch": 0.4568665265824242, "grad_norm": 3.2098618470017963, "learning_rate": 1.1876745077931617e-06, "loss": 0.4956, "step": 992 }, { "epoch": 0.4573270775164791, "grad_norm": 3.00024716203184, "learning_rate": 1.1862083750689923e-06, "loss": 0.5351, "step": 993 }, { "epoch": 0.45778762845053395, "grad_norm": 3.084747400551803, "learning_rate": 1.1847418275869151e-06, "loss": 0.5766, "step": 994 }, { "epoch": 0.45824817938458884, "grad_norm": 3.04028326868723, "learning_rate": 1.183274868613498e-06, "loss": 0.6411, "step": 995 }, { "epoch": 0.4587087303186437, "grad_norm": 2.5418869985884354, "learning_rate": 1.181807501416224e-06, "loss": 0.4326, "step": 996 }, { "epoch": 0.4591692812526985, "grad_norm": 3.5975504622299534, "learning_rate": 1.1803397292634867e-06, "loss": 0.6207, "step": 997 }, { "epoch": 0.4596298321867534, "grad_norm": 3.385303810357531, "learning_rate": 1.1788715554245807e-06, "loss": 0.6074, "step": 998 }, { "epoch": 0.46009038312080824, "grad_norm": 3.0211329657757005, "learning_rate": 1.1774029831696955e-06, "loss": 0.6681, "step": 999 }, { "epoch": 0.46055093405486314, "grad_norm": 3.055209977773643, "learning_rate": 1.1759340157699088e-06, "loss": 0.5646, "step": 1000 }, { "epoch": 0.461011484988918, "grad_norm": 3.1014875109105753, "learning_rate": 1.1744646564971777e-06, "loss": 0.5598, "step": 1001 }, { "epoch": 0.46147203592297287, "grad_norm": 3.256357038090698, "learning_rate": 1.1729949086243319e-06, "loss": 0.6722, "step": 1002 }, { "epoch": 0.4619325868570277, "grad_norm": 2.917952076658023, "learning_rate": 1.1715247754250673e-06, "loss": 0.4466, "step": 1003 }, { "epoch": 0.4623931377910826, "grad_norm": 2.7874434854895207, "learning_rate": 1.1700542601739381e-06, "loss": 0.5989, "step": 1004 }, { "epoch": 0.46285368872513744, "grad_norm": 3.0402556453953546, "learning_rate": 1.1685833661463488e-06, "loss": 0.4839, "step": 1005 }, { "epoch": 0.46331423965919233, "grad_norm": 3.544520931422804, "learning_rate": 1.1671120966185484e-06, "loss": 0.5576, "step": 1006 }, { "epoch": 0.46377479059324717, "grad_norm": 3.3715770705932204, "learning_rate": 1.1656404548676219e-06, "loss": 0.595, "step": 1007 }, { "epoch": 0.46423534152730206, "grad_norm": 3.07379805336765, "learning_rate": 1.1641684441714828e-06, "loss": 0.5866, "step": 1008 }, { "epoch": 0.4646958924613569, "grad_norm": 3.2769110964863413, "learning_rate": 1.1626960678088677e-06, "loss": 0.4229, "step": 1009 }, { "epoch": 0.46515644339541173, "grad_norm": 3.231165225029422, "learning_rate": 1.1612233290593264e-06, "loss": 0.4908, "step": 1010 }, { "epoch": 0.4656169943294666, "grad_norm": 2.6717664603336164, "learning_rate": 1.1597502312032168e-06, "loss": 0.5189, "step": 1011 }, { "epoch": 0.46607754526352146, "grad_norm": 3.5412588764816575, "learning_rate": 1.158276777521696e-06, "loss": 0.5227, "step": 1012 }, { "epoch": 0.46653809619757636, "grad_norm": 3.367681094901886, "learning_rate": 1.1568029712967137e-06, "loss": 0.4772, "step": 1013 }, { "epoch": 0.4669986471316312, "grad_norm": 2.9896273143544185, "learning_rate": 1.1553288158110057e-06, "loss": 0.6199, "step": 1014 }, { "epoch": 0.4674591980656861, "grad_norm": 3.5056640416959346, "learning_rate": 1.153854314348085e-06, "loss": 0.507, "step": 1015 }, { "epoch": 0.4679197489997409, "grad_norm": 2.8729584735763694, "learning_rate": 1.152379470192235e-06, "loss": 0.5681, "step": 1016 }, { "epoch": 0.4683802999337958, "grad_norm": 3.1311855689183608, "learning_rate": 1.1509042866285028e-06, "loss": 0.5204, "step": 1017 }, { "epoch": 0.46884085086785066, "grad_norm": 3.6303555087991493, "learning_rate": 1.149428766942692e-06, "loss": 0.5597, "step": 1018 }, { "epoch": 0.46930140180190555, "grad_norm": 3.0256164571238795, "learning_rate": 1.1479529144213537e-06, "loss": 0.5227, "step": 1019 }, { "epoch": 0.4697619527359604, "grad_norm": 2.7202389363979016, "learning_rate": 1.1464767323517813e-06, "loss": 0.3788, "step": 1020 }, { "epoch": 0.4702225036700153, "grad_norm": 2.9201338750532893, "learning_rate": 1.145000224022002e-06, "loss": 0.4937, "step": 1021 }, { "epoch": 0.4706830546040701, "grad_norm": 3.387209727218353, "learning_rate": 1.143523392720769e-06, "loss": 0.5268, "step": 1022 }, { "epoch": 0.47114360553812495, "grad_norm": 2.8811446548900124, "learning_rate": 1.1420462417375562e-06, "loss": 0.4288, "step": 1023 }, { "epoch": 0.47160415647217985, "grad_norm": 3.416259827843531, "learning_rate": 1.140568774362549e-06, "loss": 0.5689, "step": 1024 }, { "epoch": 0.4720647074062347, "grad_norm": 3.1462397361747043, "learning_rate": 1.1390909938866367e-06, "loss": 0.5057, "step": 1025 }, { "epoch": 0.4725252583402896, "grad_norm": 3.064526836533319, "learning_rate": 1.137612903601407e-06, "loss": 0.4729, "step": 1026 }, { "epoch": 0.4729858092743444, "grad_norm": 3.451553189665636, "learning_rate": 1.1361345067991375e-06, "loss": 0.7637, "step": 1027 }, { "epoch": 0.4734463602083993, "grad_norm": 2.8877618039147515, "learning_rate": 1.134655806772788e-06, "loss": 0.4614, "step": 1028 }, { "epoch": 0.47390691114245415, "grad_norm": 3.253794710465501, "learning_rate": 1.1331768068159946e-06, "loss": 0.6915, "step": 1029 }, { "epoch": 0.47436746207650904, "grad_norm": 3.6020230395836617, "learning_rate": 1.1316975102230604e-06, "loss": 0.5978, "step": 1030 }, { "epoch": 0.4748280130105639, "grad_norm": 3.2619527720299444, "learning_rate": 1.1302179202889505e-06, "loss": 0.5066, "step": 1031 }, { "epoch": 0.47528856394461877, "grad_norm": 3.2521154822577687, "learning_rate": 1.1287380403092816e-06, "loss": 0.5423, "step": 1032 }, { "epoch": 0.4757491148786736, "grad_norm": 2.851595035378972, "learning_rate": 1.127257873580318e-06, "loss": 0.493, "step": 1033 }, { "epoch": 0.4762096658127285, "grad_norm": 2.813869134619398, "learning_rate": 1.1257774233989623e-06, "loss": 0.4491, "step": 1034 }, { "epoch": 0.47667021674678334, "grad_norm": 2.8702386818101977, "learning_rate": 1.1242966930627484e-06, "loss": 0.4677, "step": 1035 }, { "epoch": 0.47713076768083823, "grad_norm": 2.9773792194983235, "learning_rate": 1.1228156858698343e-06, "loss": 0.4339, "step": 1036 }, { "epoch": 0.47759131861489307, "grad_norm": 2.936870179141437, "learning_rate": 1.1213344051189939e-06, "loss": 0.5943, "step": 1037 }, { "epoch": 0.4780518695489479, "grad_norm": 3.305259733776011, "learning_rate": 1.1198528541096115e-06, "loss": 0.4593, "step": 1038 }, { "epoch": 0.4785124204830028, "grad_norm": 3.36680958646396, "learning_rate": 1.1183710361416727e-06, "loss": 0.7228, "step": 1039 }, { "epoch": 0.47897297141705764, "grad_norm": 3.2573238811104486, "learning_rate": 1.1168889545157582e-06, "loss": 0.5007, "step": 1040 }, { "epoch": 0.47943352235111253, "grad_norm": 2.9011246681530043, "learning_rate": 1.1154066125330357e-06, "loss": 0.5315, "step": 1041 }, { "epoch": 0.47989407328516737, "grad_norm": 2.981185948614369, "learning_rate": 1.1139240134952523e-06, "loss": 0.5441, "step": 1042 }, { "epoch": 0.48035462421922226, "grad_norm": 3.255149727332359, "learning_rate": 1.1124411607047288e-06, "loss": 0.5446, "step": 1043 }, { "epoch": 0.4808151751532771, "grad_norm": 3.1280648749420132, "learning_rate": 1.1109580574643503e-06, "loss": 0.5637, "step": 1044 }, { "epoch": 0.481275726087332, "grad_norm": 3.124825859884878, "learning_rate": 1.10947470707756e-06, "loss": 0.4994, "step": 1045 }, { "epoch": 0.4817362770213868, "grad_norm": 3.66113588450651, "learning_rate": 1.107991112848352e-06, "loss": 0.5917, "step": 1046 }, { "epoch": 0.4821968279554417, "grad_norm": 3.1021700535523298, "learning_rate": 1.1065072780812625e-06, "loss": 0.5356, "step": 1047 }, { "epoch": 0.48265737888949656, "grad_norm": 3.3696723546575194, "learning_rate": 1.1050232060813644e-06, "loss": 0.5811, "step": 1048 }, { "epoch": 0.48311792982355145, "grad_norm": 3.0818897055953434, "learning_rate": 1.1035389001542595e-06, "loss": 0.6459, "step": 1049 }, { "epoch": 0.4835784807576063, "grad_norm": 3.024745793339092, "learning_rate": 1.1020543636060683e-06, "loss": 0.5107, "step": 1050 }, { "epoch": 0.4840390316916611, "grad_norm": 3.2367103732823774, "learning_rate": 1.100569599743428e-06, "loss": 0.5783, "step": 1051 }, { "epoch": 0.484499582625716, "grad_norm": 3.2724898116979797, "learning_rate": 1.09908461187348e-06, "loss": 0.6343, "step": 1052 }, { "epoch": 0.48496013355977086, "grad_norm": 2.6886278432787836, "learning_rate": 1.0975994033038656e-06, "loss": 0.5134, "step": 1053 }, { "epoch": 0.48542068449382575, "grad_norm": 3.5676405092063175, "learning_rate": 1.0961139773427171e-06, "loss": 0.6208, "step": 1054 }, { "epoch": 0.4858812354278806, "grad_norm": 2.8882955010908598, "learning_rate": 1.0946283372986516e-06, "loss": 0.5888, "step": 1055 }, { "epoch": 0.4863417863619355, "grad_norm": 3.5114513574479087, "learning_rate": 1.0931424864807623e-06, "loss": 0.57, "step": 1056 }, { "epoch": 0.4868023372959903, "grad_norm": 3.0588259590216236, "learning_rate": 1.0916564281986133e-06, "loss": 0.5229, "step": 1057 }, { "epoch": 0.4872628882300452, "grad_norm": 3.0749972740879348, "learning_rate": 1.0901701657622291e-06, "loss": 0.5475, "step": 1058 }, { "epoch": 0.48772343916410005, "grad_norm": 3.6869592539698215, "learning_rate": 1.0886837024820897e-06, "loss": 0.6199, "step": 1059 }, { "epoch": 0.48818399009815494, "grad_norm": 3.3785618023616797, "learning_rate": 1.0871970416691227e-06, "loss": 0.6568, "step": 1060 }, { "epoch": 0.4886445410322098, "grad_norm": 3.2577480072609193, "learning_rate": 1.085710186634695e-06, "loss": 0.6063, "step": 1061 }, { "epoch": 0.48910509196626467, "grad_norm": 2.930851386132042, "learning_rate": 1.0842231406906076e-06, "loss": 0.5489, "step": 1062 }, { "epoch": 0.4895656429003195, "grad_norm": 3.056596065641448, "learning_rate": 1.0827359071490845e-06, "loss": 0.5766, "step": 1063 }, { "epoch": 0.49002619383437435, "grad_norm": 3.3438308914593686, "learning_rate": 1.0812484893227688e-06, "loss": 0.5343, "step": 1064 }, { "epoch": 0.49048674476842924, "grad_norm": 3.2170156199513684, "learning_rate": 1.079760890524715e-06, "loss": 0.4873, "step": 1065 }, { "epoch": 0.4909472957024841, "grad_norm": 3.3949566731113276, "learning_rate": 1.0782731140683784e-06, "loss": 0.6256, "step": 1066 }, { "epoch": 0.49140784663653897, "grad_norm": 3.8201701220980815, "learning_rate": 1.0767851632676119e-06, "loss": 0.5645, "step": 1067 }, { "epoch": 0.4918683975705938, "grad_norm": 3.321842231119507, "learning_rate": 1.0752970414366561e-06, "loss": 0.5429, "step": 1068 }, { "epoch": 0.4923289485046487, "grad_norm": 3.1614648927797493, "learning_rate": 1.0738087518901326e-06, "loss": 0.578, "step": 1069 }, { "epoch": 0.49278949943870354, "grad_norm": 3.1744346557606957, "learning_rate": 1.0723202979430364e-06, "loss": 0.5857, "step": 1070 }, { "epoch": 0.49325005037275843, "grad_norm": 3.0752420415036132, "learning_rate": 1.0708316829107293e-06, "loss": 0.4874, "step": 1071 }, { "epoch": 0.49371060130681327, "grad_norm": 3.139920581664928, "learning_rate": 1.0693429101089306e-06, "loss": 0.5561, "step": 1072 }, { "epoch": 0.49417115224086816, "grad_norm": 3.3471269540196786, "learning_rate": 1.0678539828537123e-06, "loss": 0.4969, "step": 1073 }, { "epoch": 0.494631703174923, "grad_norm": 3.026522963165782, "learning_rate": 1.06636490446149e-06, "loss": 0.5516, "step": 1074 }, { "epoch": 0.4950922541089779, "grad_norm": 2.803007358195874, "learning_rate": 1.064875678249016e-06, "loss": 0.5154, "step": 1075 }, { "epoch": 0.49555280504303273, "grad_norm": 3.5445642741425165, "learning_rate": 1.0633863075333712e-06, "loss": 0.6616, "step": 1076 }, { "epoch": 0.49601335597708757, "grad_norm": 3.3956547555358725, "learning_rate": 1.0618967956319595e-06, "loss": 0.6653, "step": 1077 }, { "epoch": 0.49647390691114246, "grad_norm": 3.236674834459769, "learning_rate": 1.0604071458624985e-06, "loss": 0.5443, "step": 1078 }, { "epoch": 0.4969344578451973, "grad_norm": 2.943577612886821, "learning_rate": 1.058917361543013e-06, "loss": 0.6144, "step": 1079 }, { "epoch": 0.4973950087792522, "grad_norm": 3.789273872300937, "learning_rate": 1.0574274459918279e-06, "loss": 0.5742, "step": 1080 }, { "epoch": 0.497855559713307, "grad_norm": 3.4217054874372423, "learning_rate": 1.0559374025275595e-06, "loss": 0.5328, "step": 1081 }, { "epoch": 0.4983161106473619, "grad_norm": 3.1895741231305252, "learning_rate": 1.0544472344691102e-06, "loss": 0.6774, "step": 1082 }, { "epoch": 0.49877666158141676, "grad_norm": 3.2720121537966347, "learning_rate": 1.0529569451356586e-06, "loss": 0.5828, "step": 1083 }, { "epoch": 0.49923721251547165, "grad_norm": 3.404120717189837, "learning_rate": 1.051466537846655e-06, "loss": 0.6939, "step": 1084 }, { "epoch": 0.4996977634495265, "grad_norm": 3.3671005760273323, "learning_rate": 1.049976015921811e-06, "loss": 0.6451, "step": 1085 }, { "epoch": 0.5001583143835814, "grad_norm": 3.529477019557084, "learning_rate": 1.048485382681094e-06, "loss": 0.7398, "step": 1086 }, { "epoch": 0.5006188653176362, "grad_norm": 2.985478155983292, "learning_rate": 1.0469946414447196e-06, "loss": 0.4825, "step": 1087 }, { "epoch": 0.5010794162516911, "grad_norm": 3.257171883150608, "learning_rate": 1.0455037955331447e-06, "loss": 0.4787, "step": 1088 }, { "epoch": 0.5015399671857459, "grad_norm": 2.9340259115329226, "learning_rate": 1.0440128482670569e-06, "loss": 0.5687, "step": 1089 }, { "epoch": 0.5020005181198008, "grad_norm": 3.7349893867163773, "learning_rate": 1.0425218029673718e-06, "loss": 0.502, "step": 1090 }, { "epoch": 0.5024610690538557, "grad_norm": 3.1463328721295714, "learning_rate": 1.0410306629552231e-06, "loss": 0.5196, "step": 1091 }, { "epoch": 0.5029216199879105, "grad_norm": 3.2092568851256007, "learning_rate": 1.0395394315519541e-06, "loss": 0.5733, "step": 1092 }, { "epoch": 0.5033821709219654, "grad_norm": 3.079164349910014, "learning_rate": 1.0380481120791136e-06, "loss": 0.5044, "step": 1093 }, { "epoch": 0.5038427218560203, "grad_norm": 2.701558803955672, "learning_rate": 1.036556707858445e-06, "loss": 0.4957, "step": 1094 }, { "epoch": 0.5043032727900751, "grad_norm": 3.1632512432778226, "learning_rate": 1.0350652222118807e-06, "loss": 0.5624, "step": 1095 }, { "epoch": 0.50476382372413, "grad_norm": 2.872004805465866, "learning_rate": 1.0335736584615356e-06, "loss": 0.5077, "step": 1096 }, { "epoch": 0.5052243746581848, "grad_norm": 3.038715190759613, "learning_rate": 1.0320820199296974e-06, "loss": 0.487, "step": 1097 }, { "epoch": 0.5056849255922398, "grad_norm": 3.1237869179473594, "learning_rate": 1.0305903099388202e-06, "loss": 0.418, "step": 1098 }, { "epoch": 0.5061454765262946, "grad_norm": 2.9307402726415757, "learning_rate": 1.0290985318115184e-06, "loss": 0.5496, "step": 1099 }, { "epoch": 0.5066060274603494, "grad_norm": 3.3590679344076193, "learning_rate": 1.0276066888705574e-06, "loss": 0.6662, "step": 1100 }, { "epoch": 0.5070665783944043, "grad_norm": 3.0656098416878033, "learning_rate": 1.0261147844388472e-06, "loss": 0.5917, "step": 1101 }, { "epoch": 0.5075271293284591, "grad_norm": 3.082634007004262, "learning_rate": 1.0246228218394346e-06, "loss": 0.5372, "step": 1102 }, { "epoch": 0.5079876802625141, "grad_norm": 3.5524616274647567, "learning_rate": 1.023130804395496e-06, "loss": 0.5913, "step": 1103 }, { "epoch": 0.5084482311965689, "grad_norm": 2.9986863183328696, "learning_rate": 1.0216387354303295e-06, "loss": 0.4397, "step": 1104 }, { "epoch": 0.5089087821306237, "grad_norm": 2.613344675102279, "learning_rate": 1.0201466182673498e-06, "loss": 0.4783, "step": 1105 }, { "epoch": 0.5093693330646786, "grad_norm": 3.2217947984483, "learning_rate": 1.0186544562300764e-06, "loss": 0.5062, "step": 1106 }, { "epoch": 0.5098298839987335, "grad_norm": 2.9440216394401904, "learning_rate": 1.0171622526421304e-06, "loss": 0.6022, "step": 1107 }, { "epoch": 0.5102904349327884, "grad_norm": 2.7773735259088257, "learning_rate": 1.0156700108272252e-06, "loss": 0.6232, "step": 1108 }, { "epoch": 0.5107509858668432, "grad_norm": 2.941806533194381, "learning_rate": 1.0141777341091587e-06, "loss": 0.4926, "step": 1109 }, { "epoch": 0.511211536800898, "grad_norm": 3.548025228777574, "learning_rate": 1.0126854258118074e-06, "loss": 0.5843, "step": 1110 }, { "epoch": 0.511672087734953, "grad_norm": 3.275863911485487, "learning_rate": 1.011193089259118e-06, "loss": 0.4883, "step": 1111 }, { "epoch": 0.5121326386690078, "grad_norm": 3.0059327106969653, "learning_rate": 1.009700727775099e-06, "loss": 0.5342, "step": 1112 }, { "epoch": 0.5125931896030627, "grad_norm": 3.0101617328590278, "learning_rate": 1.008208344683816e-06, "loss": 0.4742, "step": 1113 }, { "epoch": 0.5130537405371175, "grad_norm": 3.752871827480606, "learning_rate": 1.0067159433093815e-06, "loss": 0.605, "step": 1114 }, { "epoch": 0.5135142914711723, "grad_norm": 3.3469797895498843, "learning_rate": 1.00522352697595e-06, "loss": 0.681, "step": 1115 }, { "epoch": 0.5139748424052273, "grad_norm": 2.6263493290364246, "learning_rate": 1.003731099007708e-06, "loss": 0.4897, "step": 1116 }, { "epoch": 0.5144353933392821, "grad_norm": 2.9725521268454003, "learning_rate": 1.002238662728869e-06, "loss": 0.5346, "step": 1117 }, { "epoch": 0.514895944273337, "grad_norm": 3.049153105281882, "learning_rate": 1.000746221463664e-06, "loss": 0.5959, "step": 1118 }, { "epoch": 0.5153564952073918, "grad_norm": 2.7288089533635334, "learning_rate": 9.992537785363361e-07, "loss": 0.4807, "step": 1119 }, { "epoch": 0.5158170461414467, "grad_norm": 3.0318291312947667, "learning_rate": 9.977613372711308e-07, "loss": 0.5651, "step": 1120 }, { "epoch": 0.5162775970755016, "grad_norm": 2.6295894746203583, "learning_rate": 9.962689009922918e-07, "loss": 0.5055, "step": 1121 }, { "epoch": 0.5167381480095564, "grad_norm": 3.1101382519689285, "learning_rate": 9.947764730240501e-07, "loss": 0.5315, "step": 1122 }, { "epoch": 0.5171986989436113, "grad_norm": 3.019675975546636, "learning_rate": 9.932840566906184e-07, "loss": 0.5095, "step": 1123 }, { "epoch": 0.5176592498776662, "grad_norm": 2.8010710502034897, "learning_rate": 9.917916553161841e-07, "loss": 0.4929, "step": 1124 }, { "epoch": 0.518119800811721, "grad_norm": 3.0678462211009436, "learning_rate": 9.90299272224901e-07, "loss": 0.5167, "step": 1125 }, { "epoch": 0.5185803517457759, "grad_norm": 3.134942537587258, "learning_rate": 9.888069107408824e-07, "loss": 0.5602, "step": 1126 }, { "epoch": 0.5190409026798307, "grad_norm": 2.971903282768269, "learning_rate": 9.873145741881927e-07, "loss": 0.6232, "step": 1127 }, { "epoch": 0.5195014536138856, "grad_norm": 3.402808011964365, "learning_rate": 9.858222658908412e-07, "loss": 0.6225, "step": 1128 }, { "epoch": 0.5199620045479405, "grad_norm": 3.0789459137502644, "learning_rate": 9.84329989172775e-07, "loss": 0.6397, "step": 1129 }, { "epoch": 0.5204225554819953, "grad_norm": 2.9406870784619428, "learning_rate": 9.828377473578697e-07, "loss": 0.5466, "step": 1130 }, { "epoch": 0.5208831064160502, "grad_norm": 3.364937021577735, "learning_rate": 9.813455437699237e-07, "loss": 0.6092, "step": 1131 }, { "epoch": 0.521343657350105, "grad_norm": 2.9557279281094724, "learning_rate": 9.798533817326504e-07, "loss": 0.5889, "step": 1132 }, { "epoch": 0.52180420828416, "grad_norm": 3.7473302217626654, "learning_rate": 9.783612645696702e-07, "loss": 0.5108, "step": 1133 }, { "epoch": 0.5222647592182148, "grad_norm": 2.973166241915579, "learning_rate": 9.768691956045042e-07, "loss": 0.5232, "step": 1134 }, { "epoch": 0.5227253101522696, "grad_norm": 3.21594378369249, "learning_rate": 9.753771781605657e-07, "loss": 0.5568, "step": 1135 }, { "epoch": 0.5231858610863245, "grad_norm": 3.144947147889769, "learning_rate": 9.73885215561153e-07, "loss": 0.5373, "step": 1136 }, { "epoch": 0.5236464120203794, "grad_norm": 3.2253884546962137, "learning_rate": 9.723933111294427e-07, "loss": 0.5413, "step": 1137 }, { "epoch": 0.5241069629544343, "grad_norm": 3.223035357344408, "learning_rate": 9.709014681884815e-07, "loss": 0.4688, "step": 1138 }, { "epoch": 0.5245675138884891, "grad_norm": 3.4567627154136322, "learning_rate": 9.6940969006118e-07, "loss": 0.6319, "step": 1139 }, { "epoch": 0.5250280648225439, "grad_norm": 3.2739054341505596, "learning_rate": 9.67917980070303e-07, "loss": 0.6505, "step": 1140 }, { "epoch": 0.5254886157565988, "grad_norm": 3.0010401161709943, "learning_rate": 9.664263415384643e-07, "loss": 0.4819, "step": 1141 }, { "epoch": 0.5259491666906537, "grad_norm": 3.213300971817815, "learning_rate": 9.649347777881192e-07, "loss": 0.4948, "step": 1142 }, { "epoch": 0.5264097176247086, "grad_norm": 3.834117009480357, "learning_rate": 9.634432921415554e-07, "loss": 0.5202, "step": 1143 }, { "epoch": 0.5268702685587634, "grad_norm": 3.0405631156566524, "learning_rate": 9.619518879208865e-07, "loss": 0.5498, "step": 1144 }, { "epoch": 0.5273308194928182, "grad_norm": 2.736642170245626, "learning_rate": 9.604605684480458e-07, "loss": 0.5615, "step": 1145 }, { "epoch": 0.5277913704268732, "grad_norm": 3.4155336627808577, "learning_rate": 9.589693370447768e-07, "loss": 0.6218, "step": 1146 }, { "epoch": 0.528251921360928, "grad_norm": 3.197761407312923, "learning_rate": 9.574781970326283e-07, "loss": 0.5547, "step": 1147 }, { "epoch": 0.5287124722949829, "grad_norm": 3.025078798504295, "learning_rate": 9.559871517329434e-07, "loss": 0.4866, "step": 1148 }, { "epoch": 0.5291730232290377, "grad_norm": 3.2517856164626027, "learning_rate": 9.544962044668555e-07, "loss": 0.5777, "step": 1149 }, { "epoch": 0.5296335741630926, "grad_norm": 2.996348096347719, "learning_rate": 9.530053585552802e-07, "loss": 0.5648, "step": 1150 }, { "epoch": 0.5300941250971475, "grad_norm": 3.119156079432281, "learning_rate": 9.515146173189057e-07, "loss": 0.5534, "step": 1151 }, { "epoch": 0.5305546760312023, "grad_norm": 2.984885291180427, "learning_rate": 9.50023984078189e-07, "loss": 0.4584, "step": 1152 }, { "epoch": 0.5310152269652572, "grad_norm": 3.2174005544489765, "learning_rate": 9.485334621533453e-07, "loss": 0.4978, "step": 1153 }, { "epoch": 0.5314757778993121, "grad_norm": 2.838123600964648, "learning_rate": 9.470430548643411e-07, "loss": 0.5271, "step": 1154 }, { "epoch": 0.531936328833367, "grad_norm": 3.2298666663636766, "learning_rate": 9.455527655308899e-07, "loss": 0.5232, "step": 1155 }, { "epoch": 0.5323968797674218, "grad_norm": 3.7741680239909963, "learning_rate": 9.440625974724407e-07, "loss": 0.6279, "step": 1156 }, { "epoch": 0.5328574307014766, "grad_norm": 2.9599505842958873, "learning_rate": 9.425725540081721e-07, "loss": 0.4482, "step": 1157 }, { "epoch": 0.5333179816355315, "grad_norm": 3.0963847317641533, "learning_rate": 9.410826384569869e-07, "loss": 0.5946, "step": 1158 }, { "epoch": 0.5337785325695864, "grad_norm": 3.0756573597480825, "learning_rate": 9.395928541375013e-07, "loss": 0.4987, "step": 1159 }, { "epoch": 0.5342390835036412, "grad_norm": 2.96523606047648, "learning_rate": 9.381032043680405e-07, "loss": 0.5864, "step": 1160 }, { "epoch": 0.5346996344376961, "grad_norm": 2.8034523910128377, "learning_rate": 9.366136924666288e-07, "loss": 0.6241, "step": 1161 }, { "epoch": 0.5351601853717509, "grad_norm": 3.0938529680087794, "learning_rate": 9.351243217509842e-07, "loss": 0.613, "step": 1162 }, { "epoch": 0.5356207363058059, "grad_norm": 3.3153575829450728, "learning_rate": 9.336350955385101e-07, "loss": 0.5514, "step": 1163 }, { "epoch": 0.5360812872398607, "grad_norm": 3.1688595006317124, "learning_rate": 9.321460171462876e-07, "loss": 0.5431, "step": 1164 }, { "epoch": 0.5365418381739155, "grad_norm": 3.1725759909285123, "learning_rate": 9.306570898910695e-07, "loss": 0.4865, "step": 1165 }, { "epoch": 0.5370023891079704, "grad_norm": 2.9853416997922406, "learning_rate": 9.29168317089271e-07, "loss": 0.5308, "step": 1166 }, { "epoch": 0.5374629400420253, "grad_norm": 3.2282727253852053, "learning_rate": 9.276797020569635e-07, "loss": 0.5703, "step": 1167 }, { "epoch": 0.5379234909760802, "grad_norm": 2.7961799624127734, "learning_rate": 9.261912481098675e-07, "loss": 0.4944, "step": 1168 }, { "epoch": 0.538384041910135, "grad_norm": 3.3641211633805126, "learning_rate": 9.24702958563344e-07, "loss": 0.5362, "step": 1169 }, { "epoch": 0.5388445928441898, "grad_norm": 3.101865055004353, "learning_rate": 9.232148367323882e-07, "loss": 0.6308, "step": 1170 }, { "epoch": 0.5393051437782447, "grad_norm": 3.189825939308207, "learning_rate": 9.217268859316218e-07, "loss": 0.5148, "step": 1171 }, { "epoch": 0.5397656947122996, "grad_norm": 3.82610831328444, "learning_rate": 9.202391094752853e-07, "loss": 0.5346, "step": 1172 }, { "epoch": 0.5402262456463545, "grad_norm": 3.2076777255853672, "learning_rate": 9.187515106772311e-07, "loss": 0.5662, "step": 1173 }, { "epoch": 0.5406867965804093, "grad_norm": 3.4891483553752343, "learning_rate": 9.172640928509158e-07, "loss": 0.5638, "step": 1174 }, { "epoch": 0.5411473475144641, "grad_norm": 3.5063754666432865, "learning_rate": 9.157768593093925e-07, "loss": 0.4947, "step": 1175 }, { "epoch": 0.5416078984485191, "grad_norm": 2.803505276485617, "learning_rate": 9.142898133653047e-07, "loss": 0.5989, "step": 1176 }, { "epoch": 0.5420684493825739, "grad_norm": 3.4189599088321745, "learning_rate": 9.128029583308773e-07, "loss": 0.5396, "step": 1177 }, { "epoch": 0.5425290003166288, "grad_norm": 3.4562786815564723, "learning_rate": 9.113162975179104e-07, "loss": 0.5468, "step": 1178 }, { "epoch": 0.5429895512506836, "grad_norm": 3.1130591716501272, "learning_rate": 9.098298342377711e-07, "loss": 0.6484, "step": 1179 }, { "epoch": 0.5434501021847385, "grad_norm": 3.218340063155536, "learning_rate": 9.083435718013868e-07, "loss": 0.5841, "step": 1180 }, { "epoch": 0.5439106531187934, "grad_norm": 2.871969590049317, "learning_rate": 9.068575135192376e-07, "loss": 0.5347, "step": 1181 }, { "epoch": 0.5443712040528482, "grad_norm": 3.1118275872712906, "learning_rate": 9.053716627013487e-07, "loss": 0.528, "step": 1182 }, { "epoch": 0.5448317549869031, "grad_norm": 3.262925860586302, "learning_rate": 9.038860226572831e-07, "loss": 0.6184, "step": 1183 }, { "epoch": 0.5452923059209579, "grad_norm": 3.379255799830584, "learning_rate": 9.024005966961346e-07, "loss": 0.6056, "step": 1184 }, { "epoch": 0.5457528568550128, "grad_norm": 3.1526864635641103, "learning_rate": 9.009153881265198e-07, "loss": 0.6153, "step": 1185 }, { "epoch": 0.5462134077890677, "grad_norm": 3.2354021477420547, "learning_rate": 8.994304002565722e-07, "loss": 0.6598, "step": 1186 }, { "epoch": 0.5466739587231225, "grad_norm": 3.3399627148509077, "learning_rate": 8.979456363939317e-07, "loss": 0.5406, "step": 1187 }, { "epoch": 0.5471345096571774, "grad_norm": 3.2227858379890106, "learning_rate": 8.964610998457407e-07, "loss": 0.4731, "step": 1188 }, { "epoch": 0.5475950605912323, "grad_norm": 3.128161422108759, "learning_rate": 8.949767939186356e-07, "loss": 0.5487, "step": 1189 }, { "epoch": 0.5480556115252871, "grad_norm": 3.2758864860806076, "learning_rate": 8.934927219187373e-07, "loss": 0.5796, "step": 1190 }, { "epoch": 0.548516162459342, "grad_norm": 3.0508736285456903, "learning_rate": 8.920088871516481e-07, "loss": 0.5156, "step": 1191 }, { "epoch": 0.5489767133933968, "grad_norm": 3.454863882802425, "learning_rate": 8.905252929224402e-07, "loss": 0.5471, "step": 1192 }, { "epoch": 0.5494372643274518, "grad_norm": 3.241696417453311, "learning_rate": 8.890419425356495e-07, "loss": 0.6282, "step": 1193 }, { "epoch": 0.5498978152615066, "grad_norm": 3.168159151579064, "learning_rate": 8.875588392952712e-07, "loss": 0.5145, "step": 1194 }, { "epoch": 0.5503583661955614, "grad_norm": 3.496337240225602, "learning_rate": 8.860759865047475e-07, "loss": 0.5628, "step": 1195 }, { "epoch": 0.5508189171296163, "grad_norm": 3.2304073520392165, "learning_rate": 8.845933874669644e-07, "loss": 0.4703, "step": 1196 }, { "epoch": 0.5512794680636711, "grad_norm": 3.1834073949679214, "learning_rate": 8.831110454842418e-07, "loss": 0.5432, "step": 1197 }, { "epoch": 0.5517400189977261, "grad_norm": 3.4899372677451823, "learning_rate": 8.816289638583272e-07, "loss": 0.6382, "step": 1198 }, { "epoch": 0.5522005699317809, "grad_norm": 2.9457383672792754, "learning_rate": 8.801471458903885e-07, "loss": 0.4662, "step": 1199 }, { "epoch": 0.5526611208658357, "grad_norm": 3.2935543447014104, "learning_rate": 8.786655948810062e-07, "loss": 0.4872, "step": 1200 }, { "epoch": 0.5531216717998906, "grad_norm": 3.2241377818141057, "learning_rate": 8.771843141301658e-07, "loss": 0.568, "step": 1201 }, { "epoch": 0.5535822227339455, "grad_norm": 3.1610954571795853, "learning_rate": 8.757033069372514e-07, "loss": 0.5381, "step": 1202 }, { "epoch": 0.5540427736680004, "grad_norm": 2.8053997458619193, "learning_rate": 8.742225766010375e-07, "loss": 0.6084, "step": 1203 }, { "epoch": 0.5545033246020552, "grad_norm": 3.2358176609289977, "learning_rate": 8.727421264196819e-07, "loss": 0.5904, "step": 1204 }, { "epoch": 0.55496387553611, "grad_norm": 3.0926449818062127, "learning_rate": 8.712619596907187e-07, "loss": 0.5746, "step": 1205 }, { "epoch": 0.555424426470165, "grad_norm": 2.941504999556666, "learning_rate": 8.697820797110498e-07, "loss": 0.5781, "step": 1206 }, { "epoch": 0.5558849774042198, "grad_norm": 2.871854054187517, "learning_rate": 8.683024897769395e-07, "loss": 0.4819, "step": 1207 }, { "epoch": 0.5563455283382747, "grad_norm": 3.357101022208657, "learning_rate": 8.668231931840053e-07, "loss": 0.5112, "step": 1208 }, { "epoch": 0.5568060792723295, "grad_norm": 2.926617988170589, "learning_rate": 8.653441932272118e-07, "loss": 0.5429, "step": 1209 }, { "epoch": 0.5572666302063843, "grad_norm": 2.8511154209063703, "learning_rate": 8.638654932008626e-07, "loss": 0.589, "step": 1210 }, { "epoch": 0.5577271811404393, "grad_norm": 3.0744117053038984, "learning_rate": 8.623870963985929e-07, "loss": 0.4968, "step": 1211 }, { "epoch": 0.5581877320744941, "grad_norm": 3.2017070331025663, "learning_rate": 8.609090061133633e-07, "loss": 0.483, "step": 1212 }, { "epoch": 0.558648283008549, "grad_norm": 3.4242826485145024, "learning_rate": 8.594312256374512e-07, "loss": 0.6987, "step": 1213 }, { "epoch": 0.5591088339426038, "grad_norm": 3.215000736037071, "learning_rate": 8.579537582624437e-07, "loss": 0.569, "step": 1214 }, { "epoch": 0.5595693848766587, "grad_norm": 3.21779173610873, "learning_rate": 8.564766072792311e-07, "loss": 0.5391, "step": 1215 }, { "epoch": 0.5600299358107136, "grad_norm": 3.131067041749219, "learning_rate": 8.54999775977998e-07, "loss": 0.5869, "step": 1216 }, { "epoch": 0.5604904867447684, "grad_norm": 3.4662747526743347, "learning_rate": 8.535232676482189e-07, "loss": 0.5868, "step": 1217 }, { "epoch": 0.5609510376788233, "grad_norm": 3.428017059923847, "learning_rate": 8.520470855786466e-07, "loss": 0.6251, "step": 1218 }, { "epoch": 0.5614115886128782, "grad_norm": 3.3290755200586872, "learning_rate": 8.505712330573079e-07, "loss": 0.6237, "step": 1219 }, { "epoch": 0.561872139546933, "grad_norm": 2.9893892056090734, "learning_rate": 8.490957133714973e-07, "loss": 0.4902, "step": 1220 }, { "epoch": 0.5623326904809879, "grad_norm": 3.5120837728765664, "learning_rate": 8.476205298077649e-07, "loss": 0.6707, "step": 1221 }, { "epoch": 0.5627932414150427, "grad_norm": 3.1658388926282517, "learning_rate": 8.46145685651915e-07, "loss": 0.514, "step": 1222 }, { "epoch": 0.5632537923490976, "grad_norm": 3.117691401780372, "learning_rate": 8.446711841889945e-07, "loss": 0.6758, "step": 1223 }, { "epoch": 0.5637143432831525, "grad_norm": 3.000516000189048, "learning_rate": 8.431970287032861e-07, "loss": 0.5931, "step": 1224 }, { "epoch": 0.5641748942172073, "grad_norm": 2.796362177103071, "learning_rate": 8.417232224783041e-07, "loss": 0.475, "step": 1225 }, { "epoch": 0.5646354451512622, "grad_norm": 3.1187898436905743, "learning_rate": 8.402497687967836e-07, "loss": 0.4477, "step": 1226 }, { "epoch": 0.565095996085317, "grad_norm": 3.7014134238719962, "learning_rate": 8.387766709406735e-07, "loss": 0.5918, "step": 1227 }, { "epoch": 0.565556547019372, "grad_norm": 3.8201360952855854, "learning_rate": 8.373039321911323e-07, "loss": 0.6437, "step": 1228 }, { "epoch": 0.5660170979534268, "grad_norm": 3.4961060613658907, "learning_rate": 8.358315558285169e-07, "loss": 0.6191, "step": 1229 }, { "epoch": 0.5664776488874816, "grad_norm": 3.374429880061983, "learning_rate": 8.343595451323781e-07, "loss": 0.5717, "step": 1230 }, { "epoch": 0.5669381998215365, "grad_norm": 3.5990525265114863, "learning_rate": 8.328879033814515e-07, "loss": 0.7007, "step": 1231 }, { "epoch": 0.5673987507555914, "grad_norm": 3.43846448225756, "learning_rate": 8.31416633853651e-07, "loss": 0.5106, "step": 1232 }, { "epoch": 0.5678593016896463, "grad_norm": 3.240170428540241, "learning_rate": 8.29945739826062e-07, "loss": 0.6552, "step": 1233 }, { "epoch": 0.5683198526237011, "grad_norm": 3.268819388844623, "learning_rate": 8.284752245749327e-07, "loss": 0.563, "step": 1234 }, { "epoch": 0.5687804035577559, "grad_norm": 3.322362445609891, "learning_rate": 8.270050913756683e-07, "loss": 0.6392, "step": 1235 }, { "epoch": 0.5692409544918108, "grad_norm": 3.4868505757465855, "learning_rate": 8.255353435028226e-07, "loss": 0.5105, "step": 1236 }, { "epoch": 0.5697015054258657, "grad_norm": 3.7205750052168556, "learning_rate": 8.240659842300912e-07, "loss": 0.555, "step": 1237 }, { "epoch": 0.5701620563599206, "grad_norm": 3.3997467433257222, "learning_rate": 8.225970168303045e-07, "loss": 0.5784, "step": 1238 }, { "epoch": 0.5706226072939754, "grad_norm": 3.666481631082462, "learning_rate": 8.211284445754197e-07, "loss": 0.6415, "step": 1239 }, { "epoch": 0.5710831582280302, "grad_norm": 3.030350707526125, "learning_rate": 8.196602707365134e-07, "loss": 0.5301, "step": 1240 }, { "epoch": 0.5715437091620852, "grad_norm": 3.4386931144286104, "learning_rate": 8.18192498583776e-07, "loss": 0.4771, "step": 1241 }, { "epoch": 0.57200426009614, "grad_norm": 3.6879705378930017, "learning_rate": 8.16725131386502e-07, "loss": 0.4923, "step": 1242 }, { "epoch": 0.5724648110301949, "grad_norm": 3.258657524610071, "learning_rate": 8.152581724130849e-07, "loss": 0.4746, "step": 1243 }, { "epoch": 0.5729253619642497, "grad_norm": 3.207585199421819, "learning_rate": 8.13791624931008e-07, "loss": 0.6232, "step": 1244 }, { "epoch": 0.5733859128983047, "grad_norm": 3.1524462601648535, "learning_rate": 8.123254922068383e-07, "loss": 0.6256, "step": 1245 }, { "epoch": 0.5738464638323595, "grad_norm": 2.829223353488783, "learning_rate": 8.108597775062199e-07, "loss": 0.5638, "step": 1246 }, { "epoch": 0.5743070147664143, "grad_norm": 2.991108516511504, "learning_rate": 8.093944840938638e-07, "loss": 0.528, "step": 1247 }, { "epoch": 0.5747675657004692, "grad_norm": 3.122195079526398, "learning_rate": 8.079296152335454e-07, "loss": 0.5076, "step": 1248 }, { "epoch": 0.575228116634524, "grad_norm": 2.8771471658342227, "learning_rate": 8.06465174188092e-07, "loss": 0.4779, "step": 1249 }, { "epoch": 0.575688667568579, "grad_norm": 2.9430839805871236, "learning_rate": 8.050011642193787e-07, "loss": 0.5621, "step": 1250 }, { "epoch": 0.5761492185026338, "grad_norm": 3.2638073071160147, "learning_rate": 8.035375885883217e-07, "loss": 0.581, "step": 1251 }, { "epoch": 0.5766097694366886, "grad_norm": 2.8037502705877397, "learning_rate": 8.020744505548678e-07, "loss": 0.4642, "step": 1252 }, { "epoch": 0.5770703203707435, "grad_norm": 3.009407280040476, "learning_rate": 8.006117533779897e-07, "loss": 0.5578, "step": 1253 }, { "epoch": 0.5775308713047984, "grad_norm": 2.7699074199330163, "learning_rate": 7.991495003156799e-07, "loss": 0.548, "step": 1254 }, { "epoch": 0.5779914222388532, "grad_norm": 3.105688826369827, "learning_rate": 7.976876946249385e-07, "loss": 0.4663, "step": 1255 }, { "epoch": 0.5784519731729081, "grad_norm": 3.111402743097655, "learning_rate": 7.962263395617723e-07, "loss": 0.6285, "step": 1256 }, { "epoch": 0.5789125241069629, "grad_norm": 2.740304619765724, "learning_rate": 7.947654383811826e-07, "loss": 0.6369, "step": 1257 }, { "epoch": 0.5793730750410179, "grad_norm": 3.321527884757525, "learning_rate": 7.933049943371591e-07, "loss": 0.5833, "step": 1258 }, { "epoch": 0.5798336259750727, "grad_norm": 3.3544814320822325, "learning_rate": 7.918450106826756e-07, "loss": 0.4738, "step": 1259 }, { "epoch": 0.5802941769091275, "grad_norm": 2.8103939697101104, "learning_rate": 7.903854906696783e-07, "loss": 0.4384, "step": 1260 }, { "epoch": 0.5807547278431824, "grad_norm": 2.9663453333883867, "learning_rate": 7.889264375490819e-07, "loss": 0.5188, "step": 1261 }, { "epoch": 0.5812152787772372, "grad_norm": 3.2684264262146296, "learning_rate": 7.874678545707605e-07, "loss": 0.4919, "step": 1262 }, { "epoch": 0.5816758297112922, "grad_norm": 3.2619536382755183, "learning_rate": 7.86009744983541e-07, "loss": 0.5307, "step": 1263 }, { "epoch": 0.582136380645347, "grad_norm": 3.4192324602148094, "learning_rate": 7.845521120351967e-07, "loss": 0.5289, "step": 1264 }, { "epoch": 0.5825969315794018, "grad_norm": 3.359910314026855, "learning_rate": 7.830949589724381e-07, "loss": 0.6354, "step": 1265 }, { "epoch": 0.5830574825134567, "grad_norm": 2.824340063484425, "learning_rate": 7.816382890409079e-07, "loss": 0.4714, "step": 1266 }, { "epoch": 0.5835180334475116, "grad_norm": 3.0782272088831473, "learning_rate": 7.80182105485172e-07, "loss": 0.606, "step": 1267 }, { "epoch": 0.5839785843815665, "grad_norm": 3.4682328165851817, "learning_rate": 7.787264115487125e-07, "loss": 0.6244, "step": 1268 }, { "epoch": 0.5844391353156213, "grad_norm": 3.094844298682695, "learning_rate": 7.772712104739225e-07, "loss": 0.6137, "step": 1269 }, { "epoch": 0.5848996862496761, "grad_norm": 3.1487313141421818, "learning_rate": 7.758165055020959e-07, "loss": 0.4985, "step": 1270 }, { "epoch": 0.5853602371837311, "grad_norm": 3.0555631645161974, "learning_rate": 7.743622998734216e-07, "loss": 0.5003, "step": 1271 }, { "epoch": 0.5858207881177859, "grad_norm": 2.89659469677659, "learning_rate": 7.729085968269775e-07, "loss": 0.5633, "step": 1272 }, { "epoch": 0.5862813390518408, "grad_norm": 3.0731709449259954, "learning_rate": 7.714553996007207e-07, "loss": 0.6242, "step": 1273 }, { "epoch": 0.5867418899858956, "grad_norm": 3.5726138177591227, "learning_rate": 7.700027114314824e-07, "loss": 0.5221, "step": 1274 }, { "epoch": 0.5872024409199504, "grad_norm": 2.7589540594281448, "learning_rate": 7.685505355549599e-07, "loss": 0.5246, "step": 1275 }, { "epoch": 0.5876629918540054, "grad_norm": 2.7404624681182486, "learning_rate": 7.670988752057087e-07, "loss": 0.4816, "step": 1276 }, { "epoch": 0.5881235427880602, "grad_norm": 3.6053355624856906, "learning_rate": 7.656477336171372e-07, "loss": 0.5095, "step": 1277 }, { "epoch": 0.5885840937221151, "grad_norm": 3.127209652870214, "learning_rate": 7.64197114021497e-07, "loss": 0.5685, "step": 1278 }, { "epoch": 0.5890446446561699, "grad_norm": 3.3101979407630506, "learning_rate": 7.627470196498788e-07, "loss": 0.6193, "step": 1279 }, { "epoch": 0.5895051955902249, "grad_norm": 3.0195380063665938, "learning_rate": 7.612974537322015e-07, "loss": 0.6205, "step": 1280 }, { "epoch": 0.5899657465242797, "grad_norm": 3.130560991980397, "learning_rate": 7.598484194972076e-07, "loss": 0.5309, "step": 1281 }, { "epoch": 0.5904262974583345, "grad_norm": 2.875522406498749, "learning_rate": 7.583999201724565e-07, "loss": 0.5542, "step": 1282 }, { "epoch": 0.5908868483923894, "grad_norm": 3.0420530994023336, "learning_rate": 7.569519589843144e-07, "loss": 0.5288, "step": 1283 }, { "epoch": 0.5913473993264443, "grad_norm": 2.9257495629106565, "learning_rate": 7.555045391579492e-07, "loss": 0.5173, "step": 1284 }, { "epoch": 0.5918079502604991, "grad_norm": 3.0015108226030085, "learning_rate": 7.540576639173247e-07, "loss": 0.4839, "step": 1285 }, { "epoch": 0.592268501194554, "grad_norm": 2.7862911331282447, "learning_rate": 7.526113364851891e-07, "loss": 0.5727, "step": 1286 }, { "epoch": 0.5927290521286088, "grad_norm": 3.101011663748682, "learning_rate": 7.511655600830727e-07, "loss": 0.4828, "step": 1287 }, { "epoch": 0.5931896030626637, "grad_norm": 2.8268391388305663, "learning_rate": 7.497203379312771e-07, "loss": 0.471, "step": 1288 }, { "epoch": 0.5936501539967186, "grad_norm": 2.9519498437477503, "learning_rate": 7.482756732488691e-07, "loss": 0.5866, "step": 1289 }, { "epoch": 0.5941107049307734, "grad_norm": 2.8204629128276255, "learning_rate": 7.468315692536755e-07, "loss": 0.5342, "step": 1290 }, { "epoch": 0.5945712558648283, "grad_norm": 3.1769123313998913, "learning_rate": 7.453880291622725e-07, "loss": 0.4381, "step": 1291 }, { "epoch": 0.5950318067988831, "grad_norm": 3.014532693904851, "learning_rate": 7.439450561899813e-07, "loss": 0.4928, "step": 1292 }, { "epoch": 0.5954923577329381, "grad_norm": 3.0757999291111813, "learning_rate": 7.425026535508593e-07, "loss": 0.5666, "step": 1293 }, { "epoch": 0.5959529086669929, "grad_norm": 3.3979586211632555, "learning_rate": 7.410608244576937e-07, "loss": 0.5643, "step": 1294 }, { "epoch": 0.5964134596010477, "grad_norm": 3.259635269308452, "learning_rate": 7.396195721219945e-07, "loss": 0.5211, "step": 1295 }, { "epoch": 0.5968740105351026, "grad_norm": 2.9795483102185014, "learning_rate": 7.381788997539868e-07, "loss": 0.5127, "step": 1296 }, { "epoch": 0.5973345614691575, "grad_norm": 3.296329788058233, "learning_rate": 7.367388105626036e-07, "loss": 0.5219, "step": 1297 }, { "epoch": 0.5977951124032124, "grad_norm": 3.2181878786157205, "learning_rate": 7.352993077554798e-07, "loss": 0.4597, "step": 1298 }, { "epoch": 0.5982556633372672, "grad_norm": 2.952432694766975, "learning_rate": 7.33860394538943e-07, "loss": 0.5352, "step": 1299 }, { "epoch": 0.598716214271322, "grad_norm": 2.9260033657087505, "learning_rate": 7.324220741180088e-07, "loss": 0.5421, "step": 1300 }, { "epoch": 0.5991767652053769, "grad_norm": 3.211788941840772, "learning_rate": 7.309843496963715e-07, "loss": 0.5918, "step": 1301 }, { "epoch": 0.5996373161394318, "grad_norm": 2.6095863519936033, "learning_rate": 7.295472244763981e-07, "loss": 0.4312, "step": 1302 }, { "epoch": 0.6000978670734867, "grad_norm": 2.8768992985690374, "learning_rate": 7.281107016591213e-07, "loss": 0.4502, "step": 1303 }, { "epoch": 0.6005584180075415, "grad_norm": 3.0788536260420947, "learning_rate": 7.266747844442315e-07, "loss": 0.6204, "step": 1304 }, { "epoch": 0.6010189689415963, "grad_norm": 3.326645359641706, "learning_rate": 7.252394760300707e-07, "loss": 0.6226, "step": 1305 }, { "epoch": 0.6014795198756513, "grad_norm": 3.211498413419631, "learning_rate": 7.238047796136246e-07, "loss": 0.5597, "step": 1306 }, { "epoch": 0.6019400708097061, "grad_norm": 3.2233758236272863, "learning_rate": 7.223706983905153e-07, "loss": 0.5445, "step": 1307 }, { "epoch": 0.602400621743761, "grad_norm": 3.47488384908334, "learning_rate": 7.209372355549956e-07, "loss": 0.5055, "step": 1308 }, { "epoch": 0.6028611726778158, "grad_norm": 3.2688153234505664, "learning_rate": 7.195043942999404e-07, "loss": 0.6033, "step": 1309 }, { "epoch": 0.6033217236118708, "grad_norm": 2.95887248920406, "learning_rate": 7.180721778168397e-07, "loss": 0.4693, "step": 1310 }, { "epoch": 0.6037822745459256, "grad_norm": 3.132075461669666, "learning_rate": 7.166405892957925e-07, "loss": 0.602, "step": 1311 }, { "epoch": 0.6042428254799804, "grad_norm": 3.177087477555439, "learning_rate": 7.152096319254988e-07, "loss": 0.5589, "step": 1312 }, { "epoch": 0.6047033764140353, "grad_norm": 3.189488580952695, "learning_rate": 7.137793088932533e-07, "loss": 0.6641, "step": 1313 }, { "epoch": 0.6051639273480901, "grad_norm": 2.9722614592308263, "learning_rate": 7.123496233849367e-07, "loss": 0.5488, "step": 1314 }, { "epoch": 0.605624478282145, "grad_norm": 2.7544767985522443, "learning_rate": 7.109205785850106e-07, "loss": 0.4281, "step": 1315 }, { "epoch": 0.6060850292161999, "grad_norm": 3.3634419751440388, "learning_rate": 7.094921776765094e-07, "loss": 0.509, "step": 1316 }, { "epoch": 0.6065455801502547, "grad_norm": 2.9636798760844845, "learning_rate": 7.080644238410325e-07, "loss": 0.5577, "step": 1317 }, { "epoch": 0.6070061310843096, "grad_norm": 3.4072070822619063, "learning_rate": 7.066373202587397e-07, "loss": 0.6024, "step": 1318 }, { "epoch": 0.6074666820183645, "grad_norm": 3.107976483566616, "learning_rate": 7.052108701083407e-07, "loss": 0.5571, "step": 1319 }, { "epoch": 0.6079272329524193, "grad_norm": 2.965152268025164, "learning_rate": 7.0378507656709e-07, "loss": 0.5274, "step": 1320 }, { "epoch": 0.6083877838864742, "grad_norm": 2.910489708230388, "learning_rate": 7.023599428107814e-07, "loss": 0.4719, "step": 1321 }, { "epoch": 0.608848334820529, "grad_norm": 3.1173838425844553, "learning_rate": 7.009354720137364e-07, "loss": 0.5094, "step": 1322 }, { "epoch": 0.609308885754584, "grad_norm": 2.797005334303556, "learning_rate": 6.995116673488014e-07, "loss": 0.6097, "step": 1323 }, { "epoch": 0.6097694366886388, "grad_norm": 3.0227555124071928, "learning_rate": 6.980885319873397e-07, "loss": 0.5937, "step": 1324 }, { "epoch": 0.6102299876226936, "grad_norm": 2.777519608456516, "learning_rate": 6.966660690992214e-07, "loss": 0.4724, "step": 1325 }, { "epoch": 0.6106905385567485, "grad_norm": 3.095622108896745, "learning_rate": 6.952442818528219e-07, "loss": 0.5058, "step": 1326 }, { "epoch": 0.6111510894908033, "grad_norm": 3.334941539522611, "learning_rate": 6.938231734150093e-07, "loss": 0.6349, "step": 1327 }, { "epoch": 0.6116116404248583, "grad_norm": 3.059763555645584, "learning_rate": 6.9240274695114e-07, "loss": 0.4978, "step": 1328 }, { "epoch": 0.6120721913589131, "grad_norm": 3.115604189349727, "learning_rate": 6.909830056250526e-07, "loss": 0.6158, "step": 1329 }, { "epoch": 0.612532742292968, "grad_norm": 3.5110850133149913, "learning_rate": 6.895639525990586e-07, "loss": 0.6059, "step": 1330 }, { "epoch": 0.6129932932270228, "grad_norm": 3.1973315575132717, "learning_rate": 6.881455910339369e-07, "loss": 0.5804, "step": 1331 }, { "epoch": 0.6134538441610777, "grad_norm": 3.2520702824243335, "learning_rate": 6.867279240889259e-07, "loss": 0.5976, "step": 1332 }, { "epoch": 0.6139143950951326, "grad_norm": 3.447876365367868, "learning_rate": 6.853109549217166e-07, "loss": 0.6177, "step": 1333 }, { "epoch": 0.6143749460291874, "grad_norm": 3.1913415774468676, "learning_rate": 6.838946866884467e-07, "loss": 0.6005, "step": 1334 }, { "epoch": 0.6148354969632422, "grad_norm": 3.445057324348534, "learning_rate": 6.824791225436918e-07, "loss": 0.5425, "step": 1335 }, { "epoch": 0.6152960478972972, "grad_norm": 3.7985479498424786, "learning_rate": 6.810642656404596e-07, "loss": 0.5883, "step": 1336 }, { "epoch": 0.615756598831352, "grad_norm": 2.9721621801480067, "learning_rate": 6.796501191301824e-07, "loss": 0.5032, "step": 1337 }, { "epoch": 0.6162171497654069, "grad_norm": 2.709696747780453, "learning_rate": 6.782366861627101e-07, "loss": 0.408, "step": 1338 }, { "epoch": 0.6166777006994617, "grad_norm": 2.797272959212533, "learning_rate": 6.768239698863033e-07, "loss": 0.4895, "step": 1339 }, { "epoch": 0.6171382516335165, "grad_norm": 2.687895200110118, "learning_rate": 6.754119734476266e-07, "loss": 0.4482, "step": 1340 }, { "epoch": 0.6175988025675715, "grad_norm": 3.0721198321588887, "learning_rate": 6.740006999917405e-07, "loss": 0.6072, "step": 1341 }, { "epoch": 0.6180593535016263, "grad_norm": 2.9971206089309823, "learning_rate": 6.725901526620959e-07, "loss": 0.596, "step": 1342 }, { "epoch": 0.6185199044356812, "grad_norm": 2.9204887920199987, "learning_rate": 6.711803346005258e-07, "loss": 0.4545, "step": 1343 }, { "epoch": 0.618980455369736, "grad_norm": 3.208202279508134, "learning_rate": 6.697712489472395e-07, "loss": 0.6018, "step": 1344 }, { "epoch": 0.619441006303791, "grad_norm": 2.698911787184527, "learning_rate": 6.68362898840814e-07, "loss": 0.5294, "step": 1345 }, { "epoch": 0.6199015572378458, "grad_norm": 2.628724441436368, "learning_rate": 6.669552874181888e-07, "loss": 0.4935, "step": 1346 }, { "epoch": 0.6203621081719006, "grad_norm": 3.6333914395043996, "learning_rate": 6.655484178146576e-07, "loss": 0.5121, "step": 1347 }, { "epoch": 0.6208226591059555, "grad_norm": 2.840757981463247, "learning_rate": 6.641422931638614e-07, "loss": 0.5378, "step": 1348 }, { "epoch": 0.6212832100400104, "grad_norm": 3.119838224826943, "learning_rate": 6.627369165977837e-07, "loss": 0.5759, "step": 1349 }, { "epoch": 0.6217437609740653, "grad_norm": 2.854001213576483, "learning_rate": 6.613322912467392e-07, "loss": 0.4695, "step": 1350 }, { "epoch": 0.6222043119081201, "grad_norm": 3.0516655650363695, "learning_rate": 6.599284202393708e-07, "loss": 0.5992, "step": 1351 }, { "epoch": 0.6226648628421749, "grad_norm": 3.1765489105582927, "learning_rate": 6.585253067026417e-07, "loss": 0.5748, "step": 1352 }, { "epoch": 0.6231254137762298, "grad_norm": 3.012215519926939, "learning_rate": 6.571229537618266e-07, "loss": 0.5621, "step": 1353 }, { "epoch": 0.6235859647102847, "grad_norm": 3.277543278020275, "learning_rate": 6.557213645405064e-07, "loss": 0.4244, "step": 1354 }, { "epoch": 0.6240465156443396, "grad_norm": 3.070612553633971, "learning_rate": 6.54320542160562e-07, "loss": 0.6149, "step": 1355 }, { "epoch": 0.6245070665783944, "grad_norm": 3.3615068454127046, "learning_rate": 6.529204897421643e-07, "loss": 0.5554, "step": 1356 }, { "epoch": 0.6249676175124492, "grad_norm": 3.3561738697585266, "learning_rate": 6.515212104037713e-07, "loss": 0.5889, "step": 1357 }, { "epoch": 0.6254281684465042, "grad_norm": 3.0933916171873483, "learning_rate": 6.50122707262118e-07, "loss": 0.4735, "step": 1358 }, { "epoch": 0.625888719380559, "grad_norm": 3.5136171225226622, "learning_rate": 6.487249834322095e-07, "loss": 0.6822, "step": 1359 }, { "epoch": 0.6263492703146138, "grad_norm": 3.133863907558626, "learning_rate": 6.473280420273172e-07, "loss": 0.5195, "step": 1360 }, { "epoch": 0.6268098212486687, "grad_norm": 3.302281968664348, "learning_rate": 6.459318861589685e-07, "loss": 0.6776, "step": 1361 }, { "epoch": 0.6272703721827236, "grad_norm": 3.1331202449009643, "learning_rate": 6.445365189369411e-07, "loss": 0.5329, "step": 1362 }, { "epoch": 0.6277309231167785, "grad_norm": 3.2107222157004673, "learning_rate": 6.431419434692563e-07, "loss": 0.4996, "step": 1363 }, { "epoch": 0.6281914740508333, "grad_norm": 3.3986716761760984, "learning_rate": 6.417481628621717e-07, "loss": 0.5303, "step": 1364 }, { "epoch": 0.6286520249848881, "grad_norm": 3.066320791566163, "learning_rate": 6.403551802201748e-07, "loss": 0.5218, "step": 1365 }, { "epoch": 0.629112575918943, "grad_norm": 3.2972142393202772, "learning_rate": 6.389629986459755e-07, "loss": 0.6122, "step": 1366 }, { "epoch": 0.6295731268529979, "grad_norm": 3.1560358559235415, "learning_rate": 6.375716212404989e-07, "loss": 0.539, "step": 1367 }, { "epoch": 0.6300336777870528, "grad_norm": 3.0343679900830036, "learning_rate": 6.3618105110288e-07, "loss": 0.4605, "step": 1368 }, { "epoch": 0.6304942287211076, "grad_norm": 3.050511631671474, "learning_rate": 6.347912913304548e-07, "loss": 0.6179, "step": 1369 }, { "epoch": 0.6309547796551624, "grad_norm": 3.2629876129159627, "learning_rate": 6.334023450187549e-07, "loss": 0.5378, "step": 1370 }, { "epoch": 0.6314153305892174, "grad_norm": 3.0550921504844113, "learning_rate": 6.320142152614993e-07, "loss": 0.5586, "step": 1371 }, { "epoch": 0.6318758815232722, "grad_norm": 2.87904745693612, "learning_rate": 6.306269051505888e-07, "loss": 0.5824, "step": 1372 }, { "epoch": 0.6323364324573271, "grad_norm": 4.120605427481767, "learning_rate": 6.292404177760986e-07, "loss": 0.5787, "step": 1373 }, { "epoch": 0.6327969833913819, "grad_norm": 3.281112660782104, "learning_rate": 6.278547562262706e-07, "loss": 0.5811, "step": 1374 }, { "epoch": 0.6332575343254369, "grad_norm": 3.154859004430421, "learning_rate": 6.264699235875084e-07, "loss": 0.6182, "step": 1375 }, { "epoch": 0.6337180852594917, "grad_norm": 3.0668232663377295, "learning_rate": 6.250859229443684e-07, "loss": 0.5067, "step": 1376 }, { "epoch": 0.6341786361935465, "grad_norm": 3.060022701793419, "learning_rate": 6.237027573795538e-07, "loss": 0.5955, "step": 1377 }, { "epoch": 0.6346391871276014, "grad_norm": 3.158763791575397, "learning_rate": 6.223204299739087e-07, "loss": 0.6123, "step": 1378 }, { "epoch": 0.6350997380616562, "grad_norm": 3.075043672464968, "learning_rate": 6.209389438064092e-07, "loss": 0.5822, "step": 1379 }, { "epoch": 0.6355602889957112, "grad_norm": 2.7672680741174247, "learning_rate": 6.19558301954158e-07, "loss": 0.5106, "step": 1380 }, { "epoch": 0.636020839929766, "grad_norm": 2.9893686560154107, "learning_rate": 6.181785074923777e-07, "loss": 0.523, "step": 1381 }, { "epoch": 0.6364813908638208, "grad_norm": 3.6442869591374722, "learning_rate": 6.167995634944025e-07, "loss": 0.6623, "step": 1382 }, { "epoch": 0.6369419417978757, "grad_norm": 2.9356555531231527, "learning_rate": 6.154214730316738e-07, "loss": 0.4692, "step": 1383 }, { "epoch": 0.6374024927319306, "grad_norm": 3.201063109690519, "learning_rate": 6.1404423917373e-07, "loss": 0.4439, "step": 1384 }, { "epoch": 0.6378630436659855, "grad_norm": 3.0104867074963315, "learning_rate": 6.126678649882019e-07, "loss": 0.4768, "step": 1385 }, { "epoch": 0.6383235946000403, "grad_norm": 3.327881299870761, "learning_rate": 6.112923535408073e-07, "loss": 0.4477, "step": 1386 }, { "epoch": 0.6387841455340951, "grad_norm": 3.0269280110830863, "learning_rate": 6.099177078953397e-07, "loss": 0.5409, "step": 1387 }, { "epoch": 0.6392446964681501, "grad_norm": 3.277885482165906, "learning_rate": 6.085439311136664e-07, "loss": 0.6291, "step": 1388 }, { "epoch": 0.6397052474022049, "grad_norm": 3.180671239132894, "learning_rate": 6.071710262557181e-07, "loss": 0.5016, "step": 1389 }, { "epoch": 0.6401657983362598, "grad_norm": 3.056915861723639, "learning_rate": 6.057989963794832e-07, "loss": 0.4639, "step": 1390 }, { "epoch": 0.6406263492703146, "grad_norm": 2.977693194989121, "learning_rate": 6.044278445410025e-07, "loss": 0.5895, "step": 1391 }, { "epoch": 0.6410869002043694, "grad_norm": 2.9786799583494266, "learning_rate": 6.030575737943595e-07, "loss": 0.4599, "step": 1392 }, { "epoch": 0.6415474511384244, "grad_norm": 2.8694400733669227, "learning_rate": 6.016881871916766e-07, "loss": 0.6239, "step": 1393 }, { "epoch": 0.6420080020724792, "grad_norm": 3.509458526598221, "learning_rate": 6.003196877831059e-07, "loss": 0.6965, "step": 1394 }, { "epoch": 0.642468553006534, "grad_norm": 3.131981687735861, "learning_rate": 5.989520786168235e-07, "loss": 0.5088, "step": 1395 }, { "epoch": 0.6429291039405889, "grad_norm": 3.532782457274475, "learning_rate": 5.975853627390232e-07, "loss": 0.5129, "step": 1396 }, { "epoch": 0.6433896548746438, "grad_norm": 2.5625501669909307, "learning_rate": 5.962195431939084e-07, "loss": 0.4677, "step": 1397 }, { "epoch": 0.6438502058086987, "grad_norm": 3.27442037003512, "learning_rate": 5.94854623023686e-07, "loss": 0.5125, "step": 1398 }, { "epoch": 0.6443107567427535, "grad_norm": 3.2218724725996184, "learning_rate": 5.934906052685603e-07, "loss": 0.6094, "step": 1399 }, { "epoch": 0.6447713076768083, "grad_norm": 3.5948100953063102, "learning_rate": 5.921274929667251e-07, "loss": 0.6102, "step": 1400 }, { "epoch": 0.6452318586108633, "grad_norm": 3.1588508612291766, "learning_rate": 5.907652891543576e-07, "loss": 0.5734, "step": 1401 }, { "epoch": 0.6456924095449181, "grad_norm": 3.700691639948302, "learning_rate": 5.894039968656114e-07, "loss": 0.5963, "step": 1402 }, { "epoch": 0.646152960478973, "grad_norm": 3.2297651679426806, "learning_rate": 5.880436191326092e-07, "loss": 0.6396, "step": 1403 }, { "epoch": 0.6466135114130278, "grad_norm": 3.241229586568738, "learning_rate": 5.866841589854381e-07, "loss": 0.5148, "step": 1404 }, { "epoch": 0.6470740623470826, "grad_norm": 3.2127854047817186, "learning_rate": 5.853256194521395e-07, "loss": 0.5669, "step": 1405 }, { "epoch": 0.6475346132811376, "grad_norm": 3.1150319104457234, "learning_rate": 5.83968003558706e-07, "loss": 0.5669, "step": 1406 }, { "epoch": 0.6479951642151924, "grad_norm": 3.1285530927395095, "learning_rate": 5.826113143290717e-07, "loss": 0.5897, "step": 1407 }, { "epoch": 0.6484557151492473, "grad_norm": 3.5323185152016388, "learning_rate": 5.812555547851068e-07, "loss": 0.5374, "step": 1408 }, { "epoch": 0.6489162660833021, "grad_norm": 3.4748049321150383, "learning_rate": 5.799007279466111e-07, "loss": 0.5503, "step": 1409 }, { "epoch": 0.649376817017357, "grad_norm": 3.404681472569695, "learning_rate": 5.785468368313076e-07, "loss": 0.5303, "step": 1410 }, { "epoch": 0.6498373679514119, "grad_norm": 2.9485744210160107, "learning_rate": 5.77193884454833e-07, "loss": 0.5113, "step": 1411 }, { "epoch": 0.6502979188854667, "grad_norm": 2.7092540440014807, "learning_rate": 5.758418738307351e-07, "loss": 0.5, "step": 1412 }, { "epoch": 0.6507584698195216, "grad_norm": 2.853364404240714, "learning_rate": 5.74490807970463e-07, "loss": 0.5665, "step": 1413 }, { "epoch": 0.6512190207535765, "grad_norm": 3.116516600142004, "learning_rate": 5.731406898833623e-07, "loss": 0.6661, "step": 1414 }, { "epoch": 0.6516795716876314, "grad_norm": 2.8236292161466294, "learning_rate": 5.717915225766661e-07, "loss": 0.4524, "step": 1415 }, { "epoch": 0.6521401226216862, "grad_norm": 3.0310468470234246, "learning_rate": 5.704433090554911e-07, "loss": 0.462, "step": 1416 }, { "epoch": 0.652600673555741, "grad_norm": 3.24038506868089, "learning_rate": 5.690960523228294e-07, "loss": 0.6213, "step": 1417 }, { "epoch": 0.6530612244897959, "grad_norm": 3.0890141559456157, "learning_rate": 5.677497553795409e-07, "loss": 0.4549, "step": 1418 }, { "epoch": 0.6535217754238508, "grad_norm": 2.7880931209315842, "learning_rate": 5.664044212243489e-07, "loss": 0.4132, "step": 1419 }, { "epoch": 0.6539823263579057, "grad_norm": 2.9558254074326427, "learning_rate": 5.650600528538318e-07, "loss": 0.494, "step": 1420 }, { "epoch": 0.6544428772919605, "grad_norm": 2.914896993211941, "learning_rate": 5.637166532624163e-07, "loss": 0.5937, "step": 1421 }, { "epoch": 0.6549034282260153, "grad_norm": 3.292628338829398, "learning_rate": 5.623742254423718e-07, "loss": 0.5269, "step": 1422 }, { "epoch": 0.6553639791600703, "grad_norm": 3.2242310456562673, "learning_rate": 5.610327723838037e-07, "loss": 0.7023, "step": 1423 }, { "epoch": 0.6558245300941251, "grad_norm": 3.240542534935448, "learning_rate": 5.596922970746449e-07, "loss": 0.5462, "step": 1424 }, { "epoch": 0.65628508102818, "grad_norm": 2.949873633276392, "learning_rate": 5.583528025006513e-07, "loss": 0.4527, "step": 1425 }, { "epoch": 0.6567456319622348, "grad_norm": 3.45439820707799, "learning_rate": 5.570142916453944e-07, "loss": 0.5727, "step": 1426 }, { "epoch": 0.6572061828962897, "grad_norm": 3.0037709983691574, "learning_rate": 5.556767674902548e-07, "loss": 0.5799, "step": 1427 }, { "epoch": 0.6576667338303446, "grad_norm": 2.846902934654531, "learning_rate": 5.54340233014414e-07, "loss": 0.5848, "step": 1428 }, { "epoch": 0.6581272847643994, "grad_norm": 3.193107289231858, "learning_rate": 5.530046911948505e-07, "loss": 0.4999, "step": 1429 }, { "epoch": 0.6585878356984542, "grad_norm": 3.396826464978981, "learning_rate": 5.516701450063316e-07, "loss": 0.4476, "step": 1430 }, { "epoch": 0.6590483866325091, "grad_norm": 3.8083097104308505, "learning_rate": 5.503365974214058e-07, "loss": 0.6308, "step": 1431 }, { "epoch": 0.659508937566564, "grad_norm": 2.9191556934998615, "learning_rate": 5.490040514103995e-07, "loss": 0.4147, "step": 1432 }, { "epoch": 0.6599694885006189, "grad_norm": 2.8784243722308553, "learning_rate": 5.476725099414062e-07, "loss": 0.5174, "step": 1433 }, { "epoch": 0.6604300394346737, "grad_norm": 3.350266406134782, "learning_rate": 5.463419759802817e-07, "loss": 0.6058, "step": 1434 }, { "epoch": 0.6608905903687285, "grad_norm": 3.1483929420773555, "learning_rate": 5.450124524906401e-07, "loss": 0.518, "step": 1435 }, { "epoch": 0.6613511413027835, "grad_norm": 3.4936502869433137, "learning_rate": 5.436839424338425e-07, "loss": 0.5507, "step": 1436 }, { "epoch": 0.6618116922368383, "grad_norm": 3.1219573705478125, "learning_rate": 5.423564487689929e-07, "loss": 0.4988, "step": 1437 }, { "epoch": 0.6622722431708932, "grad_norm": 3.582117403088983, "learning_rate": 5.410299744529332e-07, "loss": 0.5612, "step": 1438 }, { "epoch": 0.662732794104948, "grad_norm": 3.5187886191292295, "learning_rate": 5.397045224402326e-07, "loss": 0.5867, "step": 1439 }, { "epoch": 0.663193345039003, "grad_norm": 2.963457737411872, "learning_rate": 5.383800956831846e-07, "loss": 0.4929, "step": 1440 }, { "epoch": 0.6636538959730578, "grad_norm": 3.4468724975782883, "learning_rate": 5.370566971317989e-07, "loss": 0.6108, "step": 1441 }, { "epoch": 0.6641144469071126, "grad_norm": 3.1533494269546267, "learning_rate": 5.357343297337943e-07, "loss": 0.5902, "step": 1442 }, { "epoch": 0.6645749978411675, "grad_norm": 3.3732802906601522, "learning_rate": 5.344129964345934e-07, "loss": 0.4952, "step": 1443 }, { "epoch": 0.6650355487752223, "grad_norm": 2.9687293866646747, "learning_rate": 5.330927001773154e-07, "loss": 0.5435, "step": 1444 }, { "epoch": 0.6654960997092773, "grad_norm": 3.480755832372405, "learning_rate": 5.317734439027699e-07, "loss": 0.564, "step": 1445 }, { "epoch": 0.6659566506433321, "grad_norm": 2.9931832336561564, "learning_rate": 5.304552305494492e-07, "loss": 0.4201, "step": 1446 }, { "epoch": 0.6664172015773869, "grad_norm": 2.85584265239967, "learning_rate": 5.291380630535231e-07, "loss": 0.5615, "step": 1447 }, { "epoch": 0.6668777525114418, "grad_norm": 2.8281402193294056, "learning_rate": 5.278219443488328e-07, "loss": 0.5043, "step": 1448 }, { "epoch": 0.6673383034454967, "grad_norm": 3.0403775949677923, "learning_rate": 5.265068773668812e-07, "loss": 0.5079, "step": 1449 }, { "epoch": 0.6677988543795516, "grad_norm": 3.3174054936344897, "learning_rate": 5.251928650368307e-07, "loss": 0.5716, "step": 1450 }, { "epoch": 0.6682594053136064, "grad_norm": 3.081361444525388, "learning_rate": 5.238799102854941e-07, "loss": 0.4981, "step": 1451 }, { "epoch": 0.6687199562476612, "grad_norm": 3.1883940258418684, "learning_rate": 5.225680160373275e-07, "loss": 0.4924, "step": 1452 }, { "epoch": 0.6691805071817162, "grad_norm": 3.311273891600756, "learning_rate": 5.212571852144261e-07, "loss": 0.5837, "step": 1453 }, { "epoch": 0.669641058115771, "grad_norm": 3.138972108234792, "learning_rate": 5.199474207365162e-07, "loss": 0.5111, "step": 1454 }, { "epoch": 0.6701016090498259, "grad_norm": 2.749902984155162, "learning_rate": 5.186387255209481e-07, "loss": 0.4912, "step": 1455 }, { "epoch": 0.6705621599838807, "grad_norm": 3.201073779062438, "learning_rate": 5.173311024826915e-07, "loss": 0.5993, "step": 1456 }, { "epoch": 0.6710227109179355, "grad_norm": 3.054277406726679, "learning_rate": 5.160245545343274e-07, "loss": 0.5429, "step": 1457 }, { "epoch": 0.6714832618519905, "grad_norm": 3.262974854228882, "learning_rate": 5.147190845860426e-07, "loss": 0.67, "step": 1458 }, { "epoch": 0.6719438127860453, "grad_norm": 3.2252159866634584, "learning_rate": 5.134146955456218e-07, "loss": 0.4783, "step": 1459 }, { "epoch": 0.6724043637201002, "grad_norm": 3.3051119530232946, "learning_rate": 5.121113903184431e-07, "loss": 0.4966, "step": 1460 }, { "epoch": 0.672864914654155, "grad_norm": 3.064671697196766, "learning_rate": 5.108091718074705e-07, "loss": 0.6525, "step": 1461 }, { "epoch": 0.6733254655882099, "grad_norm": 2.857963094780648, "learning_rate": 5.095080429132459e-07, "loss": 0.5369, "step": 1462 }, { "epoch": 0.6737860165222648, "grad_norm": 3.310426657500392, "learning_rate": 5.082080065338872e-07, "loss": 0.5112, "step": 1463 }, { "epoch": 0.6742465674563196, "grad_norm": 3.225512724763038, "learning_rate": 5.069090655650762e-07, "loss": 0.581, "step": 1464 }, { "epoch": 0.6747071183903744, "grad_norm": 3.2679785563098784, "learning_rate": 5.05611222900055e-07, "loss": 0.5522, "step": 1465 }, { "epoch": 0.6751676693244294, "grad_norm": 3.446697533304695, "learning_rate": 5.043144814296215e-07, "loss": 0.5458, "step": 1466 }, { "epoch": 0.6756282202584842, "grad_norm": 3.180139270812277, "learning_rate": 5.030188440421185e-07, "loss": 0.4763, "step": 1467 }, { "epoch": 0.6760887711925391, "grad_norm": 3.448958257550766, "learning_rate": 5.017243136234298e-07, "loss": 0.5669, "step": 1468 }, { "epoch": 0.6765493221265939, "grad_norm": 3.5331462163297243, "learning_rate": 5.004308930569757e-07, "loss": 0.6389, "step": 1469 }, { "epoch": 0.6770098730606487, "grad_norm": 2.9967238157665124, "learning_rate": 4.991385852237017e-07, "loss": 0.5617, "step": 1470 }, { "epoch": 0.6774704239947037, "grad_norm": 2.7688203871377235, "learning_rate": 4.978473930020767e-07, "loss": 0.4754, "step": 1471 }, { "epoch": 0.6779309749287585, "grad_norm": 3.266647952459185, "learning_rate": 4.965573192680841e-07, "loss": 0.5109, "step": 1472 }, { "epoch": 0.6783915258628134, "grad_norm": 2.825024520007373, "learning_rate": 4.952683668952152e-07, "loss": 0.4731, "step": 1473 }, { "epoch": 0.6788520767968682, "grad_norm": 3.426822674267336, "learning_rate": 4.939805387544649e-07, "loss": 0.5494, "step": 1474 }, { "epoch": 0.6793126277309232, "grad_norm": 3.4379075787612448, "learning_rate": 4.926938377143232e-07, "loss": 0.6234, "step": 1475 }, { "epoch": 0.679773178664978, "grad_norm": 3.037669101666441, "learning_rate": 4.914082666407704e-07, "loss": 0.4952, "step": 1476 }, { "epoch": 0.6802337295990328, "grad_norm": 3.1953172155925977, "learning_rate": 4.901238283972685e-07, "loss": 0.4285, "step": 1477 }, { "epoch": 0.6806942805330877, "grad_norm": 3.2567976107387646, "learning_rate": 4.888405258447576e-07, "loss": 0.5178, "step": 1478 }, { "epoch": 0.6811548314671426, "grad_norm": 3.0243656981433, "learning_rate": 4.875583618416481e-07, "loss": 0.5382, "step": 1479 }, { "epoch": 0.6816153824011975, "grad_norm": 2.995186764899891, "learning_rate": 4.862773392438131e-07, "loss": 0.5426, "step": 1480 }, { "epoch": 0.6820759333352523, "grad_norm": 3.6636504352304686, "learning_rate": 4.849974609045848e-07, "loss": 0.6404, "step": 1481 }, { "epoch": 0.6825364842693071, "grad_norm": 2.692736667622777, "learning_rate": 4.837187296747463e-07, "loss": 0.5281, "step": 1482 }, { "epoch": 0.682997035203362, "grad_norm": 2.7042483128746064, "learning_rate": 4.82441148402525e-07, "loss": 0.575, "step": 1483 }, { "epoch": 0.6834575861374169, "grad_norm": 3.0352564028625952, "learning_rate": 4.811647199335877e-07, "loss": 0.5098, "step": 1484 }, { "epoch": 0.6839181370714718, "grad_norm": 3.127898271916526, "learning_rate": 4.798894471110336e-07, "loss": 0.5613, "step": 1485 }, { "epoch": 0.6843786880055266, "grad_norm": 2.950470950095703, "learning_rate": 4.786153327753864e-07, "loss": 0.5638, "step": 1486 }, { "epoch": 0.6848392389395814, "grad_norm": 2.9327112511286777, "learning_rate": 4.773423797645911e-07, "loss": 0.5308, "step": 1487 }, { "epoch": 0.6852997898736364, "grad_norm": 3.2221808265567917, "learning_rate": 4.76070590914005e-07, "loss": 0.5875, "step": 1488 }, { "epoch": 0.6857603408076912, "grad_norm": 2.9032838510402383, "learning_rate": 4.747999690563932e-07, "loss": 0.4657, "step": 1489 }, { "epoch": 0.686220891741746, "grad_norm": 3.253618175545866, "learning_rate": 4.7353051702191994e-07, "loss": 0.5498, "step": 1490 }, { "epoch": 0.6866814426758009, "grad_norm": 3.3055447616805296, "learning_rate": 4.7226223763814545e-07, "loss": 0.5192, "step": 1491 }, { "epoch": 0.6871419936098558, "grad_norm": 3.662312307552012, "learning_rate": 4.709951337300174e-07, "loss": 0.5508, "step": 1492 }, { "epoch": 0.6876025445439107, "grad_norm": 3.0362093035408795, "learning_rate": 4.697292081198646e-07, "loss": 0.4648, "step": 1493 }, { "epoch": 0.6880630954779655, "grad_norm": 3.1492972902080627, "learning_rate": 4.684644636273922e-07, "loss": 0.4502, "step": 1494 }, { "epoch": 0.6885236464120204, "grad_norm": 2.9996769191548416, "learning_rate": 4.6720090306967465e-07, "loss": 0.6546, "step": 1495 }, { "epoch": 0.6889841973460752, "grad_norm": 2.9045793925682104, "learning_rate": 4.6593852926114784e-07, "loss": 0.4884, "step": 1496 }, { "epoch": 0.6894447482801301, "grad_norm": 3.1298670395653962, "learning_rate": 4.646773450136067e-07, "loss": 0.5642, "step": 1497 }, { "epoch": 0.689905299214185, "grad_norm": 3.0495111305757163, "learning_rate": 4.634173531361947e-07, "loss": 0.5585, "step": 1498 }, { "epoch": 0.6903658501482398, "grad_norm": 3.020709936806421, "learning_rate": 4.6215855643539903e-07, "loss": 0.4491, "step": 1499 }, { "epoch": 0.6908264010822946, "grad_norm": 3.1842354060638467, "learning_rate": 4.609009577150472e-07, "loss": 0.5734, "step": 1500 }, { "epoch": 0.6912869520163496, "grad_norm": 3.4495373317520914, "learning_rate": 4.5964455977629593e-07, "loss": 0.5147, "step": 1501 }, { "epoch": 0.6917475029504044, "grad_norm": 2.8702835074826494, "learning_rate": 4.583893654176285e-07, "loss": 0.4863, "step": 1502 }, { "epoch": 0.6922080538844593, "grad_norm": 2.929782205551764, "learning_rate": 4.5713537743484754e-07, "loss": 0.4934, "step": 1503 }, { "epoch": 0.6926686048185141, "grad_norm": 3.0017796045087017, "learning_rate": 4.5588259862106725e-07, "loss": 0.5107, "step": 1504 }, { "epoch": 0.6931291557525691, "grad_norm": 3.484258951507888, "learning_rate": 4.5463103176671016e-07, "loss": 0.6145, "step": 1505 }, { "epoch": 0.6935897066866239, "grad_norm": 2.7555413280578143, "learning_rate": 4.533806796594989e-07, "loss": 0.5649, "step": 1506 }, { "epoch": 0.6940502576206787, "grad_norm": 3.3509703674324425, "learning_rate": 4.521315450844492e-07, "loss": 0.5738, "step": 1507 }, { "epoch": 0.6945108085547336, "grad_norm": 3.075909066062986, "learning_rate": 4.508836308238664e-07, "loss": 0.5215, "step": 1508 }, { "epoch": 0.6949713594887884, "grad_norm": 3.116185512270017, "learning_rate": 4.4963693965733686e-07, "loss": 0.5707, "step": 1509 }, { "epoch": 0.6954319104228434, "grad_norm": 3.301765717057608, "learning_rate": 4.483914743617235e-07, "loss": 0.6423, "step": 1510 }, { "epoch": 0.6958924613568982, "grad_norm": 3.517582404076343, "learning_rate": 4.471472377111574e-07, "loss": 0.6392, "step": 1511 }, { "epoch": 0.696353012290953, "grad_norm": 3.4096020565867193, "learning_rate": 4.459042324770338e-07, "loss": 0.59, "step": 1512 }, { "epoch": 0.6968135632250079, "grad_norm": 3.2420031543237977, "learning_rate": 4.446624614280058e-07, "loss": 0.5858, "step": 1513 }, { "epoch": 0.6972741141590628, "grad_norm": 3.3459346730805, "learning_rate": 4.4342192732997565e-07, "loss": 0.5173, "step": 1514 }, { "epoch": 0.6977346650931177, "grad_norm": 3.2862422773543147, "learning_rate": 4.4218263294609205e-07, "loss": 0.5255, "step": 1515 }, { "epoch": 0.6981952160271725, "grad_norm": 3.1669321223815614, "learning_rate": 4.4094458103674204e-07, "loss": 0.5333, "step": 1516 }, { "epoch": 0.6986557669612273, "grad_norm": 2.9341763845722686, "learning_rate": 4.397077743595444e-07, "loss": 0.5475, "step": 1517 }, { "epoch": 0.6991163178952823, "grad_norm": 3.298479093779986, "learning_rate": 4.384722156693451e-07, "loss": 0.6168, "step": 1518 }, { "epoch": 0.6995768688293371, "grad_norm": 3.774693547188722, "learning_rate": 4.3723790771821067e-07, "loss": 0.4971, "step": 1519 }, { "epoch": 0.700037419763392, "grad_norm": 3.2701266862488003, "learning_rate": 4.3600485325542047e-07, "loss": 0.5021, "step": 1520 }, { "epoch": 0.7004979706974468, "grad_norm": 2.8544733630249457, "learning_rate": 4.3477305502746275e-07, "loss": 0.5578, "step": 1521 }, { "epoch": 0.7009585216315016, "grad_norm": 2.859027758778164, "learning_rate": 4.335425157780277e-07, "loss": 0.5669, "step": 1522 }, { "epoch": 0.7014190725655566, "grad_norm": 3.1340569464788226, "learning_rate": 4.323132382480015e-07, "loss": 0.4886, "step": 1523 }, { "epoch": 0.7018796234996114, "grad_norm": 3.533977315795044, "learning_rate": 4.3108522517545866e-07, "loss": 0.6171, "step": 1524 }, { "epoch": 0.7023401744336663, "grad_norm": 3.1550549229148186, "learning_rate": 4.2985847929565865e-07, "loss": 0.5375, "step": 1525 }, { "epoch": 0.7028007253677211, "grad_norm": 2.7951674621480063, "learning_rate": 4.2863300334103837e-07, "loss": 0.4927, "step": 1526 }, { "epoch": 0.703261276301776, "grad_norm": 3.404007775142097, "learning_rate": 4.2740880004120474e-07, "loss": 0.5759, "step": 1527 }, { "epoch": 0.7037218272358309, "grad_norm": 3.0714696704935154, "learning_rate": 4.2618587212293147e-07, "loss": 0.5976, "step": 1528 }, { "epoch": 0.7041823781698857, "grad_norm": 3.6804392641687826, "learning_rate": 4.2496422231015115e-07, "loss": 0.6249, "step": 1529 }, { "epoch": 0.7046429291039406, "grad_norm": 3.2731509390374627, "learning_rate": 4.237438533239488e-07, "loss": 0.5356, "step": 1530 }, { "epoch": 0.7051034800379955, "grad_norm": 3.6052417982765563, "learning_rate": 4.2252476788255733e-07, "loss": 0.5792, "step": 1531 }, { "epoch": 0.7055640309720503, "grad_norm": 3.3733022802458446, "learning_rate": 4.213069687013505e-07, "loss": 0.6865, "step": 1532 }, { "epoch": 0.7060245819061052, "grad_norm": 2.6172790916082618, "learning_rate": 4.200904584928373e-07, "loss": 0.4629, "step": 1533 }, { "epoch": 0.70648513284016, "grad_norm": 3.3735377620802764, "learning_rate": 4.1887523996665474e-07, "loss": 0.6296, "step": 1534 }, { "epoch": 0.7069456837742149, "grad_norm": 3.5155816418818584, "learning_rate": 4.176613158295639e-07, "loss": 0.6252, "step": 1535 }, { "epoch": 0.7074062347082698, "grad_norm": 2.9319016244934795, "learning_rate": 4.164486887854424e-07, "loss": 0.6537, "step": 1536 }, { "epoch": 0.7078667856423246, "grad_norm": 3.0752439651363703, "learning_rate": 4.15237361535278e-07, "loss": 0.6219, "step": 1537 }, { "epoch": 0.7083273365763795, "grad_norm": 3.772619082368496, "learning_rate": 4.140273367771643e-07, "loss": 0.5692, "step": 1538 }, { "epoch": 0.7087878875104343, "grad_norm": 3.0735890994766457, "learning_rate": 4.1281861720629374e-07, "loss": 0.5757, "step": 1539 }, { "epoch": 0.7092484384444893, "grad_norm": 3.449289367509567, "learning_rate": 4.1161120551495023e-07, "loss": 0.6328, "step": 1540 }, { "epoch": 0.7097089893785441, "grad_norm": 3.4333027638235, "learning_rate": 4.1040510439250676e-07, "loss": 0.453, "step": 1541 }, { "epoch": 0.7101695403125989, "grad_norm": 3.4665469813554695, "learning_rate": 4.092003165254154e-07, "loss": 0.5686, "step": 1542 }, { "epoch": 0.7106300912466538, "grad_norm": 2.979020490366814, "learning_rate": 4.0799684459720295e-07, "loss": 0.459, "step": 1543 }, { "epoch": 0.7110906421807087, "grad_norm": 3.180076549332751, "learning_rate": 4.067946912884672e-07, "loss": 0.6623, "step": 1544 }, { "epoch": 0.7115511931147636, "grad_norm": 3.345275079523344, "learning_rate": 4.055938592768663e-07, "loss": 0.5705, "step": 1545 }, { "epoch": 0.7120117440488184, "grad_norm": 2.9898569750138875, "learning_rate": 4.0439435123711707e-07, "loss": 0.4446, "step": 1546 }, { "epoch": 0.7124722949828732, "grad_norm": 2.892301015122262, "learning_rate": 4.031961698409869e-07, "loss": 0.4983, "step": 1547 }, { "epoch": 0.7129328459169281, "grad_norm": 3.291501709727223, "learning_rate": 4.0199931775728767e-07, "loss": 0.6132, "step": 1548 }, { "epoch": 0.713393396850983, "grad_norm": 3.1031247090774974, "learning_rate": 4.008037976518711e-07, "loss": 0.5285, "step": 1549 }, { "epoch": 0.7138539477850379, "grad_norm": 3.5470619388484623, "learning_rate": 3.996096121876221e-07, "loss": 0.4594, "step": 1550 }, { "epoch": 0.7143144987190927, "grad_norm": 3.095992564328872, "learning_rate": 3.984167640244518e-07, "loss": 0.5343, "step": 1551 }, { "epoch": 0.7147750496531475, "grad_norm": 3.528528857914403, "learning_rate": 3.972252558192938e-07, "loss": 0.5564, "step": 1552 }, { "epoch": 0.7152356005872025, "grad_norm": 3.0649641218559673, "learning_rate": 3.960350902260967e-07, "loss": 0.4632, "step": 1553 }, { "epoch": 0.7156961515212573, "grad_norm": 3.4153270827868747, "learning_rate": 3.948462698958188e-07, "loss": 0.4675, "step": 1554 }, { "epoch": 0.7161567024553122, "grad_norm": 3.1991732501247507, "learning_rate": 3.9365879747642106e-07, "loss": 0.5517, "step": 1555 }, { "epoch": 0.716617253389367, "grad_norm": 2.9871782936902713, "learning_rate": 3.924726756128631e-07, "loss": 0.5189, "step": 1556 }, { "epoch": 0.7170778043234219, "grad_norm": 2.9744051796884334, "learning_rate": 3.912879069470966e-07, "loss": 0.537, "step": 1557 }, { "epoch": 0.7175383552574768, "grad_norm": 2.9363741803915433, "learning_rate": 3.9010449411805747e-07, "loss": 0.6124, "step": 1558 }, { "epoch": 0.7179989061915316, "grad_norm": 2.9230129555443094, "learning_rate": 3.889224397616635e-07, "loss": 0.4577, "step": 1559 }, { "epoch": 0.7184594571255865, "grad_norm": 2.9997803472056392, "learning_rate": 3.8774174651080596e-07, "loss": 0.5885, "step": 1560 }, { "epoch": 0.7189200080596413, "grad_norm": 3.121077182785591, "learning_rate": 3.865624169953439e-07, "loss": 0.617, "step": 1561 }, { "epoch": 0.7193805589936962, "grad_norm": 3.3670297246006378, "learning_rate": 3.853844538420993e-07, "loss": 0.5977, "step": 1562 }, { "epoch": 0.7198411099277511, "grad_norm": 2.842166340649367, "learning_rate": 3.8420785967485115e-07, "loss": 0.5409, "step": 1563 }, { "epoch": 0.7203016608618059, "grad_norm": 3.5406635193421656, "learning_rate": 3.83032637114328e-07, "loss": 0.6372, "step": 1564 }, { "epoch": 0.7207622117958608, "grad_norm": 3.194617429657409, "learning_rate": 3.8185878877820443e-07, "loss": 0.5469, "step": 1565 }, { "epoch": 0.7212227627299157, "grad_norm": 3.417157379310719, "learning_rate": 3.806863172810936e-07, "loss": 0.4731, "step": 1566 }, { "epoch": 0.7216833136639705, "grad_norm": 3.231247160093252, "learning_rate": 3.7951522523454214e-07, "loss": 0.6959, "step": 1567 }, { "epoch": 0.7221438645980254, "grad_norm": 3.358643739910038, "learning_rate": 3.7834551524702364e-07, "loss": 0.616, "step": 1568 }, { "epoch": 0.7226044155320802, "grad_norm": 3.159395399638094, "learning_rate": 3.7717718992393365e-07, "loss": 0.539, "step": 1569 }, { "epoch": 0.7230649664661352, "grad_norm": 3.288938398533548, "learning_rate": 3.760102518675839e-07, "loss": 0.6766, "step": 1570 }, { "epoch": 0.72352551740019, "grad_norm": 3.1929425292220484, "learning_rate": 3.748447036771949e-07, "loss": 0.5312, "step": 1571 }, { "epoch": 0.7239860683342448, "grad_norm": 3.1532440738029455, "learning_rate": 3.736805479488936e-07, "loss": 0.5694, "step": 1572 }, { "epoch": 0.7244466192682997, "grad_norm": 3.1699423213216726, "learning_rate": 3.7251778727570305e-07, "loss": 0.5722, "step": 1573 }, { "epoch": 0.7249071702023545, "grad_norm": 2.9111701929100198, "learning_rate": 3.7135642424753967e-07, "loss": 0.5001, "step": 1574 }, { "epoch": 0.7253677211364095, "grad_norm": 3.1237883324779623, "learning_rate": 3.701964614512082e-07, "loss": 0.5708, "step": 1575 }, { "epoch": 0.7258282720704643, "grad_norm": 2.7652944931913623, "learning_rate": 3.690379014703928e-07, "loss": 0.4368, "step": 1576 }, { "epoch": 0.7262888230045191, "grad_norm": 3.209037928138226, "learning_rate": 3.67880746885653e-07, "loss": 0.6249, "step": 1577 }, { "epoch": 0.726749373938574, "grad_norm": 3.0792001259828323, "learning_rate": 3.667250002744199e-07, "loss": 0.5269, "step": 1578 }, { "epoch": 0.7272099248726289, "grad_norm": 3.3870656223423428, "learning_rate": 3.6557066421098604e-07, "loss": 0.4438, "step": 1579 }, { "epoch": 0.7276704758066838, "grad_norm": 3.187784911852215, "learning_rate": 3.644177412665039e-07, "loss": 0.4357, "step": 1580 }, { "epoch": 0.7281310267407386, "grad_norm": 3.3629689440356927, "learning_rate": 3.6326623400897796e-07, "loss": 0.5516, "step": 1581 }, { "epoch": 0.7285915776747934, "grad_norm": 3.579677642223135, "learning_rate": 3.621161450032586e-07, "loss": 0.6288, "step": 1582 }, { "epoch": 0.7290521286088484, "grad_norm": 2.918537780420392, "learning_rate": 3.609674768110381e-07, "loss": 0.5887, "step": 1583 }, { "epoch": 0.7295126795429032, "grad_norm": 3.2728257749235903, "learning_rate": 3.59820231990844e-07, "loss": 0.6692, "step": 1584 }, { "epoch": 0.7299732304769581, "grad_norm": 3.1155337342093614, "learning_rate": 3.5867441309803325e-07, "loss": 0.4757, "step": 1585 }, { "epoch": 0.7304337814110129, "grad_norm": 3.017811596425641, "learning_rate": 3.5753002268478625e-07, "loss": 0.5903, "step": 1586 }, { "epoch": 0.7308943323450677, "grad_norm": 3.5070818468579708, "learning_rate": 3.5638706330010236e-07, "loss": 0.5974, "step": 1587 }, { "epoch": 0.7313548832791227, "grad_norm": 3.013090840123994, "learning_rate": 3.552455374897935e-07, "loss": 0.536, "step": 1588 }, { "epoch": 0.7318154342131775, "grad_norm": 2.7740669709707517, "learning_rate": 3.5410544779647735e-07, "loss": 0.4466, "step": 1589 }, { "epoch": 0.7322759851472324, "grad_norm": 3.437946984823077, "learning_rate": 3.529667967595742e-07, "loss": 0.5002, "step": 1590 }, { "epoch": 0.7327365360812872, "grad_norm": 2.813883495540624, "learning_rate": 3.518295869152994e-07, "loss": 0.4955, "step": 1591 }, { "epoch": 0.7331970870153421, "grad_norm": 2.8502290667648342, "learning_rate": 3.5069382079665763e-07, "loss": 0.4805, "step": 1592 }, { "epoch": 0.733657637949397, "grad_norm": 3.234497100576854, "learning_rate": 3.4955950093343857e-07, "loss": 0.7048, "step": 1593 }, { "epoch": 0.7341181888834518, "grad_norm": 3.131622303838718, "learning_rate": 3.484266298522106e-07, "loss": 0.5671, "step": 1594 }, { "epoch": 0.7345787398175067, "grad_norm": 2.875003807206262, "learning_rate": 3.472952100763141e-07, "loss": 0.5951, "step": 1595 }, { "epoch": 0.7350392907515616, "grad_norm": 2.967122454191587, "learning_rate": 3.461652441258579e-07, "loss": 0.5307, "step": 1596 }, { "epoch": 0.7354998416856164, "grad_norm": 3.501420492607783, "learning_rate": 3.450367345177122e-07, "loss": 0.5531, "step": 1597 }, { "epoch": 0.7359603926196713, "grad_norm": 3.0527813124759535, "learning_rate": 3.4390968376550367e-07, "loss": 0.5332, "step": 1598 }, { "epoch": 0.7364209435537261, "grad_norm": 2.8619147841769266, "learning_rate": 3.4278409437960865e-07, "loss": 0.5634, "step": 1599 }, { "epoch": 0.7368814944877811, "grad_norm": 3.1275902113895264, "learning_rate": 3.4165996886714944e-07, "loss": 0.5385, "step": 1600 }, { "epoch": 0.7373420454218359, "grad_norm": 2.93151492063357, "learning_rate": 3.405373097319875e-07, "loss": 0.489, "step": 1601 }, { "epoch": 0.7378025963558907, "grad_norm": 3.2966345212103354, "learning_rate": 3.3941611947471703e-07, "loss": 0.5367, "step": 1602 }, { "epoch": 0.7382631472899456, "grad_norm": 2.7068239388271516, "learning_rate": 3.3829640059266283e-07, "loss": 0.5216, "step": 1603 }, { "epoch": 0.7387236982240004, "grad_norm": 3.435870012306948, "learning_rate": 3.3717815557987027e-07, "loss": 0.5401, "step": 1604 }, { "epoch": 0.7391842491580554, "grad_norm": 3.774078430992497, "learning_rate": 3.360613869271016e-07, "loss": 0.5301, "step": 1605 }, { "epoch": 0.7396448000921102, "grad_norm": 2.974006922477261, "learning_rate": 3.349460971218332e-07, "loss": 0.5254, "step": 1606 }, { "epoch": 0.740105351026165, "grad_norm": 3.1026779731207106, "learning_rate": 3.3383228864824496e-07, "loss": 0.4061, "step": 1607 }, { "epoch": 0.7405659019602199, "grad_norm": 3.1384179646636836, "learning_rate": 3.327199639872177e-07, "loss": 0.5428, "step": 1608 }, { "epoch": 0.7410264528942748, "grad_norm": 2.860698890001966, "learning_rate": 3.316091256163288e-07, "loss": 0.5852, "step": 1609 }, { "epoch": 0.7414870038283297, "grad_norm": 2.8765636527594154, "learning_rate": 3.3049977600984304e-07, "loss": 0.4724, "step": 1610 }, { "epoch": 0.7419475547623845, "grad_norm": 3.3500780381811746, "learning_rate": 3.293919176387104e-07, "loss": 0.6346, "step": 1611 }, { "epoch": 0.7424081056964393, "grad_norm": 3.7026885508653193, "learning_rate": 3.2828555297055946e-07, "loss": 0.5841, "step": 1612 }, { "epoch": 0.7428686566304943, "grad_norm": 3.0274792259951737, "learning_rate": 3.271806844696905e-07, "loss": 0.5425, "step": 1613 }, { "epoch": 0.7433292075645491, "grad_norm": 3.746883296992905, "learning_rate": 3.260773145970723e-07, "loss": 0.6408, "step": 1614 }, { "epoch": 0.743789758498604, "grad_norm": 3.357176161663444, "learning_rate": 3.2497544581033555e-07, "loss": 0.6193, "step": 1615 }, { "epoch": 0.7442503094326588, "grad_norm": 3.59282723815335, "learning_rate": 3.2387508056376724e-07, "loss": 0.5914, "step": 1616 }, { "epoch": 0.7447108603667136, "grad_norm": 3.3394681551065184, "learning_rate": 3.2277622130830505e-07, "loss": 0.5148, "step": 1617 }, { "epoch": 0.7451714113007686, "grad_norm": 2.8985388546730535, "learning_rate": 3.216788704915327e-07, "loss": 0.5079, "step": 1618 }, { "epoch": 0.7456319622348234, "grad_norm": 3.069882169066366, "learning_rate": 3.2058303055767443e-07, "loss": 0.4949, "step": 1619 }, { "epoch": 0.7460925131688783, "grad_norm": 2.911216557138714, "learning_rate": 3.19488703947588e-07, "loss": 0.585, "step": 1620 }, { "epoch": 0.7465530641029331, "grad_norm": 3.442946480558847, "learning_rate": 3.1839589309876115e-07, "loss": 0.544, "step": 1621 }, { "epoch": 0.747013615036988, "grad_norm": 2.7674990139149847, "learning_rate": 3.1730460044530573e-07, "loss": 0.5363, "step": 1622 }, { "epoch": 0.7474741659710429, "grad_norm": 3.229630271750196, "learning_rate": 3.1621482841795124e-07, "loss": 0.6189, "step": 1623 }, { "epoch": 0.7479347169050977, "grad_norm": 3.1155759443087945, "learning_rate": 3.151265794440404e-07, "loss": 0.4911, "step": 1624 }, { "epoch": 0.7483952678391526, "grad_norm": 3.4937874237026114, "learning_rate": 3.140398559475244e-07, "loss": 0.4647, "step": 1625 }, { "epoch": 0.7488558187732075, "grad_norm": 3.2934599527181034, "learning_rate": 3.129546603489548e-07, "loss": 0.4893, "step": 1626 }, { "epoch": 0.7493163697072623, "grad_norm": 3.0506555877752595, "learning_rate": 3.1187099506548153e-07, "loss": 0.5267, "step": 1627 }, { "epoch": 0.7497769206413172, "grad_norm": 3.5830543718291015, "learning_rate": 3.1078886251084525e-07, "loss": 0.5325, "step": 1628 }, { "epoch": 0.750237471575372, "grad_norm": 3.088278820265878, "learning_rate": 3.0970826509537304e-07, "loss": 0.5628, "step": 1629 }, { "epoch": 0.7506980225094269, "grad_norm": 3.1594316317748294, "learning_rate": 3.0862920522597167e-07, "loss": 0.4871, "step": 1630 }, { "epoch": 0.7511585734434818, "grad_norm": 2.8769280379230735, "learning_rate": 3.075516853061244e-07, "loss": 0.4936, "step": 1631 }, { "epoch": 0.7516191243775366, "grad_norm": 3.4881331778008176, "learning_rate": 3.0647570773588403e-07, "loss": 0.5471, "step": 1632 }, { "epoch": 0.7520796753115915, "grad_norm": 3.006655258177075, "learning_rate": 3.0540127491186727e-07, "loss": 0.4765, "step": 1633 }, { "epoch": 0.7525402262456463, "grad_norm": 3.4773351804629296, "learning_rate": 3.043283892272508e-07, "loss": 0.5993, "step": 1634 }, { "epoch": 0.7530007771797013, "grad_norm": 2.9508457470911256, "learning_rate": 3.0325705307176564e-07, "loss": 0.444, "step": 1635 }, { "epoch": 0.7534613281137561, "grad_norm": 3.593179054371365, "learning_rate": 3.0218726883168955e-07, "loss": 0.5524, "step": 1636 }, { "epoch": 0.7539218790478109, "grad_norm": 2.985359000550235, "learning_rate": 3.011190388898464e-07, "loss": 0.4384, "step": 1637 }, { "epoch": 0.7543824299818658, "grad_norm": 3.0881433691468723, "learning_rate": 3.0005236562559566e-07, "loss": 0.5832, "step": 1638 }, { "epoch": 0.7548429809159207, "grad_norm": 2.64976112453638, "learning_rate": 2.989872514148298e-07, "loss": 0.5254, "step": 1639 }, { "epoch": 0.7553035318499756, "grad_norm": 3.178227072350045, "learning_rate": 2.9792369862997046e-07, "loss": 0.4896, "step": 1640 }, { "epoch": 0.7557640827840304, "grad_norm": 3.1094527462428543, "learning_rate": 2.9686170963995915e-07, "loss": 0.5024, "step": 1641 }, { "epoch": 0.7562246337180852, "grad_norm": 2.830372620158942, "learning_rate": 2.958012868102553e-07, "loss": 0.5466, "step": 1642 }, { "epoch": 0.7566851846521401, "grad_norm": 3.5892656567533057, "learning_rate": 2.9474243250283035e-07, "loss": 0.5726, "step": 1643 }, { "epoch": 0.757145735586195, "grad_norm": 3.1400502525766463, "learning_rate": 2.936851490761606e-07, "loss": 0.6686, "step": 1644 }, { "epoch": 0.7576062865202499, "grad_norm": 3.611911685117865, "learning_rate": 2.926294388852246e-07, "loss": 0.531, "step": 1645 }, { "epoch": 0.7580668374543047, "grad_norm": 2.9071023212444076, "learning_rate": 2.9157530428149677e-07, "loss": 0.446, "step": 1646 }, { "epoch": 0.7585273883883595, "grad_norm": 3.573059215096367, "learning_rate": 2.9052274761294094e-07, "loss": 0.6017, "step": 1647 }, { "epoch": 0.7589879393224145, "grad_norm": 2.926410495931059, "learning_rate": 2.8947177122400737e-07, "loss": 0.4499, "step": 1648 }, { "epoch": 0.7594484902564693, "grad_norm": 2.9077271228177906, "learning_rate": 2.8842237745562583e-07, "loss": 0.4628, "step": 1649 }, { "epoch": 0.7599090411905242, "grad_norm": 3.128424023474016, "learning_rate": 2.873745686452017e-07, "loss": 0.5119, "step": 1650 }, { "epoch": 0.760369592124579, "grad_norm": 3.2061268862774464, "learning_rate": 2.863283471266088e-07, "loss": 0.5419, "step": 1651 }, { "epoch": 0.760830143058634, "grad_norm": 3.176712368087586, "learning_rate": 2.852837152301867e-07, "loss": 0.575, "step": 1652 }, { "epoch": 0.7612906939926888, "grad_norm": 3.002968313659636, "learning_rate": 2.8424067528273374e-07, "loss": 0.5412, "step": 1653 }, { "epoch": 0.7617512449267436, "grad_norm": 3.3131721641797522, "learning_rate": 2.83199229607502e-07, "loss": 0.5517, "step": 1654 }, { "epoch": 0.7622117958607985, "grad_norm": 3.0397564537569806, "learning_rate": 2.821593805241932e-07, "loss": 0.4976, "step": 1655 }, { "epoch": 0.7626723467948533, "grad_norm": 3.3953414466743714, "learning_rate": 2.811211303489527e-07, "loss": 0.5132, "step": 1656 }, { "epoch": 0.7631328977289082, "grad_norm": 3.2736894751575525, "learning_rate": 2.8008448139436367e-07, "loss": 0.5464, "step": 1657 }, { "epoch": 0.7635934486629631, "grad_norm": 3.1651926004830915, "learning_rate": 2.7904943596944373e-07, "loss": 0.5585, "step": 1658 }, { "epoch": 0.7640539995970179, "grad_norm": 3.036581859418946, "learning_rate": 2.7801599637963893e-07, "loss": 0.5441, "step": 1659 }, { "epoch": 0.7645145505310728, "grad_norm": 2.625544113481615, "learning_rate": 2.769841649268171e-07, "loss": 0.5043, "step": 1660 }, { "epoch": 0.7649751014651277, "grad_norm": 3.165210415457858, "learning_rate": 2.759539439092655e-07, "loss": 0.5323, "step": 1661 }, { "epoch": 0.7654356523991825, "grad_norm": 2.968389660757054, "learning_rate": 2.7492533562168407e-07, "loss": 0.5542, "step": 1662 }, { "epoch": 0.7658962033332374, "grad_norm": 3.28366116605155, "learning_rate": 2.738983423551804e-07, "loss": 0.4676, "step": 1663 }, { "epoch": 0.7663567542672922, "grad_norm": 3.0790653400040715, "learning_rate": 2.7287296639726443e-07, "loss": 0.5762, "step": 1664 }, { "epoch": 0.7668173052013472, "grad_norm": 2.8472233247954795, "learning_rate": 2.7184921003184424e-07, "loss": 0.5132, "step": 1665 }, { "epoch": 0.767277856135402, "grad_norm": 3.173379604510871, "learning_rate": 2.7082707553922067e-07, "loss": 0.5762, "step": 1666 }, { "epoch": 0.7677384070694568, "grad_norm": 3.3239774867727268, "learning_rate": 2.698065651960809e-07, "loss": 0.5437, "step": 1667 }, { "epoch": 0.7681989580035117, "grad_norm": 3.004069248804067, "learning_rate": 2.687876812754963e-07, "loss": 0.5013, "step": 1668 }, { "epoch": 0.7686595089375665, "grad_norm": 2.848925751252902, "learning_rate": 2.67770426046914e-07, "loss": 0.3852, "step": 1669 }, { "epoch": 0.7691200598716215, "grad_norm": 3.47280346861256, "learning_rate": 2.6675480177615326e-07, "loss": 0.6775, "step": 1670 }, { "epoch": 0.7695806108056763, "grad_norm": 3.2787012091514938, "learning_rate": 2.6574081072540264e-07, "loss": 0.575, "step": 1671 }, { "epoch": 0.7700411617397311, "grad_norm": 2.7278525194039727, "learning_rate": 2.647284551532104e-07, "loss": 0.4094, "step": 1672 }, { "epoch": 0.770501712673786, "grad_norm": 3.4300818176528063, "learning_rate": 2.6371773731448357e-07, "loss": 0.5578, "step": 1673 }, { "epoch": 0.7709622636078409, "grad_norm": 2.968395762450879, "learning_rate": 2.6270865946048084e-07, "loss": 0.5684, "step": 1674 }, { "epoch": 0.7714228145418958, "grad_norm": 2.8747901281611172, "learning_rate": 2.617012238388077e-07, "loss": 0.4909, "step": 1675 }, { "epoch": 0.7718833654759506, "grad_norm": 3.1871301141813384, "learning_rate": 2.60695432693412e-07, "loss": 0.5484, "step": 1676 }, { "epoch": 0.7723439164100054, "grad_norm": 3.2847684133576855, "learning_rate": 2.596912882645792e-07, "loss": 0.5747, "step": 1677 }, { "epoch": 0.7728044673440604, "grad_norm": 2.6955957814746707, "learning_rate": 2.5868879278892597e-07, "loss": 0.4021, "step": 1678 }, { "epoch": 0.7732650182781152, "grad_norm": 2.5964611847982786, "learning_rate": 2.576879484993968e-07, "loss": 0.5846, "step": 1679 }, { "epoch": 0.7737255692121701, "grad_norm": 3.0346764723083868, "learning_rate": 2.56688757625258e-07, "loss": 0.4852, "step": 1680 }, { "epoch": 0.7741861201462249, "grad_norm": 3.172623244649948, "learning_rate": 2.5569122239209364e-07, "loss": 0.5507, "step": 1681 }, { "epoch": 0.7746466710802797, "grad_norm": 3.513693571783645, "learning_rate": 2.54695345021799e-07, "loss": 0.6611, "step": 1682 }, { "epoch": 0.7751072220143347, "grad_norm": 2.9204689205282337, "learning_rate": 2.537011277325777e-07, "loss": 0.4998, "step": 1683 }, { "epoch": 0.7755677729483895, "grad_norm": 2.8688540667247437, "learning_rate": 2.527085727389354e-07, "loss": 0.5582, "step": 1684 }, { "epoch": 0.7760283238824444, "grad_norm": 3.254940468026647, "learning_rate": 2.5171768225167465e-07, "loss": 0.5264, "step": 1685 }, { "epoch": 0.7764888748164992, "grad_norm": 3.319619529120737, "learning_rate": 2.5072845847789126e-07, "loss": 0.5844, "step": 1686 }, { "epoch": 0.7769494257505541, "grad_norm": 2.711029642886499, "learning_rate": 2.4974090362096843e-07, "loss": 0.4466, "step": 1687 }, { "epoch": 0.777409976684609, "grad_norm": 2.9892514572487947, "learning_rate": 2.487550198805715e-07, "loss": 0.5246, "step": 1688 }, { "epoch": 0.7778705276186638, "grad_norm": 3.2361104141741213, "learning_rate": 2.4777080945264416e-07, "loss": 0.5495, "step": 1689 }, { "epoch": 0.7783310785527187, "grad_norm": 2.778603000630699, "learning_rate": 2.467882745294031e-07, "loss": 0.43, "step": 1690 }, { "epoch": 0.7787916294867736, "grad_norm": 2.9641786432879442, "learning_rate": 2.458074172993324e-07, "loss": 0.5534, "step": 1691 }, { "epoch": 0.7792521804208284, "grad_norm": 3.0337502563475276, "learning_rate": 2.4482823994717974e-07, "loss": 0.556, "step": 1692 }, { "epoch": 0.7797127313548833, "grad_norm": 3.0752220435659616, "learning_rate": 2.4385074465395084e-07, "loss": 0.536, "step": 1693 }, { "epoch": 0.7801732822889381, "grad_norm": 3.1565462424883246, "learning_rate": 2.4287493359690534e-07, "loss": 0.4882, "step": 1694 }, { "epoch": 0.780633833222993, "grad_norm": 3.0427946667754777, "learning_rate": 2.4190080894955054e-07, "loss": 0.4546, "step": 1695 }, { "epoch": 0.7810943841570479, "grad_norm": 3.2264986971076155, "learning_rate": 2.4092837288163805e-07, "loss": 0.4948, "step": 1696 }, { "epoch": 0.7815549350911027, "grad_norm": 3.35844053135339, "learning_rate": 2.399576275591586e-07, "loss": 0.5087, "step": 1697 }, { "epoch": 0.7820154860251576, "grad_norm": 3.0112899189235116, "learning_rate": 2.389885751443358e-07, "loss": 0.5081, "step": 1698 }, { "epoch": 0.7824760369592124, "grad_norm": 3.2352717237579722, "learning_rate": 2.3802121779562446e-07, "loss": 0.6003, "step": 1699 }, { "epoch": 0.7829365878932674, "grad_norm": 2.671473345514583, "learning_rate": 2.3705555766770203e-07, "loss": 0.466, "step": 1700 }, { "epoch": 0.7833971388273222, "grad_norm": 3.574193258639437, "learning_rate": 2.3609159691146575e-07, "loss": 0.5839, "step": 1701 }, { "epoch": 0.783857689761377, "grad_norm": 3.051925522352705, "learning_rate": 2.3512933767402942e-07, "loss": 0.6309, "step": 1702 }, { "epoch": 0.7843182406954319, "grad_norm": 3.5580346934856686, "learning_rate": 2.3416878209871493e-07, "loss": 0.5154, "step": 1703 }, { "epoch": 0.7847787916294868, "grad_norm": 3.2002268979972905, "learning_rate": 2.3320993232504993e-07, "loss": 0.4721, "step": 1704 }, { "epoch": 0.7852393425635417, "grad_norm": 2.9878864928285007, "learning_rate": 2.3225279048876367e-07, "loss": 0.5072, "step": 1705 }, { "epoch": 0.7856998934975965, "grad_norm": 3.1381323142253685, "learning_rate": 2.312973587217798e-07, "loss": 0.6361, "step": 1706 }, { "epoch": 0.7861604444316513, "grad_norm": 2.833292804782264, "learning_rate": 2.3034363915221378e-07, "loss": 0.4536, "step": 1707 }, { "epoch": 0.7866209953657062, "grad_norm": 3.1215024593062015, "learning_rate": 2.2939163390436732e-07, "loss": 0.4223, "step": 1708 }, { "epoch": 0.7870815462997611, "grad_norm": 3.311510890413361, "learning_rate": 2.2844134509872292e-07, "loss": 0.5819, "step": 1709 }, { "epoch": 0.787542097233816, "grad_norm": 3.040556067928825, "learning_rate": 2.2749277485194085e-07, "loss": 0.5059, "step": 1710 }, { "epoch": 0.7880026481678708, "grad_norm": 2.9822721827674985, "learning_rate": 2.26545925276853e-07, "loss": 0.4199, "step": 1711 }, { "epoch": 0.7884631991019256, "grad_norm": 3.1686788844095637, "learning_rate": 2.2560079848245905e-07, "loss": 0.4875, "step": 1712 }, { "epoch": 0.7889237500359806, "grad_norm": 3.3382646126754154, "learning_rate": 2.2465739657392057e-07, "loss": 0.5694, "step": 1713 }, { "epoch": 0.7893843009700354, "grad_norm": 2.9076693356746084, "learning_rate": 2.2371572165255792e-07, "loss": 0.4706, "step": 1714 }, { "epoch": 0.7898448519040903, "grad_norm": 3.925072525348147, "learning_rate": 2.2277577581584473e-07, "loss": 0.5798, "step": 1715 }, { "epoch": 0.7903054028381451, "grad_norm": 3.484998071545751, "learning_rate": 2.218375611574027e-07, "loss": 0.5484, "step": 1716 }, { "epoch": 0.7907659537722, "grad_norm": 3.301699935752327, "learning_rate": 2.2090107976699802e-07, "loss": 0.5115, "step": 1717 }, { "epoch": 0.7912265047062549, "grad_norm": 3.042551513495106, "learning_rate": 2.1996633373053653e-07, "loss": 0.579, "step": 1718 }, { "epoch": 0.7916870556403097, "grad_norm": 4.004445646052093, "learning_rate": 2.190333251300578e-07, "loss": 0.5797, "step": 1719 }, { "epoch": 0.7921476065743646, "grad_norm": 3.0220642732410457, "learning_rate": 2.1810205604373233e-07, "loss": 0.5784, "step": 1720 }, { "epoch": 0.7926081575084194, "grad_norm": 3.1050063154048204, "learning_rate": 2.171725285458559e-07, "loss": 0.4233, "step": 1721 }, { "epoch": 0.7930687084424743, "grad_norm": 2.8115340027844966, "learning_rate": 2.162447447068444e-07, "loss": 0.5435, "step": 1722 }, { "epoch": 0.7935292593765292, "grad_norm": 2.9604984251953526, "learning_rate": 2.1531870659323082e-07, "loss": 0.5504, "step": 1723 }, { "epoch": 0.793989810310584, "grad_norm": 3.2405218813980166, "learning_rate": 2.1439441626765943e-07, "loss": 0.5433, "step": 1724 }, { "epoch": 0.7944503612446389, "grad_norm": 3.1770006678350655, "learning_rate": 2.1347187578888158e-07, "loss": 0.6119, "step": 1725 }, { "epoch": 0.7949109121786938, "grad_norm": 3.5617997893715945, "learning_rate": 2.1255108721175065e-07, "loss": 0.5371, "step": 1726 }, { "epoch": 0.7953714631127486, "grad_norm": 3.6514613591140317, "learning_rate": 2.1163205258721806e-07, "loss": 0.6407, "step": 1727 }, { "epoch": 0.7958320140468035, "grad_norm": 3.0047824295231127, "learning_rate": 2.1071477396232894e-07, "loss": 0.488, "step": 1728 }, { "epoch": 0.7962925649808583, "grad_norm": 2.8092385249228293, "learning_rate": 2.097992533802163e-07, "loss": 0.5019, "step": 1729 }, { "epoch": 0.7967531159149133, "grad_norm": 3.173996181660635, "learning_rate": 2.0888549288009804e-07, "loss": 0.6845, "step": 1730 }, { "epoch": 0.7972136668489681, "grad_norm": 2.8459418296608145, "learning_rate": 2.0797349449727163e-07, "loss": 0.5215, "step": 1731 }, { "epoch": 0.7976742177830229, "grad_norm": 2.867902063042974, "learning_rate": 2.070632602631086e-07, "loss": 0.5121, "step": 1732 }, { "epoch": 0.7981347687170778, "grad_norm": 3.218011225763629, "learning_rate": 2.0615479220505293e-07, "loss": 0.5774, "step": 1733 }, { "epoch": 0.7985953196511326, "grad_norm": 3.037535439229652, "learning_rate": 2.05248092346613e-07, "loss": 0.5455, "step": 1734 }, { "epoch": 0.7990558705851876, "grad_norm": 3.44155141665967, "learning_rate": 2.0434316270735875e-07, "loss": 0.3846, "step": 1735 }, { "epoch": 0.7995164215192424, "grad_norm": 3.251900855648779, "learning_rate": 2.0344000530291872e-07, "loss": 0.4974, "step": 1736 }, { "epoch": 0.7999769724532972, "grad_norm": 3.3912040917359167, "learning_rate": 2.025386221449722e-07, "loss": 0.5486, "step": 1737 }, { "epoch": 0.8004375233873521, "grad_norm": 3.289387489843882, "learning_rate": 2.0163901524124771e-07, "loss": 0.5502, "step": 1738 }, { "epoch": 0.800898074321407, "grad_norm": 3.3531542710146884, "learning_rate": 2.0074118659551697e-07, "loss": 0.4939, "step": 1739 }, { "epoch": 0.8013586252554619, "grad_norm": 3.0019863613387643, "learning_rate": 1.9984513820759052e-07, "loss": 0.4646, "step": 1740 }, { "epoch": 0.8018191761895167, "grad_norm": 2.9999237180628726, "learning_rate": 1.9895087207331417e-07, "loss": 0.5051, "step": 1741 }, { "epoch": 0.8022797271235715, "grad_norm": 3.440811419447699, "learning_rate": 1.980583901845636e-07, "loss": 0.5719, "step": 1742 }, { "epoch": 0.8027402780576265, "grad_norm": 2.954781327967473, "learning_rate": 1.9716769452924065e-07, "loss": 0.4992, "step": 1743 }, { "epoch": 0.8032008289916813, "grad_norm": 2.9449328433082433, "learning_rate": 1.9627878709126778e-07, "loss": 0.5873, "step": 1744 }, { "epoch": 0.8036613799257362, "grad_norm": 2.9550948833942914, "learning_rate": 1.9539166985058508e-07, "loss": 0.5085, "step": 1745 }, { "epoch": 0.804121930859791, "grad_norm": 3.0968335366037256, "learning_rate": 1.945063447831452e-07, "loss": 0.4837, "step": 1746 }, { "epoch": 0.8045824817938458, "grad_norm": 2.933117571611147, "learning_rate": 1.936228138609084e-07, "loss": 0.5462, "step": 1747 }, { "epoch": 0.8050430327279008, "grad_norm": 3.652807035479544, "learning_rate": 1.92741079051839e-07, "loss": 0.5419, "step": 1748 }, { "epoch": 0.8055035836619556, "grad_norm": 2.906573757054981, "learning_rate": 1.9186114231990104e-07, "loss": 0.5128, "step": 1749 }, { "epoch": 0.8059641345960105, "grad_norm": 2.947586128595587, "learning_rate": 1.9098300562505264e-07, "loss": 0.4349, "step": 1750 }, { "epoch": 0.8064246855300653, "grad_norm": 3.0454260234507924, "learning_rate": 1.901066709232434e-07, "loss": 0.6054, "step": 1751 }, { "epoch": 0.8068852364641202, "grad_norm": 3.411169435801757, "learning_rate": 1.8923214016640898e-07, "loss": 0.4776, "step": 1752 }, { "epoch": 0.8073457873981751, "grad_norm": 3.2706182841611278, "learning_rate": 1.8835941530246657e-07, "loss": 0.5874, "step": 1753 }, { "epoch": 0.8078063383322299, "grad_norm": 3.0129377224539513, "learning_rate": 1.8748849827531133e-07, "loss": 0.4817, "step": 1754 }, { "epoch": 0.8082668892662848, "grad_norm": 3.185149212645243, "learning_rate": 1.866193910248115e-07, "loss": 0.4825, "step": 1755 }, { "epoch": 0.8087274402003397, "grad_norm": 3.343736216544324, "learning_rate": 1.857520954868047e-07, "loss": 0.5353, "step": 1756 }, { "epoch": 0.8091879911343945, "grad_norm": 2.7806290534872526, "learning_rate": 1.848866135930922e-07, "loss": 0.5085, "step": 1757 }, { "epoch": 0.8096485420684494, "grad_norm": 3.0440711729049656, "learning_rate": 1.8402294727143642e-07, "loss": 0.4974, "step": 1758 }, { "epoch": 0.8101090930025042, "grad_norm": 3.084951995273267, "learning_rate": 1.831610984455557e-07, "loss": 0.5661, "step": 1759 }, { "epoch": 0.8105696439365591, "grad_norm": 3.0500734438430106, "learning_rate": 1.8230106903511965e-07, "loss": 0.4579, "step": 1760 }, { "epoch": 0.811030194870614, "grad_norm": 3.2959846911138304, "learning_rate": 1.814428609557458e-07, "loss": 0.396, "step": 1761 }, { "epoch": 0.8114907458046688, "grad_norm": 3.4228830494829583, "learning_rate": 1.805864761189949e-07, "loss": 0.6473, "step": 1762 }, { "epoch": 0.8119512967387237, "grad_norm": 3.5275669816599247, "learning_rate": 1.7973191643236574e-07, "loss": 0.5014, "step": 1763 }, { "epoch": 0.8124118476727785, "grad_norm": 2.655456203502326, "learning_rate": 1.7887918379929356e-07, "loss": 0.4284, "step": 1764 }, { "epoch": 0.8128723986068335, "grad_norm": 3.119374485962208, "learning_rate": 1.780282801191425e-07, "loss": 0.5117, "step": 1765 }, { "epoch": 0.8133329495408883, "grad_norm": 3.603722648362608, "learning_rate": 1.771792072872028e-07, "loss": 0.5251, "step": 1766 }, { "epoch": 0.8137935004749431, "grad_norm": 3.636072681634815, "learning_rate": 1.7633196719468846e-07, "loss": 0.5528, "step": 1767 }, { "epoch": 0.814254051408998, "grad_norm": 2.9215076629877563, "learning_rate": 1.7548656172872922e-07, "loss": 0.6344, "step": 1768 }, { "epoch": 0.8147146023430529, "grad_norm": 3.093587532258315, "learning_rate": 1.746429927723696e-07, "loss": 0.5363, "step": 1769 }, { "epoch": 0.8151751532771078, "grad_norm": 3.202770682033625, "learning_rate": 1.738012622045635e-07, "loss": 0.4939, "step": 1770 }, { "epoch": 0.8156357042111626, "grad_norm": 3.0790051489322945, "learning_rate": 1.7296137190016913e-07, "loss": 0.5565, "step": 1771 }, { "epoch": 0.8160962551452174, "grad_norm": 3.522068919897294, "learning_rate": 1.7212332372994654e-07, "loss": 0.6042, "step": 1772 }, { "epoch": 0.8165568060792723, "grad_norm": 2.947521873551182, "learning_rate": 1.7128711956055274e-07, "loss": 0.5351, "step": 1773 }, { "epoch": 0.8170173570133272, "grad_norm": 3.197903959240655, "learning_rate": 1.7045276125453645e-07, "loss": 0.5098, "step": 1774 }, { "epoch": 0.8174779079473821, "grad_norm": 3.3416585809701833, "learning_rate": 1.6962025067033604e-07, "loss": 0.5499, "step": 1775 }, { "epoch": 0.8179384588814369, "grad_norm": 2.6718382039429756, "learning_rate": 1.6878958966227363e-07, "loss": 0.4988, "step": 1776 }, { "epoch": 0.8183990098154917, "grad_norm": 2.79487789143777, "learning_rate": 1.6796078008055225e-07, "loss": 0.4255, "step": 1777 }, { "epoch": 0.8188595607495467, "grad_norm": 2.693027511901247, "learning_rate": 1.671338237712502e-07, "loss": 0.5501, "step": 1778 }, { "epoch": 0.8193201116836015, "grad_norm": 2.834140007806073, "learning_rate": 1.6630872257631834e-07, "loss": 0.3992, "step": 1779 }, { "epoch": 0.8197806626176564, "grad_norm": 2.9518201333896883, "learning_rate": 1.6548547833357573e-07, "loss": 0.4642, "step": 1780 }, { "epoch": 0.8202412135517112, "grad_norm": 3.1159470952195636, "learning_rate": 1.6466409287670468e-07, "loss": 0.5313, "step": 1781 }, { "epoch": 0.8207017644857661, "grad_norm": 3.257480140561922, "learning_rate": 1.638445680352476e-07, "loss": 0.5634, "step": 1782 }, { "epoch": 0.821162315419821, "grad_norm": 3.423203304235, "learning_rate": 1.6302690563460288e-07, "loss": 0.5939, "step": 1783 }, { "epoch": 0.8216228663538758, "grad_norm": 2.954374172187009, "learning_rate": 1.6221110749601973e-07, "loss": 0.3948, "step": 1784 }, { "epoch": 0.8220834172879307, "grad_norm": 2.9485459890560994, "learning_rate": 1.613971754365957e-07, "loss": 0.483, "step": 1785 }, { "epoch": 0.8225439682219855, "grad_norm": 3.052068636325435, "learning_rate": 1.6058511126927176e-07, "loss": 0.6306, "step": 1786 }, { "epoch": 0.8230045191560404, "grad_norm": 3.276425368352071, "learning_rate": 1.5977491680282762e-07, "loss": 0.5366, "step": 1787 }, { "epoch": 0.8234650700900953, "grad_norm": 3.322933684622075, "learning_rate": 1.589665938418795e-07, "loss": 0.5081, "step": 1788 }, { "epoch": 0.8239256210241501, "grad_norm": 3.119742625969975, "learning_rate": 1.581601441868743e-07, "loss": 0.5368, "step": 1789 }, { "epoch": 0.824386171958205, "grad_norm": 2.9653697872519036, "learning_rate": 1.5735556963408693e-07, "loss": 0.6565, "step": 1790 }, { "epoch": 0.8248467228922599, "grad_norm": 3.373344219441705, "learning_rate": 1.5655287197561495e-07, "loss": 0.6218, "step": 1791 }, { "epoch": 0.8253072738263147, "grad_norm": 3.1421457828079795, "learning_rate": 1.5575205299937599e-07, "loss": 0.5257, "step": 1792 }, { "epoch": 0.8257678247603696, "grad_norm": 2.9754341538820426, "learning_rate": 1.549531144891032e-07, "loss": 0.5216, "step": 1793 }, { "epoch": 0.8262283756944244, "grad_norm": 3.744224931739949, "learning_rate": 1.5415605822434053e-07, "loss": 0.5341, "step": 1794 }, { "epoch": 0.8266889266284794, "grad_norm": 3.312132421784843, "learning_rate": 1.5336088598043995e-07, "loss": 0.4812, "step": 1795 }, { "epoch": 0.8271494775625342, "grad_norm": 3.1823045221187805, "learning_rate": 1.5256759952855737e-07, "loss": 0.6088, "step": 1796 }, { "epoch": 0.827610028496589, "grad_norm": 3.144738416475895, "learning_rate": 1.5177620063564712e-07, "loss": 0.626, "step": 1797 }, { "epoch": 0.8280705794306439, "grad_norm": 3.09856360625291, "learning_rate": 1.5098669106446026e-07, "loss": 0.5281, "step": 1798 }, { "epoch": 0.8285311303646987, "grad_norm": 2.8488512325058206, "learning_rate": 1.5019907257353925e-07, "loss": 0.4859, "step": 1799 }, { "epoch": 0.8289916812987537, "grad_norm": 3.1008236444549166, "learning_rate": 1.4941334691721474e-07, "loss": 0.5706, "step": 1800 }, { "epoch": 0.8294522322328085, "grad_norm": 3.5681942314685804, "learning_rate": 1.4862951584560034e-07, "loss": 0.5725, "step": 1801 }, { "epoch": 0.8299127831668633, "grad_norm": 3.479007930611103, "learning_rate": 1.4784758110459073e-07, "loss": 0.455, "step": 1802 }, { "epoch": 0.8303733341009182, "grad_norm": 3.6942391140034454, "learning_rate": 1.4706754443585644e-07, "loss": 0.5998, "step": 1803 }, { "epoch": 0.8308338850349731, "grad_norm": 3.0399020070035414, "learning_rate": 1.4628940757683972e-07, "loss": 0.531, "step": 1804 }, { "epoch": 0.831294435969028, "grad_norm": 3.3824973071742668, "learning_rate": 1.4551317226075176e-07, "loss": 0.632, "step": 1805 }, { "epoch": 0.8317549869030828, "grad_norm": 3.0111947391524474, "learning_rate": 1.4473884021656858e-07, "loss": 0.4523, "step": 1806 }, { "epoch": 0.8322155378371376, "grad_norm": 3.245242080771524, "learning_rate": 1.4396641316902558e-07, "loss": 0.5705, "step": 1807 }, { "epoch": 0.8326760887711926, "grad_norm": 3.101099899015713, "learning_rate": 1.431958928386169e-07, "loss": 0.5403, "step": 1808 }, { "epoch": 0.8331366397052474, "grad_norm": 3.4188190961039213, "learning_rate": 1.4242728094158807e-07, "loss": 0.5086, "step": 1809 }, { "epoch": 0.8335971906393023, "grad_norm": 3.179675044847406, "learning_rate": 1.41660579189934e-07, "loss": 0.4884, "step": 1810 }, { "epoch": 0.8340577415733571, "grad_norm": 3.225968066840041, "learning_rate": 1.4089578929139635e-07, "loss": 0.558, "step": 1811 }, { "epoch": 0.8345182925074119, "grad_norm": 2.9209325285695433, "learning_rate": 1.4013291294945652e-07, "loss": 0.5264, "step": 1812 }, { "epoch": 0.8349788434414669, "grad_norm": 3.1770764013810244, "learning_rate": 1.3937195186333483e-07, "loss": 0.4391, "step": 1813 }, { "epoch": 0.8354393943755217, "grad_norm": 3.5252276429059015, "learning_rate": 1.3861290772798552e-07, "loss": 0.6889, "step": 1814 }, { "epoch": 0.8358999453095766, "grad_norm": 3.334270364364526, "learning_rate": 1.378557822340922e-07, "loss": 0.5445, "step": 1815 }, { "epoch": 0.8363604962436314, "grad_norm": 2.8274203446918955, "learning_rate": 1.3710057706806588e-07, "loss": 0.4528, "step": 1816 }, { "epoch": 0.8368210471776864, "grad_norm": 2.970687258829064, "learning_rate": 1.3634729391204003e-07, "loss": 0.4576, "step": 1817 }, { "epoch": 0.8372815981117412, "grad_norm": 3.0526123989623404, "learning_rate": 1.355959344438665e-07, "loss": 0.4776, "step": 1818 }, { "epoch": 0.837742149045796, "grad_norm": 3.108639242698878, "learning_rate": 1.3484650033711308e-07, "loss": 0.505, "step": 1819 }, { "epoch": 0.8382026999798509, "grad_norm": 3.568732239359027, "learning_rate": 1.3409899326105856e-07, "loss": 0.6024, "step": 1820 }, { "epoch": 0.8386632509139058, "grad_norm": 3.6924487796854875, "learning_rate": 1.3335341488068997e-07, "loss": 0.5641, "step": 1821 }, { "epoch": 0.8391238018479606, "grad_norm": 2.7392248909998833, "learning_rate": 1.3260976685669767e-07, "loss": 0.4318, "step": 1822 }, { "epoch": 0.8395843527820155, "grad_norm": 3.0029706301333032, "learning_rate": 1.3186805084547292e-07, "loss": 0.5296, "step": 1823 }, { "epoch": 0.8400449037160703, "grad_norm": 2.837543312679349, "learning_rate": 1.3112826849910374e-07, "loss": 0.6362, "step": 1824 }, { "epoch": 0.8405054546501252, "grad_norm": 3.625903809155881, "learning_rate": 1.303904214653705e-07, "loss": 0.637, "step": 1825 }, { "epoch": 0.8409660055841801, "grad_norm": 3.0363996469171393, "learning_rate": 1.2965451138774342e-07, "loss": 0.4819, "step": 1826 }, { "epoch": 0.841426556518235, "grad_norm": 2.98767711754336, "learning_rate": 1.2892053990537855e-07, "loss": 0.416, "step": 1827 }, { "epoch": 0.8418871074522898, "grad_norm": 3.368814353904181, "learning_rate": 1.2818850865311304e-07, "loss": 0.4946, "step": 1828 }, { "epoch": 0.8423476583863446, "grad_norm": 2.866982470674382, "learning_rate": 1.2745841926146328e-07, "loss": 0.6318, "step": 1829 }, { "epoch": 0.8428082093203996, "grad_norm": 3.1759522349648397, "learning_rate": 1.2673027335662023e-07, "loss": 0.5292, "step": 1830 }, { "epoch": 0.8432687602544544, "grad_norm": 3.6119453846451184, "learning_rate": 1.2600407256044543e-07, "loss": 0.5958, "step": 1831 }, { "epoch": 0.8437293111885092, "grad_norm": 3.0612740647541714, "learning_rate": 1.2527981849046855e-07, "loss": 0.53, "step": 1832 }, { "epoch": 0.8441898621225641, "grad_norm": 3.0373834058484523, "learning_rate": 1.245575127598828e-07, "loss": 0.4711, "step": 1833 }, { "epoch": 0.844650413056619, "grad_norm": 3.368821133890806, "learning_rate": 1.2383715697754194e-07, "loss": 0.5848, "step": 1834 }, { "epoch": 0.8451109639906739, "grad_norm": 2.6277881136703476, "learning_rate": 1.23118752747956e-07, "loss": 0.4662, "step": 1835 }, { "epoch": 0.8455715149247287, "grad_norm": 3.3152498659397733, "learning_rate": 1.224023016712886e-07, "loss": 0.5397, "step": 1836 }, { "epoch": 0.8460320658587835, "grad_norm": 2.820968906592374, "learning_rate": 1.2168780534335288e-07, "loss": 0.5026, "step": 1837 }, { "epoch": 0.8464926167928384, "grad_norm": 3.677672748447081, "learning_rate": 1.2097526535560732e-07, "loss": 0.6399, "step": 1838 }, { "epoch": 0.8469531677268933, "grad_norm": 3.009389680786758, "learning_rate": 1.2026468329515415e-07, "loss": 0.4806, "step": 1839 }, { "epoch": 0.8474137186609482, "grad_norm": 3.3016771762707444, "learning_rate": 1.1955606074473368e-07, "loss": 0.5922, "step": 1840 }, { "epoch": 0.847874269595003, "grad_norm": 3.161579889786646, "learning_rate": 1.1884939928272108e-07, "loss": 0.587, "step": 1841 }, { "epoch": 0.8483348205290578, "grad_norm": 3.490485543641311, "learning_rate": 1.1814470048312508e-07, "loss": 0.4875, "step": 1842 }, { "epoch": 0.8487953714631128, "grad_norm": 3.106076702165179, "learning_rate": 1.1744196591558153e-07, "loss": 0.4083, "step": 1843 }, { "epoch": 0.8492559223971676, "grad_norm": 3.138124379295683, "learning_rate": 1.167411971453509e-07, "loss": 0.5066, "step": 1844 }, { "epoch": 0.8497164733312225, "grad_norm": 2.9058322118896642, "learning_rate": 1.1604239573331653e-07, "loss": 0.4712, "step": 1845 }, { "epoch": 0.8501770242652773, "grad_norm": 3.2851181268981398, "learning_rate": 1.1534556323597821e-07, "loss": 0.5974, "step": 1846 }, { "epoch": 0.8506375751993323, "grad_norm": 3.000290790230382, "learning_rate": 1.1465070120545106e-07, "loss": 0.4775, "step": 1847 }, { "epoch": 0.8510981261333871, "grad_norm": 3.3447028114933377, "learning_rate": 1.1395781118946124e-07, "loss": 0.59, "step": 1848 }, { "epoch": 0.8515586770674419, "grad_norm": 2.6080519147762393, "learning_rate": 1.1326689473134166e-07, "loss": 0.4667, "step": 1849 }, { "epoch": 0.8520192280014968, "grad_norm": 2.9008718617685556, "learning_rate": 1.1257795337003007e-07, "loss": 0.5282, "step": 1850 }, { "epoch": 0.8524797789355516, "grad_norm": 3.169497217996663, "learning_rate": 1.1189098864006486e-07, "loss": 0.5051, "step": 1851 }, { "epoch": 0.8529403298696066, "grad_norm": 3.215162842301655, "learning_rate": 1.112060020715817e-07, "loss": 0.6173, "step": 1852 }, { "epoch": 0.8534008808036614, "grad_norm": 3.42236331926524, "learning_rate": 1.1052299519030961e-07, "loss": 0.48, "step": 1853 }, { "epoch": 0.8538614317377162, "grad_norm": 3.045553200483245, "learning_rate": 1.0984196951756863e-07, "loss": 0.5809, "step": 1854 }, { "epoch": 0.8543219826717711, "grad_norm": 3.0183531859561543, "learning_rate": 1.0916292657026616e-07, "loss": 0.5366, "step": 1855 }, { "epoch": 0.854782533605826, "grad_norm": 3.233752941923723, "learning_rate": 1.084858678608922e-07, "loss": 0.5023, "step": 1856 }, { "epoch": 0.8552430845398808, "grad_norm": 3.7031466518660467, "learning_rate": 1.078107948975181e-07, "loss": 0.5871, "step": 1857 }, { "epoch": 0.8557036354739357, "grad_norm": 3.231664196992226, "learning_rate": 1.0713770918379206e-07, "loss": 0.5641, "step": 1858 }, { "epoch": 0.8561641864079905, "grad_norm": 2.8655131047972344, "learning_rate": 1.0646661221893538e-07, "loss": 0.5098, "step": 1859 }, { "epoch": 0.8566247373420455, "grad_norm": 3.01957958338486, "learning_rate": 1.0579750549773992e-07, "loss": 0.5884, "step": 1860 }, { "epoch": 0.8570852882761003, "grad_norm": 2.8146657597507017, "learning_rate": 1.0513039051056504e-07, "loss": 0.5192, "step": 1861 }, { "epoch": 0.8575458392101551, "grad_norm": 3.579182844425862, "learning_rate": 1.0446526874333262e-07, "loss": 0.4323, "step": 1862 }, { "epoch": 0.85800639014421, "grad_norm": 3.1443912809179, "learning_rate": 1.0380214167752588e-07, "loss": 0.5651, "step": 1863 }, { "epoch": 0.8584669410782648, "grad_norm": 3.1366442156691607, "learning_rate": 1.0314101079018456e-07, "loss": 0.597, "step": 1864 }, { "epoch": 0.8589274920123198, "grad_norm": 3.0110739023780178, "learning_rate": 1.0248187755390247e-07, "loss": 0.4957, "step": 1865 }, { "epoch": 0.8593880429463746, "grad_norm": 3.1211140110824656, "learning_rate": 1.0182474343682346e-07, "loss": 0.5807, "step": 1866 }, { "epoch": 0.8598485938804294, "grad_norm": 3.196850276682387, "learning_rate": 1.0116960990263879e-07, "loss": 0.432, "step": 1867 }, { "epoch": 0.8603091448144843, "grad_norm": 3.0649211283399143, "learning_rate": 1.0051647841058385e-07, "loss": 0.4683, "step": 1868 }, { "epoch": 0.8607696957485392, "grad_norm": 3.0704976261787276, "learning_rate": 9.986535041543409e-08, "loss": 0.5084, "step": 1869 }, { "epoch": 0.8612302466825941, "grad_norm": 2.8430066136777223, "learning_rate": 9.921622736750345e-08, "loss": 0.4893, "step": 1870 }, { "epoch": 0.8616907976166489, "grad_norm": 3.1871422194117915, "learning_rate": 9.856911071263918e-08, "loss": 0.5582, "step": 1871 }, { "epoch": 0.8621513485507037, "grad_norm": 3.1695517066788335, "learning_rate": 9.792400189221927e-08, "loss": 0.5491, "step": 1872 }, { "epoch": 0.8626118994847587, "grad_norm": 3.192178213736622, "learning_rate": 9.72809023431509e-08, "loss": 0.5684, "step": 1873 }, { "epoch": 0.8630724504188135, "grad_norm": 3.5963603776479833, "learning_rate": 9.663981349786443e-08, "loss": 0.5006, "step": 1874 }, { "epoch": 0.8635330013528684, "grad_norm": 2.9762857754260996, "learning_rate": 9.600073678431186e-08, "loss": 0.5183, "step": 1875 }, { "epoch": 0.8639935522869232, "grad_norm": 3.512211825383684, "learning_rate": 9.53636736259642e-08, "loss": 0.5459, "step": 1876 }, { "epoch": 0.864454103220978, "grad_norm": 2.959882910299774, "learning_rate": 9.472862544180659e-08, "loss": 0.538, "step": 1877 }, { "epoch": 0.864914654155033, "grad_norm": 3.201929324826627, "learning_rate": 9.409559364633646e-08, "loss": 0.4949, "step": 1878 }, { "epoch": 0.8653752050890878, "grad_norm": 3.2818565007848703, "learning_rate": 9.346457964956023e-08, "loss": 0.5904, "step": 1879 }, { "epoch": 0.8658357560231427, "grad_norm": 3.1114338599754374, "learning_rate": 9.283558485698894e-08, "loss": 0.4755, "step": 1880 }, { "epoch": 0.8662963069571975, "grad_norm": 2.923497203537618, "learning_rate": 9.220861066963715e-08, "loss": 0.4812, "step": 1881 }, { "epoch": 0.8667568578912525, "grad_norm": 3.199024742719346, "learning_rate": 9.158365848401817e-08, "loss": 0.546, "step": 1882 }, { "epoch": 0.8672174088253073, "grad_norm": 2.9843247776927257, "learning_rate": 9.096072969214197e-08, "loss": 0.543, "step": 1883 }, { "epoch": 0.8676779597593621, "grad_norm": 3.0691692564049653, "learning_rate": 9.0339825681511e-08, "loss": 0.5088, "step": 1884 }, { "epoch": 0.868138510693417, "grad_norm": 3.383333358394925, "learning_rate": 8.972094783511807e-08, "loss": 0.6416, "step": 1885 }, { "epoch": 0.8685990616274719, "grad_norm": 3.0949849503575773, "learning_rate": 8.910409753144344e-08, "loss": 0.4999, "step": 1886 }, { "epoch": 0.8690596125615268, "grad_norm": 3.273519298979524, "learning_rate": 8.848927614445011e-08, "loss": 0.5933, "step": 1887 }, { "epoch": 0.8695201634955816, "grad_norm": 3.0038213358417183, "learning_rate": 8.787648504358291e-08, "loss": 0.4551, "step": 1888 }, { "epoch": 0.8699807144296364, "grad_norm": 3.1769567170597615, "learning_rate": 8.726572559376433e-08, "loss": 0.6628, "step": 1889 }, { "epoch": 0.8704412653636913, "grad_norm": 3.3453327234543067, "learning_rate": 8.665699915539094e-08, "loss": 0.656, "step": 1890 }, { "epoch": 0.8709018162977462, "grad_norm": 3.0704691251867513, "learning_rate": 8.605030708433147e-08, "loss": 0.5198, "step": 1891 }, { "epoch": 0.871362367231801, "grad_norm": 3.019643595841549, "learning_rate": 8.544565073192367e-08, "loss": 0.4624, "step": 1892 }, { "epoch": 0.8718229181658559, "grad_norm": 3.08664418112486, "learning_rate": 8.484303144497007e-08, "loss": 0.49, "step": 1893 }, { "epoch": 0.8722834690999107, "grad_norm": 3.262560025269264, "learning_rate": 8.424245056573653e-08, "loss": 0.5118, "step": 1894 }, { "epoch": 0.8727440200339657, "grad_norm": 3.0452114320652086, "learning_rate": 8.364390943194855e-08, "loss": 0.5284, "step": 1895 }, { "epoch": 0.8732045709680205, "grad_norm": 3.2005415403238366, "learning_rate": 8.304740937678833e-08, "loss": 0.6092, "step": 1896 }, { "epoch": 0.8736651219020753, "grad_norm": 3.1426226729757794, "learning_rate": 8.245295172889121e-08, "loss": 0.4615, "step": 1897 }, { "epoch": 0.8741256728361302, "grad_norm": 2.9393983445977403, "learning_rate": 8.186053781234414e-08, "loss": 0.4892, "step": 1898 }, { "epoch": 0.8745862237701851, "grad_norm": 2.745952741647922, "learning_rate": 8.12701689466816e-08, "loss": 0.5493, "step": 1899 }, { "epoch": 0.87504677470424, "grad_norm": 2.948466311290946, "learning_rate": 8.068184644688248e-08, "loss": 0.474, "step": 1900 }, { "epoch": 0.8755073256382948, "grad_norm": 3.172245629853397, "learning_rate": 8.009557162336822e-08, "loss": 0.5008, "step": 1901 }, { "epoch": 0.8759678765723496, "grad_norm": 3.2352168665011853, "learning_rate": 7.951134578199925e-08, "loss": 0.4891, "step": 1902 }, { "epoch": 0.8764284275064045, "grad_norm": 3.2067263063797866, "learning_rate": 7.892917022407153e-08, "loss": 0.5522, "step": 1903 }, { "epoch": 0.8768889784404594, "grad_norm": 3.389480208236002, "learning_rate": 7.834904624631523e-08, "loss": 0.6624, "step": 1904 }, { "epoch": 0.8773495293745143, "grad_norm": 3.2052256238249903, "learning_rate": 7.777097514089014e-08, "loss": 0.6098, "step": 1905 }, { "epoch": 0.8778100803085691, "grad_norm": 3.2221846053421337, "learning_rate": 7.719495819538324e-08, "loss": 0.5657, "step": 1906 }, { "epoch": 0.878270631242624, "grad_norm": 3.018523302700844, "learning_rate": 7.66209966928072e-08, "loss": 0.4995, "step": 1907 }, { "epoch": 0.8787311821766789, "grad_norm": 3.5183165482279417, "learning_rate": 7.604909191159537e-08, "loss": 0.5429, "step": 1908 }, { "epoch": 0.8791917331107337, "grad_norm": 3.1284052208629416, "learning_rate": 7.547924512560044e-08, "loss": 0.6085, "step": 1909 }, { "epoch": 0.8796522840447886, "grad_norm": 2.985628461533933, "learning_rate": 7.491145760409134e-08, "loss": 0.6259, "step": 1910 }, { "epoch": 0.8801128349788434, "grad_norm": 3.340901063995122, "learning_rate": 7.434573061174965e-08, "loss": 0.6032, "step": 1911 }, { "epoch": 0.8805733859128984, "grad_norm": 3.0741105448726027, "learning_rate": 7.378206540866783e-08, "loss": 0.5554, "step": 1912 }, { "epoch": 0.8810339368469532, "grad_norm": 2.9332744072817922, "learning_rate": 7.322046325034603e-08, "loss": 0.4567, "step": 1913 }, { "epoch": 0.881494487781008, "grad_norm": 2.8092291752801857, "learning_rate": 7.266092538768853e-08, "loss": 0.5092, "step": 1914 }, { "epoch": 0.8819550387150629, "grad_norm": 3.438214393413268, "learning_rate": 7.210345306700238e-08, "loss": 0.563, "step": 1915 }, { "epoch": 0.8824155896491177, "grad_norm": 3.2745821627508565, "learning_rate": 7.154804752999344e-08, "loss": 0.486, "step": 1916 }, { "epoch": 0.8828761405831727, "grad_norm": 2.892400504571004, "learning_rate": 7.099471001376434e-08, "loss": 0.5935, "step": 1917 }, { "epoch": 0.8833366915172275, "grad_norm": 3.2933601567721387, "learning_rate": 7.044344175081107e-08, "loss": 0.6022, "step": 1918 }, { "epoch": 0.8837972424512823, "grad_norm": 3.4349342024607052, "learning_rate": 6.989424396902078e-08, "loss": 0.6374, "step": 1919 }, { "epoch": 0.8842577933853372, "grad_norm": 3.1507821922545203, "learning_rate": 6.934711789166902e-08, "loss": 0.5632, "step": 1920 }, { "epoch": 0.8847183443193921, "grad_norm": 3.089402889899754, "learning_rate": 6.880206473741646e-08, "loss": 0.5506, "step": 1921 }, { "epoch": 0.885178895253447, "grad_norm": 3.181790361740718, "learning_rate": 6.825908572030703e-08, "loss": 0.6158, "step": 1922 }, { "epoch": 0.8856394461875018, "grad_norm": 3.0742747552763077, "learning_rate": 6.771818204976453e-08, "loss": 0.5871, "step": 1923 }, { "epoch": 0.8860999971215566, "grad_norm": 3.010007045848609, "learning_rate": 6.71793549305899e-08, "loss": 0.5538, "step": 1924 }, { "epoch": 0.8865605480556116, "grad_norm": 3.3014591830340225, "learning_rate": 6.66426055629593e-08, "loss": 0.5739, "step": 1925 }, { "epoch": 0.8870210989896664, "grad_norm": 2.8280186580117337, "learning_rate": 6.610793514242074e-08, "loss": 0.5761, "step": 1926 }, { "epoch": 0.8874816499237212, "grad_norm": 2.8955154098471696, "learning_rate": 6.557534485989135e-08, "loss": 0.4836, "step": 1927 }, { "epoch": 0.8879422008577761, "grad_norm": 3.2863599262582546, "learning_rate": 6.504483590165533e-08, "loss": 0.6659, "step": 1928 }, { "epoch": 0.8884027517918309, "grad_norm": 2.7810076021075325, "learning_rate": 6.451640944936087e-08, "loss": 0.5125, "step": 1929 }, { "epoch": 0.8888633027258859, "grad_norm": 2.829017960020371, "learning_rate": 6.399006668001772e-08, "loss": 0.4712, "step": 1930 }, { "epoch": 0.8893238536599407, "grad_norm": 3.1200160087326485, "learning_rate": 6.346580876599394e-08, "loss": 0.5241, "step": 1931 }, { "epoch": 0.8897844045939955, "grad_norm": 3.284501857717466, "learning_rate": 6.294363687501459e-08, "loss": 0.4956, "step": 1932 }, { "epoch": 0.8902449555280504, "grad_norm": 3.0368507746622497, "learning_rate": 6.242355217015793e-08, "loss": 0.5335, "step": 1933 }, { "epoch": 0.8907055064621053, "grad_norm": 2.707473642304596, "learning_rate": 6.190555580985291e-08, "loss": 0.4174, "step": 1934 }, { "epoch": 0.8911660573961602, "grad_norm": 3.31398125775273, "learning_rate": 6.138964894787802e-08, "loss": 0.5595, "step": 1935 }, { "epoch": 0.891626608330215, "grad_norm": 3.113081540988843, "learning_rate": 6.08758327333564e-08, "loss": 0.5791, "step": 1936 }, { "epoch": 0.8920871592642698, "grad_norm": 2.8145565837317608, "learning_rate": 6.036410831075489e-08, "loss": 0.5162, "step": 1937 }, { "epoch": 0.8925477101983248, "grad_norm": 3.196959966502691, "learning_rate": 5.985447681988187e-08, "loss": 0.656, "step": 1938 }, { "epoch": 0.8930082611323796, "grad_norm": 3.2061347516327476, "learning_rate": 5.934693939588276e-08, "loss": 0.5659, "step": 1939 }, { "epoch": 0.8934688120664345, "grad_norm": 2.7407334976039937, "learning_rate": 5.884149716923947e-08, "loss": 0.5263, "step": 1940 }, { "epoch": 0.8939293630004893, "grad_norm": 3.0639659095757925, "learning_rate": 5.833815126576713e-08, "loss": 0.4792, "step": 1941 }, { "epoch": 0.8943899139345441, "grad_norm": 3.705402334304555, "learning_rate": 5.78369028066108e-08, "loss": 0.5598, "step": 1942 }, { "epoch": 0.8948504648685991, "grad_norm": 3.314179143806477, "learning_rate": 5.7337752908244604e-08, "loss": 0.6057, "step": 1943 }, { "epoch": 0.8953110158026539, "grad_norm": 3.093901085439443, "learning_rate": 5.684070268246799e-08, "loss": 0.5189, "step": 1944 }, { "epoch": 0.8957715667367088, "grad_norm": 2.9306316685724814, "learning_rate": 5.634575323640334e-08, "loss": 0.518, "step": 1945 }, { "epoch": 0.8962321176707636, "grad_norm": 2.6131420700596744, "learning_rate": 5.5852905672494235e-08, "loss": 0.4514, "step": 1946 }, { "epoch": 0.8966926686048186, "grad_norm": 3.4838526888557224, "learning_rate": 5.5362161088502335e-08, "loss": 0.6153, "step": 1947 }, { "epoch": 0.8971532195388734, "grad_norm": 2.7410462665447732, "learning_rate": 5.487352057750538e-08, "loss": 0.5487, "step": 1948 }, { "epoch": 0.8976137704729282, "grad_norm": 3.323249823556552, "learning_rate": 5.438698522789409e-08, "loss": 0.4776, "step": 1949 }, { "epoch": 0.8980743214069831, "grad_norm": 3.3914954871956207, "learning_rate": 5.390255612337058e-08, "loss": 0.5644, "step": 1950 }, { "epoch": 0.898534872341038, "grad_norm": 3.453540431554986, "learning_rate": 5.3420234342945515e-08, "loss": 0.5669, "step": 1951 }, { "epoch": 0.8989954232750929, "grad_norm": 3.2848747343127696, "learning_rate": 5.2940020960935416e-08, "loss": 0.6555, "step": 1952 }, { "epoch": 0.8994559742091477, "grad_norm": 3.335247069769459, "learning_rate": 5.246191704696079e-08, "loss": 0.5593, "step": 1953 }, { "epoch": 0.8999165251432025, "grad_norm": 2.373713227217355, "learning_rate": 5.1985923665943787e-08, "loss": 0.3408, "step": 1954 }, { "epoch": 0.9003770760772574, "grad_norm": 3.2307700903116334, "learning_rate": 5.1512041878105095e-08, "loss": 0.5363, "step": 1955 }, { "epoch": 0.9008376270113123, "grad_norm": 3.2016297093263817, "learning_rate": 5.104027273896239e-08, "loss": 0.5384, "step": 1956 }, { "epoch": 0.9012981779453672, "grad_norm": 3.20013985926728, "learning_rate": 5.057061729932777e-08, "loss": 0.4193, "step": 1957 }, { "epoch": 0.901758728879422, "grad_norm": 2.7276925258485436, "learning_rate": 5.0103076605304885e-08, "loss": 0.5143, "step": 1958 }, { "epoch": 0.9022192798134768, "grad_norm": 2.9954612745734903, "learning_rate": 4.963765169828737e-08, "loss": 0.4886, "step": 1959 }, { "epoch": 0.9026798307475318, "grad_norm": 3.1663609715145102, "learning_rate": 4.917434361495609e-08, "loss": 0.529, "step": 1960 }, { "epoch": 0.9031403816815866, "grad_norm": 3.1213827237745333, "learning_rate": 4.871315338727711e-08, "loss": 0.5155, "step": 1961 }, { "epoch": 0.9036009326156415, "grad_norm": 2.7436550688979837, "learning_rate": 4.825408204249881e-08, "loss": 0.4606, "step": 1962 }, { "epoch": 0.9040614835496963, "grad_norm": 3.249120899149753, "learning_rate": 4.779713060315016e-08, "loss": 0.5054, "step": 1963 }, { "epoch": 0.9045220344837512, "grad_norm": 2.90283494064835, "learning_rate": 4.734230008703877e-08, "loss": 0.5579, "step": 1964 }, { "epoch": 0.9049825854178061, "grad_norm": 3.4042002585801865, "learning_rate": 4.688959150724703e-08, "loss": 0.5775, "step": 1965 }, { "epoch": 0.9054431363518609, "grad_norm": 3.1250557810610182, "learning_rate": 4.6439005872132454e-08, "loss": 0.6137, "step": 1966 }, { "epoch": 0.9059036872859157, "grad_norm": 3.1195028338428226, "learning_rate": 4.599054418532267e-08, "loss": 0.561, "step": 1967 }, { "epoch": 0.9063642382199706, "grad_norm": 2.8851519193897643, "learning_rate": 4.554420744571463e-08, "loss": 0.5235, "step": 1968 }, { "epoch": 0.9068247891540255, "grad_norm": 3.6481537119175997, "learning_rate": 4.5099996647473215e-08, "loss": 0.4759, "step": 1969 }, { "epoch": 0.9072853400880804, "grad_norm": 2.790296368178869, "learning_rate": 4.465791278002684e-08, "loss": 0.4618, "step": 1970 }, { "epoch": 0.9077458910221352, "grad_norm": 3.556677402896051, "learning_rate": 4.4217956828066614e-08, "loss": 0.5706, "step": 1971 }, { "epoch": 0.90820644195619, "grad_norm": 2.978589496267026, "learning_rate": 4.3780129771544885e-08, "loss": 0.538, "step": 1972 }, { "epoch": 0.908666992890245, "grad_norm": 3.6725000383644386, "learning_rate": 4.3344432585670886e-08, "loss": 0.6891, "step": 1973 }, { "epoch": 0.9091275438242998, "grad_norm": 3.2917964842156717, "learning_rate": 4.291086624091067e-08, "loss": 0.5737, "step": 1974 }, { "epoch": 0.9095880947583547, "grad_norm": 3.0743766947515088, "learning_rate": 4.2479431702983845e-08, "loss": 0.5731, "step": 1975 }, { "epoch": 0.9100486456924095, "grad_norm": 3.0506298198871993, "learning_rate": 4.205012993286139e-08, "loss": 0.4587, "step": 1976 }, { "epoch": 0.9105091966264645, "grad_norm": 3.0219223404896907, "learning_rate": 4.162296188676417e-08, "loss": 0.527, "step": 1977 }, { "epoch": 0.9109697475605193, "grad_norm": 2.79095841731694, "learning_rate": 4.119792851616022e-08, "loss": 0.4888, "step": 1978 }, { "epoch": 0.9114302984945741, "grad_norm": 3.4172455118807283, "learning_rate": 4.0775030767762895e-08, "loss": 0.5758, "step": 1979 }, { "epoch": 0.911890849428629, "grad_norm": 3.239863460054807, "learning_rate": 4.035426958352861e-08, "loss": 0.5547, "step": 1980 }, { "epoch": 0.9123514003626838, "grad_norm": 2.8654349056505017, "learning_rate": 3.99356459006549e-08, "loss": 0.438, "step": 1981 }, { "epoch": 0.9128119512967388, "grad_norm": 2.9491143905443065, "learning_rate": 3.9519160651578456e-08, "loss": 0.5215, "step": 1982 }, { "epoch": 0.9132725022307936, "grad_norm": 3.590224655674636, "learning_rate": 3.910481476397231e-08, "loss": 0.5205, "step": 1983 }, { "epoch": 0.9137330531648484, "grad_norm": 3.0897242283731816, "learning_rate": 3.8692609160744796e-08, "loss": 0.4683, "step": 1984 }, { "epoch": 0.9141936040989033, "grad_norm": 2.6174259753352773, "learning_rate": 3.8282544760037005e-08, "loss": 0.5923, "step": 1985 }, { "epoch": 0.9146541550329582, "grad_norm": 3.281785734259912, "learning_rate": 3.787462247522033e-08, "loss": 0.5454, "step": 1986 }, { "epoch": 0.915114705967013, "grad_norm": 3.0957714777119003, "learning_rate": 3.74688432148953e-08, "loss": 0.4839, "step": 1987 }, { "epoch": 0.9155752569010679, "grad_norm": 3.2146157875749735, "learning_rate": 3.7065207882888915e-08, "loss": 0.5671, "step": 1988 }, { "epoch": 0.9160358078351227, "grad_norm": 3.2632112706777097, "learning_rate": 3.666371737825269e-08, "loss": 0.5403, "step": 1989 }, { "epoch": 0.9164963587691777, "grad_norm": 2.588003078548097, "learning_rate": 3.626437259526094e-08, "loss": 0.4528, "step": 1990 }, { "epoch": 0.9169569097032325, "grad_norm": 3.5583915478306025, "learning_rate": 3.58671744234087e-08, "loss": 0.5301, "step": 1991 }, { "epoch": 0.9174174606372874, "grad_norm": 2.97658613622478, "learning_rate": 3.54721237474096e-08, "loss": 0.5364, "step": 1992 }, { "epoch": 0.9178780115713422, "grad_norm": 3.005308580137038, "learning_rate": 3.5079221447193665e-08, "loss": 0.5161, "step": 1993 }, { "epoch": 0.918338562505397, "grad_norm": 3.4338709756555335, "learning_rate": 3.468846839790629e-08, "loss": 0.5028, "step": 1994 }, { "epoch": 0.918799113439452, "grad_norm": 3.022995712791708, "learning_rate": 3.4299865469905156e-08, "loss": 0.4699, "step": 1995 }, { "epoch": 0.9192596643735068, "grad_norm": 3.0589636260878654, "learning_rate": 3.391341352875887e-08, "loss": 0.5397, "step": 1996 }, { "epoch": 0.9197202153075617, "grad_norm": 3.029785621200751, "learning_rate": 3.3529113435245e-08, "loss": 0.5563, "step": 1997 }, { "epoch": 0.9201807662416165, "grad_norm": 3.12537296154988, "learning_rate": 3.314696604534839e-08, "loss": 0.559, "step": 1998 }, { "epoch": 0.9206413171756714, "grad_norm": 3.416953532596617, "learning_rate": 3.276697221025848e-08, "loss": 0.5028, "step": 1999 }, { "epoch": 0.9211018681097263, "grad_norm": 3.7639797946349485, "learning_rate": 3.238913277636846e-08, "loss": 0.4885, "step": 2000 }, { "epoch": 0.9215624190437811, "grad_norm": 3.355422997580863, "learning_rate": 3.201344858527233e-08, "loss": 0.5052, "step": 2001 }, { "epoch": 0.922022969977836, "grad_norm": 3.1382941450648425, "learning_rate": 3.163992047376374e-08, "loss": 0.5372, "step": 2002 }, { "epoch": 0.9224835209118909, "grad_norm": 3.4395472891340377, "learning_rate": 3.126854927383416e-08, "loss": 0.5008, "step": 2003 }, { "epoch": 0.9229440718459457, "grad_norm": 3.1440121431768078, "learning_rate": 3.089933581267024e-08, "loss": 0.6216, "step": 2004 }, { "epoch": 0.9234046227800006, "grad_norm": 3.519897694068012, "learning_rate": 3.053228091265314e-08, "loss": 0.5959, "step": 2005 }, { "epoch": 0.9238651737140554, "grad_norm": 3.1572535401512756, "learning_rate": 3.016738539135566e-08, "loss": 0.4811, "step": 2006 }, { "epoch": 0.9243257246481102, "grad_norm": 3.2199243277505616, "learning_rate": 2.980465006154076e-08, "loss": 0.5451, "step": 2007 }, { "epoch": 0.9247862755821652, "grad_norm": 3.3241287834796096, "learning_rate": 2.9444075731160256e-08, "loss": 0.6162, "step": 2008 }, { "epoch": 0.92524682651622, "grad_norm": 3.164226428742799, "learning_rate": 2.908566320335215e-08, "loss": 0.5484, "step": 2009 }, { "epoch": 0.9257073774502749, "grad_norm": 3.4187086885851774, "learning_rate": 2.872941327643963e-08, "loss": 0.5215, "step": 2010 }, { "epoch": 0.9261679283843297, "grad_norm": 3.33801664810882, "learning_rate": 2.837532674392862e-08, "loss": 0.4978, "step": 2011 }, { "epoch": 0.9266284793183847, "grad_norm": 3.1540649821799573, "learning_rate": 2.8023404394506345e-08, "loss": 0.5169, "step": 2012 }, { "epoch": 0.9270890302524395, "grad_norm": 3.332203820351019, "learning_rate": 2.767364701204e-08, "loss": 0.4781, "step": 2013 }, { "epoch": 0.9275495811864943, "grad_norm": 3.0643310804507253, "learning_rate": 2.7326055375573976e-08, "loss": 0.5149, "step": 2014 }, { "epoch": 0.9280101321205492, "grad_norm": 2.841374181617664, "learning_rate": 2.6980630259329063e-08, "loss": 0.5501, "step": 2015 }, { "epoch": 0.9284706830546041, "grad_norm": 3.3194075100355835, "learning_rate": 2.6637372432700476e-08, "loss": 0.6163, "step": 2016 }, { "epoch": 0.928931233988659, "grad_norm": 3.0237095704445633, "learning_rate": 2.629628266025552e-08, "loss": 0.5245, "step": 2017 }, { "epoch": 0.9293917849227138, "grad_norm": 3.0048824665623566, "learning_rate": 2.5957361701732904e-08, "loss": 0.7035, "step": 2018 }, { "epoch": 0.9298523358567686, "grad_norm": 2.850832918602004, "learning_rate": 2.5620610312040436e-08, "loss": 0.4749, "step": 2019 }, { "epoch": 0.9303128867908235, "grad_norm": 2.889924325240167, "learning_rate": 2.528602924125334e-08, "loss": 0.4982, "step": 2020 }, { "epoch": 0.9307734377248784, "grad_norm": 3.097179425048999, "learning_rate": 2.495361923461281e-08, "loss": 0.57, "step": 2021 }, { "epoch": 0.9312339886589333, "grad_norm": 3.195072157476586, "learning_rate": 2.462338103252415e-08, "loss": 0.6917, "step": 2022 }, { "epoch": 0.9316945395929881, "grad_norm": 2.964114672020939, "learning_rate": 2.4295315370555402e-08, "loss": 0.4675, "step": 2023 }, { "epoch": 0.9321550905270429, "grad_norm": 2.698298386130625, "learning_rate": 2.3969422979435162e-08, "loss": 0.4485, "step": 2024 }, { "epoch": 0.9326156414610979, "grad_norm": 3.05208848367821, "learning_rate": 2.3645704585051775e-08, "loss": 0.6052, "step": 2025 }, { "epoch": 0.9330761923951527, "grad_norm": 3.626805603045707, "learning_rate": 2.3324160908451017e-08, "loss": 0.458, "step": 2026 }, { "epoch": 0.9335367433292076, "grad_norm": 3.2241633697370413, "learning_rate": 2.300479266583455e-08, "loss": 0.5033, "step": 2027 }, { "epoch": 0.9339972942632624, "grad_norm": 3.5953424649745735, "learning_rate": 2.2687600568558785e-08, "loss": 0.7304, "step": 2028 }, { "epoch": 0.9344578451973173, "grad_norm": 3.8158227802153952, "learning_rate": 2.237258532313302e-08, "loss": 0.5633, "step": 2029 }, { "epoch": 0.9349183961313722, "grad_norm": 3.040372697625964, "learning_rate": 2.205974763121754e-08, "loss": 0.5436, "step": 2030 }, { "epoch": 0.935378947065427, "grad_norm": 2.9246000563345866, "learning_rate": 2.1749088189622844e-08, "loss": 0.5782, "step": 2031 }, { "epoch": 0.9358394979994819, "grad_norm": 3.1558727218247244, "learning_rate": 2.144060769030742e-08, "loss": 0.5921, "step": 2032 }, { "epoch": 0.9363000489335367, "grad_norm": 2.9122079524337754, "learning_rate": 2.113430682037598e-08, "loss": 0.5348, "step": 2033 }, { "epoch": 0.9367605998675916, "grad_norm": 2.5503478146770244, "learning_rate": 2.083018626207933e-08, "loss": 0.4812, "step": 2034 }, { "epoch": 0.9372211508016465, "grad_norm": 3.2463091586540958, "learning_rate": 2.0528246692810835e-08, "loss": 0.5133, "step": 2035 }, { "epoch": 0.9376817017357013, "grad_norm": 2.7033666955582225, "learning_rate": 2.0228488785106634e-08, "loss": 0.5074, "step": 2036 }, { "epoch": 0.9381422526697561, "grad_norm": 3.3458142554070736, "learning_rate": 1.9930913206643306e-08, "loss": 0.5898, "step": 2037 }, { "epoch": 0.9386028036038111, "grad_norm": 3.0937605106073947, "learning_rate": 1.9635520620236323e-08, "loss": 0.4866, "step": 2038 }, { "epoch": 0.9390633545378659, "grad_norm": 3.3805419118935545, "learning_rate": 1.934231168383915e-08, "loss": 0.6963, "step": 2039 }, { "epoch": 0.9395239054719208, "grad_norm": 2.9416660248540536, "learning_rate": 1.9051287050541263e-08, "loss": 0.5596, "step": 2040 }, { "epoch": 0.9399844564059756, "grad_norm": 2.5583360386888883, "learning_rate": 1.876244736856658e-08, "loss": 0.3893, "step": 2041 }, { "epoch": 0.9404450073400306, "grad_norm": 3.602394844208884, "learning_rate": 1.847579328127269e-08, "loss": 0.6501, "step": 2042 }, { "epoch": 0.9409055582740854, "grad_norm": 3.278596111429843, "learning_rate": 1.819132542714874e-08, "loss": 0.5125, "step": 2043 }, { "epoch": 0.9413661092081402, "grad_norm": 3.2792307923445794, "learning_rate": 1.790904443981478e-08, "loss": 0.546, "step": 2044 }, { "epoch": 0.9418266601421951, "grad_norm": 3.354526716736895, "learning_rate": 1.7628950948018974e-08, "loss": 0.5228, "step": 2045 }, { "epoch": 0.9422872110762499, "grad_norm": 3.2620023435160395, "learning_rate": 1.7351045575638044e-08, "loss": 0.4842, "step": 2046 }, { "epoch": 0.9427477620103049, "grad_norm": 3.509391862715867, "learning_rate": 1.7075328941674295e-08, "loss": 0.5415, "step": 2047 }, { "epoch": 0.9432083129443597, "grad_norm": 2.9644652855209945, "learning_rate": 1.680180166025513e-08, "loss": 0.5368, "step": 2048 }, { "epoch": 0.9436688638784145, "grad_norm": 3.103421227871461, "learning_rate": 1.653046434063121e-08, "loss": 0.6085, "step": 2049 }, { "epoch": 0.9441294148124694, "grad_norm": 3.2771987160259877, "learning_rate": 1.626131758717575e-08, "loss": 0.5967, "step": 2050 }, { "epoch": 0.9445899657465243, "grad_norm": 2.840695547956422, "learning_rate": 1.59943619993822e-08, "loss": 0.5215, "step": 2051 }, { "epoch": 0.9450505166805792, "grad_norm": 3.5326470386345545, "learning_rate": 1.572959817186359e-08, "loss": 0.5683, "step": 2052 }, { "epoch": 0.945511067614634, "grad_norm": 3.4768749960533127, "learning_rate": 1.5467026694351404e-08, "loss": 0.5466, "step": 2053 }, { "epoch": 0.9459716185486888, "grad_norm": 3.3583093067528313, "learning_rate": 1.5206648151693478e-08, "loss": 0.553, "step": 2054 }, { "epoch": 0.9464321694827438, "grad_norm": 3.009812369106128, "learning_rate": 1.4948463123853337e-08, "loss": 0.4992, "step": 2055 }, { "epoch": 0.9468927204167986, "grad_norm": 2.9099681097218286, "learning_rate": 1.4692472185908633e-08, "loss": 0.4736, "step": 2056 }, { "epoch": 0.9473532713508535, "grad_norm": 3.3355322307095108, "learning_rate": 1.4438675908050036e-08, "loss": 0.5518, "step": 2057 }, { "epoch": 0.9478138222849083, "grad_norm": 2.5897747211717586, "learning_rate": 1.4187074855579795e-08, "loss": 0.3858, "step": 2058 }, { "epoch": 0.9482743732189632, "grad_norm": 3.3391284256863103, "learning_rate": 1.3937669588910406e-08, "loss": 0.6045, "step": 2059 }, { "epoch": 0.9487349241530181, "grad_norm": 3.0970613079517957, "learning_rate": 1.3690460663563829e-08, "loss": 0.5936, "step": 2060 }, { "epoch": 0.9491954750870729, "grad_norm": 3.3727344553879384, "learning_rate": 1.344544863016961e-08, "loss": 0.5295, "step": 2061 }, { "epoch": 0.9496560260211278, "grad_norm": 2.9033018743602312, "learning_rate": 1.3202634034464199e-08, "loss": 0.549, "step": 2062 }, { "epoch": 0.9501165769551826, "grad_norm": 2.958202980684059, "learning_rate": 1.2962017417289418e-08, "loss": 0.5569, "step": 2063 }, { "epoch": 0.9505771278892375, "grad_norm": 2.976503730921731, "learning_rate": 1.2723599314591105e-08, "loss": 0.5678, "step": 2064 }, { "epoch": 0.9510376788232924, "grad_norm": 3.1304654491952872, "learning_rate": 1.2487380257418578e-08, "loss": 0.5418, "step": 2065 }, { "epoch": 0.9514982297573472, "grad_norm": 3.25198019264722, "learning_rate": 1.2253360771922739e-08, "loss": 0.5825, "step": 2066 }, { "epoch": 0.951958780691402, "grad_norm": 3.5969220214188997, "learning_rate": 1.2021541379355404e-08, "loss": 0.5422, "step": 2067 }, { "epoch": 0.952419331625457, "grad_norm": 2.84240161611037, "learning_rate": 1.1791922596067649e-08, "loss": 0.5475, "step": 2068 }, { "epoch": 0.9528798825595118, "grad_norm": 2.7264361506813537, "learning_rate": 1.1564504933509244e-08, "loss": 0.4494, "step": 2069 }, { "epoch": 0.9533404334935667, "grad_norm": 2.9820660647231296, "learning_rate": 1.1339288898227106e-08, "loss": 0.5436, "step": 2070 }, { "epoch": 0.9538009844276215, "grad_norm": 3.139054611664098, "learning_rate": 1.1116274991864072e-08, "loss": 0.4871, "step": 2071 }, { "epoch": 0.9542615353616765, "grad_norm": 2.8848562462423377, "learning_rate": 1.0895463711158349e-08, "loss": 0.4775, "step": 2072 }, { "epoch": 0.9547220862957313, "grad_norm": 2.940726909982554, "learning_rate": 1.0676855547941844e-08, "loss": 0.4509, "step": 2073 }, { "epoch": 0.9551826372297861, "grad_norm": 3.011334549424482, "learning_rate": 1.0460450989139169e-08, "loss": 0.5029, "step": 2074 }, { "epoch": 0.955643188163841, "grad_norm": 2.997675838063664, "learning_rate": 1.0246250516766863e-08, "loss": 0.5425, "step": 2075 }, { "epoch": 0.9561037390978958, "grad_norm": 2.985898615725367, "learning_rate": 1.0034254607932168e-08, "loss": 0.5514, "step": 2076 }, { "epoch": 0.9565642900319508, "grad_norm": 3.0381025030705233, "learning_rate": 9.82446373483159e-09, "loss": 0.4691, "step": 2077 }, { "epoch": 0.9570248409660056, "grad_norm": 2.7713452807820937, "learning_rate": 9.616878364750446e-09, "loss": 0.4628, "step": 2078 }, { "epoch": 0.9574853919000604, "grad_norm": 2.9033280044478853, "learning_rate": 9.411498960061436e-09, "loss": 0.4301, "step": 2079 }, { "epoch": 0.9579459428341153, "grad_norm": 3.2313767447057917, "learning_rate": 9.208325978223741e-09, "loss": 0.5824, "step": 2080 }, { "epoch": 0.9584064937681702, "grad_norm": 3.0409141727255915, "learning_rate": 9.00735987178214e-09, "loss": 0.5694, "step": 2081 }, { "epoch": 0.9588670447022251, "grad_norm": 3.0094026886557526, "learning_rate": 8.808601088365453e-09, "loss": 0.5971, "step": 2082 }, { "epoch": 0.9593275956362799, "grad_norm": 3.4474240649260555, "learning_rate": 8.612050070686217e-09, "loss": 0.5686, "step": 2083 }, { "epoch": 0.9597881465703347, "grad_norm": 3.264238061980219, "learning_rate": 8.417707256539675e-09, "loss": 0.4905, "step": 2084 }, { "epoch": 0.9602486975043897, "grad_norm": 3.445851440079108, "learning_rate": 8.225573078802006e-09, "loss": 0.536, "step": 2085 }, { "epoch": 0.9607092484384445, "grad_norm": 3.2368359736420684, "learning_rate": 8.035647965430215e-09, "loss": 0.5559, "step": 2086 }, { "epoch": 0.9611697993724994, "grad_norm": 2.7042062125888835, "learning_rate": 7.847932339460906e-09, "loss": 0.537, "step": 2087 }, { "epoch": 0.9616303503065542, "grad_norm": 3.352414822995779, "learning_rate": 7.662426619009178e-09, "loss": 0.5649, "step": 2088 }, { "epoch": 0.962090901240609, "grad_norm": 3.04805145690995, "learning_rate": 7.479131217267732e-09, "loss": 0.6832, "step": 2089 }, { "epoch": 0.962551452174664, "grad_norm": 3.0121047224737474, "learning_rate": 7.2980465425063196e-09, "loss": 0.5337, "step": 2090 }, { "epoch": 0.9630120031087188, "grad_norm": 3.4643442345702815, "learning_rate": 7.119172998070411e-09, "loss": 0.5652, "step": 2091 }, { "epoch": 0.9634725540427737, "grad_norm": 3.1880017551075035, "learning_rate": 6.9425109823803e-09, "loss": 0.4508, "step": 2092 }, { "epoch": 0.9639331049768285, "grad_norm": 3.5879986405315836, "learning_rate": 6.768060888930449e-09, "loss": 0.611, "step": 2093 }, { "epoch": 0.9643936559108834, "grad_norm": 3.1074545754597493, "learning_rate": 6.595823106288589e-09, "loss": 0.6024, "step": 2094 }, { "epoch": 0.9648542068449383, "grad_norm": 3.1449480649098573, "learning_rate": 6.4257980180948415e-09, "loss": 0.5402, "step": 2095 }, { "epoch": 0.9653147577789931, "grad_norm": 3.240649850034883, "learning_rate": 6.257986003060489e-09, "loss": 0.5048, "step": 2096 }, { "epoch": 0.965775308713048, "grad_norm": 2.958071414094661, "learning_rate": 6.09238743496776e-09, "loss": 0.5455, "step": 2097 }, { "epoch": 0.9662358596471029, "grad_norm": 3.3522897979936244, "learning_rate": 5.929002682668494e-09, "loss": 0.5016, "step": 2098 }, { "epoch": 0.9666964105811577, "grad_norm": 3.3646238303023464, "learning_rate": 5.7678321100836925e-09, "loss": 0.6399, "step": 2099 }, { "epoch": 0.9671569615152126, "grad_norm": 3.146114846622207, "learning_rate": 5.608876076202307e-09, "loss": 0.4167, "step": 2100 }, { "epoch": 0.9676175124492674, "grad_norm": 3.2240830087243104, "learning_rate": 5.452134935080899e-09, "loss": 0.519, "step": 2101 }, { "epoch": 0.9680780633833223, "grad_norm": 2.9509149534120898, "learning_rate": 5.29760903584231e-09, "loss": 0.4709, "step": 2102 }, { "epoch": 0.9685386143173772, "grad_norm": 2.9753664130176345, "learning_rate": 5.145298722675439e-09, "loss": 0.5777, "step": 2103 }, { "epoch": 0.968999165251432, "grad_norm": 3.026540264362648, "learning_rate": 4.9952043348342465e-09, "loss": 0.4863, "step": 2104 }, { "epoch": 0.9694597161854869, "grad_norm": 3.3658355981602197, "learning_rate": 4.847326206636526e-09, "loss": 0.5415, "step": 2105 }, { "epoch": 0.9699202671195417, "grad_norm": 3.370998012399039, "learning_rate": 4.701664667464245e-09, "loss": 0.5083, "step": 2106 }, { "epoch": 0.9703808180535967, "grad_norm": 3.0673456063667293, "learning_rate": 4.5582200417617625e-09, "loss": 0.5203, "step": 2107 }, { "epoch": 0.9708413689876515, "grad_norm": 3.05103189149114, "learning_rate": 4.416992649035612e-09, "loss": 0.5176, "step": 2108 }, { "epoch": 0.9713019199217063, "grad_norm": 2.88875246748082, "learning_rate": 4.2779828038536085e-09, "loss": 0.5542, "step": 2109 }, { "epoch": 0.9717624708557612, "grad_norm": 3.03592366750373, "learning_rate": 4.14119081584452e-09, "loss": 0.5563, "step": 2110 }, { "epoch": 0.9722230217898161, "grad_norm": 3.6405619640934184, "learning_rate": 4.00661698969662e-09, "loss": 0.5303, "step": 2111 }, { "epoch": 0.972683572723871, "grad_norm": 2.8429713783566926, "learning_rate": 3.874261625157915e-09, "loss": 0.5358, "step": 2112 }, { "epoch": 0.9731441236579258, "grad_norm": 2.7464922162048238, "learning_rate": 3.744125017034916e-09, "loss": 0.5143, "step": 2113 }, { "epoch": 0.9736046745919806, "grad_norm": 2.9563946004018575, "learning_rate": 3.6162074551919772e-09, "loss": 0.4829, "step": 2114 }, { "epoch": 0.9740652255260355, "grad_norm": 3.4014494942852282, "learning_rate": 3.4905092245509637e-09, "loss": 0.5424, "step": 2115 }, { "epoch": 0.9745257764600904, "grad_norm": 3.4672435378031845, "learning_rate": 3.3670306050902485e-09, "loss": 0.5683, "step": 2116 }, { "epoch": 0.9749863273941453, "grad_norm": 2.852268606152846, "learning_rate": 3.2457718718443827e-09, "loss": 0.5653, "step": 2117 }, { "epoch": 0.9754468783282001, "grad_norm": 3.00608356499513, "learning_rate": 3.1267332949033166e-09, "loss": 0.5951, "step": 2118 }, { "epoch": 0.9759074292622549, "grad_norm": 3.279145796994075, "learning_rate": 3.009915139412067e-09, "loss": 0.5203, "step": 2119 }, { "epoch": 0.9763679801963099, "grad_norm": 3.034428779025666, "learning_rate": 2.8953176655696075e-09, "loss": 0.6216, "step": 2120 }, { "epoch": 0.9768285311303647, "grad_norm": 3.400829045951133, "learning_rate": 2.7829411286287572e-09, "loss": 0.5613, "step": 2121 }, { "epoch": 0.9772890820644196, "grad_norm": 3.1672048746314547, "learning_rate": 2.6727857788954033e-09, "loss": 0.5935, "step": 2122 }, { "epoch": 0.9777496329984744, "grad_norm": 3.26875632935082, "learning_rate": 2.5648518617280567e-09, "loss": 0.5012, "step": 2123 }, { "epoch": 0.9782101839325293, "grad_norm": 3.271789631236318, "learning_rate": 2.459139617537187e-09, "loss": 0.4801, "step": 2124 }, { "epoch": 0.9786707348665842, "grad_norm": 3.2181552684453583, "learning_rate": 2.3556492817847773e-09, "loss": 0.5425, "step": 2125 }, { "epoch": 0.979131285800639, "grad_norm": 3.0926183100897493, "learning_rate": 2.2543810849836586e-09, "loss": 0.5107, "step": 2126 }, { "epoch": 0.9795918367346939, "grad_norm": 2.8517011860409887, "learning_rate": 2.1553352526972878e-09, "loss": 0.5229, "step": 2127 }, { "epoch": 0.9800523876687487, "grad_norm": 2.8804770422411283, "learning_rate": 2.0585120055389705e-09, "loss": 0.4706, "step": 2128 }, { "epoch": 0.9805129386028036, "grad_norm": 3.202711757414141, "learning_rate": 1.963911559171416e-09, "loss": 0.5939, "step": 2129 }, { "epoch": 0.9809734895368585, "grad_norm": 3.2187761279662706, "learning_rate": 1.8715341243061846e-09, "loss": 0.5934, "step": 2130 }, { "epoch": 0.9814340404709133, "grad_norm": 3.881243006981297, "learning_rate": 1.7813799067035729e-09, "loss": 0.5152, "step": 2131 }, { "epoch": 0.9818945914049682, "grad_norm": 3.1208611268878124, "learning_rate": 1.6934491071719515e-09, "loss": 0.5928, "step": 2132 }, { "epoch": 0.9823551423390231, "grad_norm": 3.6853248210429426, "learning_rate": 1.6077419215668742e-09, "loss": 0.5364, "step": 2133 }, { "epoch": 0.9828156932730779, "grad_norm": 3.672076871339007, "learning_rate": 1.5242585407915231e-09, "loss": 0.4995, "step": 2134 }, { "epoch": 0.9832762442071328, "grad_norm": 3.308203790005742, "learning_rate": 1.4429991507954874e-09, "loss": 0.5985, "step": 2135 }, { "epoch": 0.9837367951411876, "grad_norm": 3.4864392711956063, "learning_rate": 1.3639639325748741e-09, "loss": 0.5583, "step": 2136 }, { "epoch": 0.9841973460752426, "grad_norm": 2.865215830877929, "learning_rate": 1.287153062171642e-09, "loss": 0.5195, "step": 2137 }, { "epoch": 0.9846578970092974, "grad_norm": 3.1310255182151754, "learning_rate": 1.2125667106730464e-09, "loss": 0.5164, "step": 2138 }, { "epoch": 0.9851184479433522, "grad_norm": 3.112056106943904, "learning_rate": 1.1402050442118616e-09, "loss": 0.4333, "step": 2139 }, { "epoch": 0.9855789988774071, "grad_norm": 3.038352446915218, "learning_rate": 1.0700682239653814e-09, "loss": 0.5689, "step": 2140 }, { "epoch": 0.9860395498114619, "grad_norm": 3.0348900204468903, "learning_rate": 1.002156406155419e-09, "loss": 0.5682, "step": 2141 }, { "epoch": 0.9865001007455169, "grad_norm": 3.115185553653833, "learning_rate": 9.364697420476408e-10, "loss": 0.4725, "step": 2142 }, { "epoch": 0.9869606516795717, "grad_norm": 3.087394317229768, "learning_rate": 8.730083779516784e-10, "loss": 0.5686, "step": 2143 }, { "epoch": 0.9874212026136265, "grad_norm": 2.8838196694727647, "learning_rate": 8.117724552205718e-10, "loss": 0.4791, "step": 2144 }, { "epoch": 0.9878817535476814, "grad_norm": 2.9019511674207417, "learning_rate": 7.527621102503268e-10, "loss": 0.5693, "step": 2145 }, { "epoch": 0.9883423044817363, "grad_norm": 3.153315732843879, "learning_rate": 6.959774744796921e-10, "loss": 0.6183, "step": 2146 }, { "epoch": 0.9888028554157912, "grad_norm": 3.2841453445938162, "learning_rate": 6.414186743899375e-10, "loss": 0.4677, "step": 2147 }, { "epoch": 0.989263406349846, "grad_norm": 3.205902530946899, "learning_rate": 5.890858315046321e-10, "loss": 0.5202, "step": 2148 }, { "epoch": 0.9897239572839008, "grad_norm": 2.911194242328273, "learning_rate": 5.389790623891999e-10, "loss": 0.4913, "step": 2149 }, { "epoch": 0.9901845082179558, "grad_norm": 3.087952168592988, "learning_rate": 4.910984786506978e-10, "loss": 0.5976, "step": 2150 }, { "epoch": 0.9906450591520106, "grad_norm": 3.2125702072705162, "learning_rate": 4.454441869377046e-10, "loss": 0.5693, "step": 2151 }, { "epoch": 0.9911056100860655, "grad_norm": 3.1911572214123365, "learning_rate": 4.020162889399881e-10, "loss": 0.5079, "step": 2152 }, { "epoch": 0.9915661610201203, "grad_norm": 2.842869367824956, "learning_rate": 3.6081488138817176e-10, "loss": 0.5438, "step": 2153 }, { "epoch": 0.9920267119541751, "grad_norm": 3.59761551281296, "learning_rate": 3.2184005605373487e-10, "loss": 0.6356, "step": 2154 }, { "epoch": 0.9924872628882301, "grad_norm": 3.266624178638357, "learning_rate": 2.850918997485685e-10, "loss": 0.6384, "step": 2155 }, { "epoch": 0.9929478138222849, "grad_norm": 2.847984241902026, "learning_rate": 2.505704943251974e-10, "loss": 0.4189, "step": 2156 }, { "epoch": 0.9934083647563398, "grad_norm": 2.8940228158585684, "learning_rate": 2.1827591667578083e-10, "loss": 0.543, "step": 2157 }, { "epoch": 0.9938689156903946, "grad_norm": 3.2268790504480798, "learning_rate": 1.8820823873311187e-10, "loss": 0.5865, "step": 2158 }, { "epoch": 0.9943294666244495, "grad_norm": 2.816014447163771, "learning_rate": 1.6036752746939608e-10, "loss": 0.6104, "step": 2159 }, { "epoch": 0.9947900175585044, "grad_norm": 2.834508001501944, "learning_rate": 1.347538448966956e-10, "loss": 0.5525, "step": 2160 }, { "epoch": 0.9952505684925592, "grad_norm": 3.296408037769054, "learning_rate": 1.113672480663741e-10, "loss": 0.527, "step": 2161 }, { "epoch": 0.995711119426614, "grad_norm": 3.1020643793075795, "learning_rate": 9.020778906965176e-11, "loss": 0.5439, "step": 2162 }, { "epoch": 0.996171670360669, "grad_norm": 2.9887893240557477, "learning_rate": 7.127551503671724e-11, "loss": 0.5408, "step": 2163 }, { "epoch": 0.9966322212947238, "grad_norm": 3.311987992840308, "learning_rate": 5.4570468136949655e-11, "loss": 0.5569, "step": 2164 }, { "epoch": 0.9970927722287787, "grad_norm": 3.3309536253152436, "learning_rate": 4.009268557902956e-11, "loss": 0.5253, "step": 2165 }, { "epoch": 0.9975533231628335, "grad_norm": 3.3046896055168546, "learning_rate": 2.7842199610605965e-11, "loss": 0.5117, "step": 2166 }, { "epoch": 0.9980138740968884, "grad_norm": 3.1272281406984206, "learning_rate": 1.7819037518185252e-11, "loss": 0.5905, "step": 2167 }, { "epoch": 0.9984744250309433, "grad_norm": 3.614597151505851, "learning_rate": 1.0023221627242229e-11, "loss": 0.4945, "step": 2168 }, { "epoch": 0.9989349759649981, "grad_norm": 3.190348932550531, "learning_rate": 4.454769301998063e-12, "loss": 0.6752, "step": 2169 }, { "epoch": 0.999395526899053, "grad_norm": 3.393120014197588, "learning_rate": 1.1136929456423416e-12, "loss": 0.5265, "step": 2170 }, { "epoch": 0.9998560778331078, "grad_norm": 3.1321707930638376, "learning_rate": 0.0, "loss": 0.5797, "step": 2171 }, { "epoch": 0.9998560778331078, "step": 2171, "total_flos": 1478472491958272.0, "train_loss": 0.577474653254122, "train_runtime": 98261.8902, "train_samples_per_second": 1.414, "train_steps_per_second": 0.022 } ], "logging_steps": 1.0, "max_steps": 2171, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1478472491958272.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }