{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 1257,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002386634844868735,
      "grad_norm": 129.18601989746094,
      "learning_rate": 2.3809523809523808e-06,
      "loss": 14.4042,
      "step": 3
    },
    {
      "epoch": 0.00477326968973747,
      "grad_norm": 55.742095947265625,
      "learning_rate": 4.7619047619047615e-06,
      "loss": 12.0437,
      "step": 6
    },
    {
      "epoch": 0.007159904534606206,
      "grad_norm": 39.26912307739258,
      "learning_rate": 7.142857142857143e-06,
      "loss": 8.1864,
      "step": 9
    },
    {
      "epoch": 0.00954653937947494,
      "grad_norm": 10.621451377868652,
      "learning_rate": 9.523809523809523e-06,
      "loss": 4.9227,
      "step": 12
    },
    {
      "epoch": 0.011933174224343675,
      "grad_norm": 10.118457794189453,
      "learning_rate": 1.1904761904761905e-05,
      "loss": 4.8046,
      "step": 15
    },
    {
      "epoch": 0.014319809069212411,
      "grad_norm": 9.227359771728516,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 4.813,
      "step": 18
    },
    {
      "epoch": 0.016706443914081145,
      "grad_norm": 7.128821849822998,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 4.551,
      "step": 21
    },
    {
      "epoch": 0.01909307875894988,
      "grad_norm": 9.488492012023926,
      "learning_rate": 1.9047619047619046e-05,
      "loss": 4.484,
      "step": 24
    },
    {
      "epoch": 0.021479713603818614,
      "grad_norm": 9.141919136047363,
      "learning_rate": 2.1428571428571428e-05,
      "loss": 4.5506,
      "step": 27
    },
    {
      "epoch": 0.02386634844868735,
      "grad_norm": 6.18914270401001,
      "learning_rate": 2.380952380952381e-05,
      "loss": 4.5217,
      "step": 30
    },
    {
      "epoch": 0.026252983293556086,
      "grad_norm": 7.893601894378662,
      "learning_rate": 2.6190476190476192e-05,
      "loss": 4.2932,
      "step": 33
    },
    {
      "epoch": 0.028639618138424822,
      "grad_norm": 5.094172954559326,
      "learning_rate": 2.857142857142857e-05,
      "loss": 4.2497,
      "step": 36
    },
    {
      "epoch": 0.031026252983293555,
      "grad_norm": 5.149319171905518,
      "learning_rate": 3.095238095238095e-05,
      "loss": 4.3677,
      "step": 39
    },
    {
      "epoch": 0.03341288782816229,
      "grad_norm": 4.346240520477295,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 4.1722,
      "step": 42
    },
    {
      "epoch": 0.03579952267303103,
      "grad_norm": 3.216545581817627,
      "learning_rate": 3.571428571428572e-05,
      "loss": 4.1916,
      "step": 45
    },
    {
      "epoch": 0.03818615751789976,
      "grad_norm": 3.108903408050537,
      "learning_rate": 3.809523809523809e-05,
      "loss": 4.0829,
      "step": 48
    },
    {
      "epoch": 0.0405727923627685,
      "grad_norm": 3.1567437648773193,
      "learning_rate": 4.047619047619048e-05,
      "loss": 4.0211,
      "step": 51
    },
    {
      "epoch": 0.04295942720763723,
      "grad_norm": 12.768291473388672,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 4.1494,
      "step": 54
    },
    {
      "epoch": 0.045346062052505964,
      "grad_norm": 3.211005926132202,
      "learning_rate": 4.523809523809524e-05,
      "loss": 4.1356,
      "step": 57
    },
    {
      "epoch": 0.0477326968973747,
      "grad_norm": 5.850863456726074,
      "learning_rate": 4.761904761904762e-05,
      "loss": 4.2211,
      "step": 60
    },
    {
      "epoch": 0.050119331742243436,
      "grad_norm": 4.25229024887085,
      "learning_rate": 5e-05,
      "loss": 4.1235,
      "step": 63
    },
    {
      "epoch": 0.05250596658711217,
      "grad_norm": 2.7204909324645996,
      "learning_rate": 5.2380952380952384e-05,
      "loss": 3.9658,
      "step": 66
    },
    {
      "epoch": 0.05489260143198091,
      "grad_norm": 3.8032662868499756,
      "learning_rate": 5.4761904761904766e-05,
      "loss": 3.8705,
      "step": 69
    },
    {
      "epoch": 0.057279236276849645,
      "grad_norm": 3.7930989265441895,
      "learning_rate": 5.714285714285714e-05,
      "loss": 4.0484,
      "step": 72
    },
    {
      "epoch": 0.059665871121718374,
      "grad_norm": 3.6324238777160645,
      "learning_rate": 5.9523809523809524e-05,
      "loss": 3.9811,
      "step": 75
    },
    {
      "epoch": 0.06205250596658711,
      "grad_norm": 3.116438388824463,
      "learning_rate": 6.19047619047619e-05,
      "loss": 3.9441,
      "step": 78
    },
    {
      "epoch": 0.06443914081145585,
      "grad_norm": 3.774524688720703,
      "learning_rate": 6.428571428571429e-05,
      "loss": 3.9418,
      "step": 81
    },
    {
      "epoch": 0.06682577565632458,
      "grad_norm": 4.456262588500977,
      "learning_rate": 6.666666666666667e-05,
      "loss": 4.0214,
      "step": 84
    },
    {
      "epoch": 0.06921241050119331,
      "grad_norm": 4.311483860015869,
      "learning_rate": 6.904761904761905e-05,
      "loss": 3.9148,
      "step": 87
    },
    {
      "epoch": 0.07159904534606205,
      "grad_norm": 3.6151814460754395,
      "learning_rate": 7.142857142857143e-05,
      "loss": 3.8497,
      "step": 90
    },
    {
      "epoch": 0.07398568019093078,
      "grad_norm": 3.262566328048706,
      "learning_rate": 7.380952380952382e-05,
      "loss": 3.8559,
      "step": 93
    },
    {
      "epoch": 0.07637231503579953,
      "grad_norm": 4.0872907638549805,
      "learning_rate": 7.619047619047618e-05,
      "loss": 3.7604,
      "step": 96
    },
    {
      "epoch": 0.07875894988066826,
      "grad_norm": 4.191164493560791,
      "learning_rate": 7.857142857142858e-05,
      "loss": 3.6232,
      "step": 99
    },
    {
      "epoch": 0.081145584725537,
      "grad_norm": 3.834773540496826,
      "learning_rate": 8.095238095238096e-05,
      "loss": 3.5658,
      "step": 102
    },
    {
      "epoch": 0.08353221957040573,
      "grad_norm": 4.667291164398193,
      "learning_rate": 8.333333333333334e-05,
      "loss": 3.6749,
      "step": 105
    },
    {
      "epoch": 0.08591885441527446,
      "grad_norm": 3.7559292316436768,
      "learning_rate": 8.571428571428571e-05,
      "loss": 3.5708,
      "step": 108
    },
    {
      "epoch": 0.0883054892601432,
      "grad_norm": 4.605666160583496,
      "learning_rate": 8.80952380952381e-05,
      "loss": 3.5868,
      "step": 111
    },
    {
      "epoch": 0.09069212410501193,
      "grad_norm": 3.557979106903076,
      "learning_rate": 9.047619047619048e-05,
      "loss": 3.5797,
      "step": 114
    },
    {
      "epoch": 0.09307875894988067,
      "grad_norm": 3.252253770828247,
      "learning_rate": 9.285714285714286e-05,
      "loss": 3.4353,
      "step": 117
    },
    {
      "epoch": 0.0954653937947494,
      "grad_norm": 7.634258270263672,
      "learning_rate": 9.523809523809524e-05,
      "loss": 3.3768,
      "step": 120
    },
    {
      "epoch": 0.09785202863961814,
      "grad_norm": 4.522894859313965,
      "learning_rate": 9.761904761904762e-05,
      "loss": 3.4649,
      "step": 123
    },
    {
      "epoch": 0.10023866348448687,
      "grad_norm": 3.660895824432373,
      "learning_rate": 0.0001,
      "loss": 3.3314,
      "step": 126
    },
    {
      "epoch": 0.1026252983293556,
      "grad_norm": 3.2496635913848877,
      "learning_rate": 9.999961058466053e-05,
      "loss": 3.3748,
      "step": 129
    },
    {
      "epoch": 0.10501193317422435,
      "grad_norm": 3.1348578929901123,
      "learning_rate": 9.999844234470782e-05,
      "loss": 3.1998,
      "step": 132
    },
    {
      "epoch": 0.10739856801909307,
      "grad_norm": 4.37357759475708,
      "learning_rate": 9.999649529833915e-05,
      "loss": 3.2159,
      "step": 135
    },
    {
      "epoch": 0.10978520286396182,
      "grad_norm": 3.638875961303711,
      "learning_rate": 9.999376947588288e-05,
      "loss": 3.2499,
      "step": 138
    },
    {
      "epoch": 0.11217183770883055,
      "grad_norm": 4.375198841094971,
      "learning_rate": 9.999026491979808e-05,
      "loss": 3.1967,
      "step": 141
    },
    {
      "epoch": 0.11455847255369929,
      "grad_norm": 3.1584630012512207,
      "learning_rate": 9.99859816846739e-05,
      "loss": 3.2014,
      "step": 144
    },
    {
      "epoch": 0.11694510739856802,
      "grad_norm": 3.404482126235962,
      "learning_rate": 9.998091983722863e-05,
      "loss": 3.1216,
      "step": 147
    },
    {
      "epoch": 0.11933174224343675,
      "grad_norm": 3.8131189346313477,
      "learning_rate": 9.99750794563087e-05,
      "loss": 3.0097,
      "step": 150
    },
    {
      "epoch": 0.12171837708830549,
      "grad_norm": 3.831827163696289,
      "learning_rate": 9.996846063288747e-05,
      "loss": 3.1055,
      "step": 153
    },
    {
      "epoch": 0.12410501193317422,
      "grad_norm": 3.281343698501587,
      "learning_rate": 9.996106347006379e-05,
      "loss": 3.1001,
      "step": 156
    },
    {
      "epoch": 0.12649164677804295,
      "grad_norm": 3.8899145126342773,
      "learning_rate": 9.99528880830604e-05,
      "loss": 3.0959,
      "step": 159
    },
    {
      "epoch": 0.1288782816229117,
      "grad_norm": 2.8943607807159424,
      "learning_rate": 9.994393459922218e-05,
      "loss": 3.0305,
      "step": 162
    },
    {
      "epoch": 0.13126491646778043,
      "grad_norm": 3.5095627307891846,
      "learning_rate": 9.993420315801406e-05,
      "loss": 2.9394,
      "step": 165
    },
    {
      "epoch": 0.13365155131264916,
      "grad_norm": 3.2103517055511475,
      "learning_rate": 9.992369391101895e-05,
      "loss": 2.999,
      "step": 168
    },
    {
      "epoch": 0.1360381861575179,
      "grad_norm": 2.3885068893432617,
      "learning_rate": 9.991240702193532e-05,
      "loss": 3.1839,
      "step": 171
    },
    {
      "epoch": 0.13842482100238662,
      "grad_norm": 3.747015953063965,
      "learning_rate": 9.990034266657467e-05,
      "loss": 2.9744,
      "step": 174
    },
    {
      "epoch": 0.14081145584725538,
      "grad_norm": 3.1654574871063232,
      "learning_rate": 9.988750103285883e-05,
      "loss": 2.9318,
      "step": 177
    },
    {
      "epoch": 0.1431980906921241,
      "grad_norm": 3.403306007385254,
      "learning_rate": 9.987388232081694e-05,
      "loss": 2.9445,
      "step": 180
    },
    {
      "epoch": 0.14558472553699284,
      "grad_norm": 3.077636480331421,
      "learning_rate": 9.985948674258243e-05,
      "loss": 2.9306,
      "step": 183
    },
    {
      "epoch": 0.14797136038186157,
      "grad_norm": 3.725022554397583,
      "learning_rate": 9.984431452238967e-05,
      "loss": 2.8871,
      "step": 186
    },
    {
      "epoch": 0.15035799522673032,
      "grad_norm": 4.53774356842041,
      "learning_rate": 9.982836589657043e-05,
      "loss": 2.9007,
      "step": 189
    },
    {
      "epoch": 0.15274463007159905,
      "grad_norm": 2.8278591632843018,
      "learning_rate": 9.981164111355035e-05,
      "loss": 2.9678,
      "step": 192
    },
    {
      "epoch": 0.15513126491646778,
      "grad_norm": 3.511444568634033,
      "learning_rate": 9.979414043384485e-05,
      "loss": 2.9657,
      "step": 195
    },
    {
      "epoch": 0.1575178997613365,
      "grad_norm": 2.6156179904937744,
      "learning_rate": 9.977586413005531e-05,
      "loss": 2.9317,
      "step": 198
    },
    {
      "epoch": 0.15990453460620524,
      "grad_norm": 3.9150912761688232,
      "learning_rate": 9.975681248686461e-05,
      "loss": 2.9199,
      "step": 201
    },
    {
      "epoch": 0.162291169451074,
      "grad_norm": 3.9258997440338135,
      "learning_rate": 9.973698580103285e-05,
      "loss": 2.82,
      "step": 204
    },
    {
      "epoch": 0.16467780429594273,
      "grad_norm": 2.9867348670959473,
      "learning_rate": 9.971638438139266e-05,
      "loss": 2.8516,
      "step": 207
    },
    {
      "epoch": 0.16706443914081145,
      "grad_norm": 3.0659570693969727,
      "learning_rate": 9.96950085488444e-05,
      "loss": 2.6836,
      "step": 210
    },
    {
      "epoch": 0.16945107398568018,
      "grad_norm": 2.818599224090576,
      "learning_rate": 9.967285863635112e-05,
      "loss": 2.9212,
      "step": 213
    },
    {
      "epoch": 0.1718377088305489,
      "grad_norm": 2.9934067726135254,
      "learning_rate": 9.964993498893349e-05,
      "loss": 2.7864,
      "step": 216
    },
    {
      "epoch": 0.17422434367541767,
      "grad_norm": 2.358505964279175,
      "learning_rate": 9.962623796366429e-05,
      "loss": 2.7296,
      "step": 219
    },
    {
      "epoch": 0.1766109785202864,
      "grad_norm": 2.630748748779297,
      "learning_rate": 9.960176792966289e-05,
      "loss": 2.74,
      "step": 222
    },
    {
      "epoch": 0.17899761336515513,
      "grad_norm": 2.7040629386901855,
      "learning_rate": 9.95765252680896e-05,
      "loss": 2.8416,
      "step": 225
    },
    {
      "epoch": 0.18138424821002386,
      "grad_norm": 2.9643092155456543,
      "learning_rate": 9.95505103721396e-05,
      "loss": 2.7412,
      "step": 228
    },
    {
      "epoch": 0.18377088305489261,
      "grad_norm": 3.3539974689483643,
      "learning_rate": 9.952372364703687e-05,
      "loss": 2.7819,
      "step": 231
    },
    {
      "epoch": 0.18615751789976134,
      "grad_norm": 3.3523247241973877,
      "learning_rate": 9.949616551002787e-05,
      "loss": 2.7698,
      "step": 234
    },
    {
      "epoch": 0.18854415274463007,
      "grad_norm": 2.5793633460998535,
      "learning_rate": 9.946783639037504e-05,
      "loss": 2.7085,
      "step": 237
    },
    {
      "epoch": 0.1909307875894988,
      "grad_norm": 3.2276599407196045,
      "learning_rate": 9.943873672935014e-05,
      "loss": 2.7572,
      "step": 240
    },
    {
      "epoch": 0.19331742243436753,
      "grad_norm": 3.1655075550079346,
      "learning_rate": 9.940886698022734e-05,
      "loss": 2.6529,
      "step": 243
    },
    {
      "epoch": 0.1957040572792363,
      "grad_norm": 3.296135663986206,
      "learning_rate": 9.93782276082762e-05,
      "loss": 2.7917,
      "step": 246
    },
    {
      "epoch": 0.19809069212410502,
      "grad_norm": 2.3760223388671875,
      "learning_rate": 9.934681909075434e-05,
      "loss": 2.6912,
      "step": 249
    },
    {
      "epoch": 0.20047732696897375,
      "grad_norm": 2.8032619953155518,
      "learning_rate": 9.931464191690015e-05,
      "loss": 2.8242,
      "step": 252
    },
    {
      "epoch": 0.20286396181384247,
      "grad_norm": 2.846773386001587,
      "learning_rate": 9.928169658792498e-05,
      "loss": 2.6749,
      "step": 255
    },
    {
      "epoch": 0.2052505966587112,
      "grad_norm": 2.901726245880127,
      "learning_rate": 9.924798361700553e-05,
      "loss": 2.6449,
      "step": 258
    },
    {
      "epoch": 0.20763723150357996,
      "grad_norm": 3.291959524154663,
      "learning_rate": 9.92135035292757e-05,
      "loss": 2.6156,
      "step": 261
    },
    {
      "epoch": 0.2100238663484487,
      "grad_norm": 2.4123713970184326,
      "learning_rate": 9.91782568618185e-05,
      "loss": 2.7662,
      "step": 264
    },
    {
      "epoch": 0.21241050119331742,
      "grad_norm": 2.883798837661743,
      "learning_rate": 9.914224416365764e-05,
      "loss": 2.7688,
      "step": 267
    },
    {
      "epoch": 0.21479713603818615,
      "grad_norm": 2.7051072120666504,
      "learning_rate": 9.910546599574902e-05,
      "loss": 2.8533,
      "step": 270
    },
    {
      "epoch": 0.2171837708830549,
      "grad_norm": 2.8238630294799805,
      "learning_rate": 9.906792293097194e-05,
      "loss": 2.6326,
      "step": 273
    },
    {
      "epoch": 0.21957040572792363,
      "grad_norm": 2.51338529586792,
      "learning_rate": 9.90296155541202e-05,
      "loss": 2.7023,
      "step": 276
    },
    {
      "epoch": 0.22195704057279236,
      "grad_norm": 2.7575676441192627,
      "learning_rate": 9.899054446189304e-05,
      "loss": 2.648,
      "step": 279
    },
    {
      "epoch": 0.2243436754176611,
      "grad_norm": 2.750290870666504,
      "learning_rate": 9.895071026288574e-05,
      "loss": 2.6133,
      "step": 282
    },
    {
      "epoch": 0.22673031026252982,
      "grad_norm": 2.5514075756073,
      "learning_rate": 9.891011357758022e-05,
      "loss": 2.569,
      "step": 285
    },
    {
      "epoch": 0.22911694510739858,
      "grad_norm": 2.7157366275787354,
      "learning_rate": 9.886875503833536e-05,
      "loss": 2.693,
      "step": 288
    },
    {
      "epoch": 0.2315035799522673,
      "grad_norm": 3.114830255508423,
      "learning_rate": 9.882663528937717e-05,
      "loss": 2.596,
      "step": 291
    },
    {
      "epoch": 0.23389021479713604,
      "grad_norm": 2.7666478157043457,
      "learning_rate": 9.87837549867887e-05,
      "loss": 2.5166,
      "step": 294
    },
    {
      "epoch": 0.23627684964200477,
      "grad_norm": 2.94891095161438,
      "learning_rate": 9.87401147984998e-05,
      "loss": 2.7035,
      "step": 297
    },
    {
      "epoch": 0.2386634844868735,
      "grad_norm": 2.5706005096435547,
      "learning_rate": 9.869571540427689e-05,
      "loss": 2.6282,
      "step": 300
    },
    {
      "epoch": 0.24105011933174225,
      "grad_norm": 2.8076047897338867,
      "learning_rate": 9.865055749571215e-05,
      "loss": 2.7397,
      "step": 303
    },
    {
      "epoch": 0.24343675417661098,
      "grad_norm": 2.445986032485962,
      "learning_rate": 9.860464177621284e-05,
      "loss": 2.4252,
      "step": 306
    },
    {
      "epoch": 0.2458233890214797,
      "grad_norm": 2.591123580932617,
      "learning_rate": 9.855796896099045e-05,
      "loss": 2.6638,
      "step": 309
    },
    {
      "epoch": 0.24821002386634844,
      "grad_norm": 2.5579094886779785,
      "learning_rate": 9.851053977704931e-05,
      "loss": 2.5365,
      "step": 312
    },
    {
      "epoch": 0.25059665871121717,
      "grad_norm": 2.701978921890259,
      "learning_rate": 9.846235496317555e-05,
      "loss": 2.48,
      "step": 315
    },
    {
      "epoch": 0.2529832935560859,
      "grad_norm": 2.6815707683563232,
      "learning_rate": 9.841341526992536e-05,
      "loss": 2.5878,
      "step": 318
    },
    {
      "epoch": 0.2553699284009546,
      "grad_norm": 2.3997161388397217,
      "learning_rate": 9.836372145961345e-05,
      "loss": 2.6202,
      "step": 321
    },
    {
      "epoch": 0.2577565632458234,
      "grad_norm": 2.5097908973693848,
      "learning_rate": 9.83132743063011e-05,
      "loss": 2.5595,
      "step": 324
    },
    {
      "epoch": 0.26014319809069214,
      "grad_norm": 2.3788466453552246,
      "learning_rate": 9.826207459578411e-05,
      "loss": 2.4104,
      "step": 327
    },
    {
      "epoch": 0.26252983293556087,
      "grad_norm": 2.3941385746002197,
      "learning_rate": 9.821012312558058e-05,
      "loss": 2.4048,
      "step": 330
    },
    {
      "epoch": 0.2649164677804296,
      "grad_norm": 2.558845281600952,
      "learning_rate": 9.815742070491852e-05,
      "loss": 2.5062,
      "step": 333
    },
    {
      "epoch": 0.26730310262529833,
      "grad_norm": 2.1203112602233887,
      "learning_rate": 9.810396815472314e-05,
      "loss": 2.4535,
      "step": 336
    },
    {
      "epoch": 0.26968973747016706,
      "grad_norm": 2.465987205505371,
      "learning_rate": 9.804976630760419e-05,
      "loss": 2.4999,
      "step": 339
    },
    {
      "epoch": 0.2720763723150358,
      "grad_norm": 2.280484914779663,
      "learning_rate": 9.799481600784286e-05,
      "loss": 2.403,
      "step": 342
    },
    {
      "epoch": 0.2744630071599045,
      "grad_norm": 2.6242918968200684,
      "learning_rate": 9.793911811137875e-05,
      "loss": 2.4721,
      "step": 345
    },
    {
      "epoch": 0.27684964200477324,
      "grad_norm": 2.8333303928375244,
      "learning_rate": 9.788267348579648e-05,
      "loss": 2.5081,
      "step": 348
    },
    {
      "epoch": 0.27923627684964203,
      "grad_norm": 2.4998323917388916,
      "learning_rate": 9.782548301031217e-05,
      "loss": 2.4825,
      "step": 351
    },
    {
      "epoch": 0.28162291169451076,
      "grad_norm": 2.535447835922241,
      "learning_rate": 9.776754757575975e-05,
      "loss": 2.4639,
      "step": 354
    },
    {
      "epoch": 0.2840095465393795,
      "grad_norm": 2.608652114868164,
      "learning_rate": 9.770886808457709e-05,
      "loss": 2.5209,
      "step": 357
    },
    {
      "epoch": 0.2863961813842482,
      "grad_norm": 2.5548174381256104,
      "learning_rate": 9.764944545079196e-05,
      "loss": 2.5707,
      "step": 360
    },
    {
      "epoch": 0.28878281622911695,
      "grad_norm": 2.3992514610290527,
      "learning_rate": 9.758928060000778e-05,
      "loss": 2.515,
      "step": 363
    },
    {
      "epoch": 0.2911694510739857,
      "grad_norm": 3.199773073196411,
      "learning_rate": 9.752837446938915e-05,
      "loss": 2.4069,
      "step": 366
    },
    {
      "epoch": 0.2935560859188544,
      "grad_norm": 2.689730405807495,
      "learning_rate": 9.746672800764735e-05,
      "loss": 2.4314,
      "step": 369
    },
    {
      "epoch": 0.29594272076372313,
      "grad_norm": 2.357332944869995,
      "learning_rate": 9.740434217502547e-05,
      "loss": 2.3856,
      "step": 372
    },
    {
      "epoch": 0.29832935560859186,
      "grad_norm": 2.409001111984253,
      "learning_rate": 9.734121794328357e-05,
      "loss": 2.4423,
      "step": 375
    },
    {
      "epoch": 0.30071599045346065,
      "grad_norm": 2.4118542671203613,
      "learning_rate": 9.727735629568336e-05,
      "loss": 2.5059,
      "step": 378
    },
    {
      "epoch": 0.3031026252983294,
      "grad_norm": 2.4173245429992676,
      "learning_rate": 9.721275822697306e-05,
      "loss": 2.386,
      "step": 381
    },
    {
      "epoch": 0.3054892601431981,
      "grad_norm": 2.1414527893066406,
      "learning_rate": 9.714742474337186e-05,
      "loss": 2.5249,
      "step": 384
    },
    {
      "epoch": 0.30787589498806683,
      "grad_norm": 2.1543893814086914,
      "learning_rate": 9.708135686255416e-05,
      "loss": 2.5626,
      "step": 387
    },
    {
      "epoch": 0.31026252983293556,
      "grad_norm": 3.0734686851501465,
      "learning_rate": 9.701455561363379e-05,
      "loss": 2.3829,
      "step": 390
    },
    {
      "epoch": 0.3126491646778043,
      "grad_norm": 2.2871782779693604,
      "learning_rate": 9.6947022037148e-05,
      "loss": 2.4011,
      "step": 393
    },
    {
      "epoch": 0.315035799522673,
      "grad_norm": 2.4067625999450684,
      "learning_rate": 9.687875718504126e-05,
      "loss": 2.4074,
      "step": 396
    },
    {
      "epoch": 0.31742243436754175,
      "grad_norm": 3.624891757965088,
      "learning_rate": 9.680976212064874e-05,
      "loss": 2.4101,
      "step": 399
    },
    {
      "epoch": 0.3198090692124105,
      "grad_norm": 2.027580499649048,
      "learning_rate": 9.674003791867991e-05,
      "loss": 2.4197,
      "step": 402
    },
    {
      "epoch": 0.3221957040572792,
      "grad_norm": 2.436666965484619,
      "learning_rate": 9.666958566520174e-05,
      "loss": 2.4592,
      "step": 405
    },
    {
      "epoch": 0.324582338902148,
      "grad_norm": 2.1806681156158447,
      "learning_rate": 9.659840645762175e-05,
      "loss": 2.4362,
      "step": 408
    },
    {
      "epoch": 0.3269689737470167,
      "grad_norm": 2.1825525760650635,
      "learning_rate": 9.652650140467093e-05,
      "loss": 2.4333,
      "step": 411
    },
    {
      "epoch": 0.32935560859188545,
      "grad_norm": 2.0613701343536377,
      "learning_rate": 9.645387162638652e-05,
      "loss": 2.4313,
      "step": 414
    },
    {
      "epoch": 0.3317422434367542,
      "grad_norm": 2.278815746307373,
      "learning_rate": 9.638051825409453e-05,
      "loss": 2.5416,
      "step": 417
    },
    {
      "epoch": 0.3341288782816229,
      "grad_norm": 2.1906769275665283,
      "learning_rate": 9.630644243039207e-05,
      "loss": 2.3424,
      "step": 420
    },
    {
      "epoch": 0.33651551312649164,
      "grad_norm": 2.412473678588867,
      "learning_rate": 9.623164530912963e-05,
      "loss": 2.4984,
      "step": 423
    },
    {
      "epoch": 0.33890214797136037,
      "grad_norm": 2.44022274017334,
      "learning_rate": 9.615612805539305e-05,
      "loss": 2.3352,
      "step": 426
    },
    {
      "epoch": 0.3412887828162291,
      "grad_norm": 2.2409064769744873,
      "learning_rate": 9.607989184548543e-05,
      "loss": 2.435,
      "step": 429
    },
    {
      "epoch": 0.3436754176610978,
      "grad_norm": 1.8773585557937622,
      "learning_rate": 9.600293786690872e-05,
      "loss": 2.3689,
      "step": 432
    },
    {
      "epoch": 0.3460620525059666,
      "grad_norm": 2.026489496231079,
      "learning_rate": 9.592526731834537e-05,
      "loss": 2.4707,
      "step": 435
    },
    {
      "epoch": 0.34844868735083534,
      "grad_norm": 1.9061930179595947,
      "learning_rate": 9.584688140963944e-05,
      "loss": 2.1919,
      "step": 438
    },
    {
      "epoch": 0.35083532219570407,
      "grad_norm": 1.9705097675323486,
      "learning_rate": 9.576778136177798e-05,
      "loss": 2.3773,
      "step": 441
    },
    {
      "epoch": 0.3532219570405728,
      "grad_norm": 2.263256788253784,
      "learning_rate": 9.568796840687184e-05,
      "loss": 2.2805,
      "step": 444
    },
    {
      "epoch": 0.3556085918854415,
      "grad_norm": 2.4777801036834717,
      "learning_rate": 9.560744378813659e-05,
      "loss": 2.4083,
      "step": 447
    },
    {
      "epoch": 0.35799522673031026,
      "grad_norm": 2.305082082748413,
      "learning_rate": 9.552620875987311e-05,
      "loss": 2.2696,
      "step": 450
    },
    {
      "epoch": 0.360381861575179,
      "grad_norm": 2.2283718585968018,
      "learning_rate": 9.544426458744804e-05,
      "loss": 2.2327,
      "step": 453
    },
    {
      "epoch": 0.3627684964200477,
      "grad_norm": 2.152367115020752,
      "learning_rate": 9.536161254727408e-05,
      "loss": 2.2686,
      "step": 456
    },
    {
      "epoch": 0.36515513126491644,
      "grad_norm": 2.222525119781494,
      "learning_rate": 9.527825392679012e-05,
      "loss": 2.3051,
      "step": 459
    },
    {
      "epoch": 0.36754176610978523,
      "grad_norm": 2.0518105030059814,
      "learning_rate": 9.51941900244412e-05,
      "loss": 2.2914,
      "step": 462
    },
    {
      "epoch": 0.36992840095465396,
      "grad_norm": 1.8403911590576172,
      "learning_rate": 9.51094221496582e-05,
      "loss": 2.2763,
      "step": 465
    },
    {
      "epoch": 0.3723150357995227,
      "grad_norm": 2.0134127140045166,
      "learning_rate": 9.502395162283759e-05,
      "loss": 2.1292,
      "step": 468
    },
    {
      "epoch": 0.3747016706443914,
      "grad_norm": 2.2648725509643555,
      "learning_rate": 9.493777977532072e-05,
      "loss": 2.2943,
      "step": 471
    },
    {
      "epoch": 0.37708830548926014,
      "grad_norm": 2.598200559616089,
      "learning_rate": 9.485090794937319e-05,
      "loss": 2.2811,
      "step": 474
    },
    {
      "epoch": 0.3794749403341289,
      "grad_norm": 2.188739776611328,
      "learning_rate": 9.476333749816382e-05,
      "loss": 2.2055,
      "step": 477
    },
    {
      "epoch": 0.3818615751789976,
      "grad_norm": 2.106133460998535,
      "learning_rate": 9.467506978574371e-05,
      "loss": 2.1652,
      "step": 480
    },
    {
      "epoch": 0.38424821002386633,
      "grad_norm": 2.550200939178467,
      "learning_rate": 9.45861061870249e-05,
      "loss": 2.3259,
      "step": 483
    },
    {
      "epoch": 0.38663484486873506,
      "grad_norm": 2.1999354362487793,
      "learning_rate": 9.449644808775902e-05,
      "loss": 2.2477,
      "step": 486
    },
    {
      "epoch": 0.38902147971360385,
      "grad_norm": 1.8450204133987427,
      "learning_rate": 9.44060968845156e-05,
      "loss": 2.1774,
      "step": 489
    },
    {
      "epoch": 0.3914081145584726,
      "grad_norm": 2.097698211669922,
      "learning_rate": 9.431505398466045e-05,
      "loss": 2.3338,
      "step": 492
    },
    {
      "epoch": 0.3937947494033413,
      "grad_norm": 2.0692379474639893,
      "learning_rate": 9.42233208063336e-05,
      "loss": 2.1268,
      "step": 495
    },
    {
      "epoch": 0.39618138424821003,
      "grad_norm": 2.289555788040161,
      "learning_rate": 9.413089877842736e-05,
      "loss": 2.3312,
      "step": 498
    },
    {
      "epoch": 0.39856801909307876,
      "grad_norm": 2.145961284637451,
      "learning_rate": 9.403778934056391e-05,
      "loss": 2.3026,
      "step": 501
    },
    {
      "epoch": 0.4009546539379475,
      "grad_norm": 1.9669148921966553,
      "learning_rate": 9.394399394307303e-05,
      "loss": 2.4298,
      "step": 504
    },
    {
      "epoch": 0.4033412887828162,
      "grad_norm": 2.412475824356079,
      "learning_rate": 9.384951404696933e-05,
      "loss": 2.1335,
      "step": 507
    },
    {
      "epoch": 0.40572792362768495,
      "grad_norm": 1.9955891370773315,
      "learning_rate": 9.375435112392969e-05,
      "loss": 2.2833,
      "step": 510
    },
    {
      "epoch": 0.4081145584725537,
      "grad_norm": 2.0198495388031006,
      "learning_rate": 9.365850665627016e-05,
      "loss": 2.2196,
      "step": 513
    },
    {
      "epoch": 0.4105011933174224,
      "grad_norm": 1.874887228012085,
      "learning_rate": 9.356198213692297e-05,
      "loss": 2.1938,
      "step": 516
    },
    {
      "epoch": 0.4128878281622912,
      "grad_norm": 1.9954278469085693,
      "learning_rate": 9.346477906941331e-05,
      "loss": 2.1082,
      "step": 519
    },
    {
      "epoch": 0.4152744630071599,
      "grad_norm": 1.9344687461853027,
      "learning_rate": 9.336689896783573e-05,
      "loss": 2.1265,
      "step": 522
    },
    {
      "epoch": 0.41766109785202865,
      "grad_norm": 2.1499831676483154,
      "learning_rate": 9.32683433568308e-05,
      "loss": 2.2211,
      "step": 525
    },
    {
      "epoch": 0.4200477326968974,
      "grad_norm": 2.411583662033081,
      "learning_rate": 9.316911377156117e-05,
      "loss": 2.2793,
      "step": 528
    },
    {
      "epoch": 0.4224343675417661,
      "grad_norm": 2.0739586353302,
      "learning_rate": 9.306921175768775e-05,
      "loss": 2.1653,
      "step": 531
    },
    {
      "epoch": 0.42482100238663484,
      "grad_norm": 2.0313665866851807,
      "learning_rate": 9.29686388713456e-05,
      "loss": 2.3611,
      "step": 534
    },
    {
      "epoch": 0.42720763723150357,
      "grad_norm": 1.8887441158294678,
      "learning_rate": 9.286739667911972e-05,
      "loss": 2.201,
      "step": 537
    },
    {
      "epoch": 0.4295942720763723,
      "grad_norm": 1.928774118423462,
      "learning_rate": 9.276548675802059e-05,
      "loss": 2.2504,
      "step": 540
    },
    {
      "epoch": 0.431980906921241,
      "grad_norm": 1.7933377027511597,
      "learning_rate": 9.266291069545972e-05,
      "loss": 2.2219,
      "step": 543
    },
    {
      "epoch": 0.4343675417661098,
      "grad_norm": 1.877989649772644,
      "learning_rate": 9.255967008922474e-05,
      "loss": 2.2547,
      "step": 546
    },
    {
      "epoch": 0.43675417661097854,
      "grad_norm": 1.6794133186340332,
      "learning_rate": 9.245576654745471e-05,
      "loss": 2.3152,
      "step": 549
    },
    {
      "epoch": 0.43914081145584727,
      "grad_norm": 2.024888515472412,
      "learning_rate": 9.235120168861496e-05,
      "loss": 2.0559,
      "step": 552
    },
    {
      "epoch": 0.441527446300716,
      "grad_norm": 2.1907565593719482,
      "learning_rate": 9.224597714147186e-05,
      "loss": 2.2414,
      "step": 555
    },
    {
      "epoch": 0.4439140811455847,
      "grad_norm": 1.9804993867874146,
      "learning_rate": 9.214009454506753e-05,
      "loss": 2.0826,
      "step": 558
    },
    {
      "epoch": 0.44630071599045346,
      "grad_norm": 2.1654927730560303,
      "learning_rate": 9.203355554869428e-05,
      "loss": 2.2048,
      "step": 561
    },
    {
      "epoch": 0.4486873508353222,
      "grad_norm": 2.4933547973632812,
      "learning_rate": 9.192636181186888e-05,
      "loss": 2.0166,
      "step": 564
    },
    {
      "epoch": 0.4510739856801909,
      "grad_norm": 2.0309464931488037,
      "learning_rate": 9.181851500430673e-05,
      "loss": 2.0729,
      "step": 567
    },
    {
      "epoch": 0.45346062052505964,
      "grad_norm": 2.191399574279785,
      "learning_rate": 9.171001680589588e-05,
      "loss": 2.0675,
      "step": 570
    },
    {
      "epoch": 0.45584725536992843,
      "grad_norm": 2.108330249786377,
      "learning_rate": 9.160086890667086e-05,
      "loss": 2.1535,
      "step": 573
    },
    {
      "epoch": 0.45823389021479716,
      "grad_norm": 2.01182222366333,
      "learning_rate": 9.14910730067863e-05,
      "loss": 2.1768,
      "step": 576
    },
    {
      "epoch": 0.4606205250596659,
      "grad_norm": 1.80693519115448,
      "learning_rate": 9.138063081649051e-05,
      "loss": 2.2843,
      "step": 579
    },
    {
      "epoch": 0.4630071599045346,
      "grad_norm": 1.9676529169082642,
      "learning_rate": 9.126954405609882e-05,
      "loss": 2.1211,
      "step": 582
    },
    {
      "epoch": 0.46539379474940334,
      "grad_norm": 1.9550760984420776,
      "learning_rate": 9.115781445596676e-05,
      "loss": 2.0111,
      "step": 585
    },
    {
      "epoch": 0.4677804295942721,
      "grad_norm": 2.1613616943359375,
      "learning_rate": 9.104544375646313e-05,
      "loss": 2.3365,
      "step": 588
    },
    {
      "epoch": 0.4701670644391408,
      "grad_norm": 2.0168464183807373,
      "learning_rate": 9.093243370794291e-05,
      "loss": 2.1486,
      "step": 591
    },
    {
      "epoch": 0.47255369928400953,
      "grad_norm": 2.0406484603881836,
      "learning_rate": 9.081878607071996e-05,
      "loss": 2.2437,
      "step": 594
    },
    {
      "epoch": 0.47494033412887826,
      "grad_norm": 2.1495018005371094,
      "learning_rate": 9.07045026150396e-05,
      "loss": 2.122,
      "step": 597
    },
    {
      "epoch": 0.477326968973747,
      "grad_norm": 2.0007262229919434,
      "learning_rate": 9.058958512105104e-05,
      "loss": 2.121,
      "step": 600
    },
    {
      "epoch": 0.4797136038186158,
      "grad_norm": 2.7373595237731934,
      "learning_rate": 9.047403537877971e-05,
      "loss": 2.083,
      "step": 603
    },
    {
      "epoch": 0.4821002386634845,
      "grad_norm": 1.8298907279968262,
      "learning_rate": 9.035785518809927e-05,
      "loss": 2.1547,
      "step": 606
    },
    {
      "epoch": 0.48448687350835323,
      "grad_norm": 1.9181143045425415,
      "learning_rate": 9.024104635870368e-05,
      "loss": 2.0938,
      "step": 609
    },
    {
      "epoch": 0.48687350835322196,
      "grad_norm": 2.0003180503845215,
      "learning_rate": 9.012361071007891e-05,
      "loss": 2.1254,
      "step": 612
    },
    {
      "epoch": 0.4892601431980907,
      "grad_norm": 1.9533605575561523,
      "learning_rate": 9.000555007147469e-05,
      "loss": 2.1393,
      "step": 615
    },
    {
      "epoch": 0.4916467780429594,
      "grad_norm": 2.0818209648132324,
      "learning_rate": 8.988686628187597e-05,
      "loss": 2.1339,
      "step": 618
    },
    {
      "epoch": 0.49403341288782815,
      "grad_norm": 1.762052059173584,
      "learning_rate": 8.976756118997427e-05,
      "loss": 2.1497,
      "step": 621
    },
    {
      "epoch": 0.4964200477326969,
      "grad_norm": 2.371490001678467,
      "learning_rate": 8.964763665413893e-05,
      "loss": 1.9532,
      "step": 624
    },
    {
      "epoch": 0.4988066825775656,
      "grad_norm": 2.1913626194000244,
      "learning_rate": 8.952709454238808e-05,
      "loss": 2.1899,
      "step": 627
    },
    {
      "epoch": 0.5011933174224343,
      "grad_norm": 1.85502290725708,
      "learning_rate": 8.940593673235962e-05,
      "loss": 2.1804,
      "step": 630
    },
    {
      "epoch": 0.5035799522673031,
      "grad_norm": 1.9168248176574707,
      "learning_rate": 8.928416511128195e-05,
      "loss": 2.0406,
      "step": 633
    },
    {
      "epoch": 0.5059665871121718,
      "grad_norm": 1.9352686405181885,
      "learning_rate": 8.916178157594453e-05,
      "loss": 2.0952,
      "step": 636
    },
    {
      "epoch": 0.5083532219570406,
      "grad_norm": 2.1342227458953857,
      "learning_rate": 8.903878803266841e-05,
      "loss": 2.2643,
      "step": 639
    },
    {
      "epoch": 0.5107398568019093,
      "grad_norm": 1.8305190801620483,
      "learning_rate": 8.891518639727649e-05,
      "loss": 1.9711,
      "step": 642
    },
    {
      "epoch": 0.513126491646778,
      "grad_norm": 1.9878753423690796,
      "learning_rate": 8.879097859506372e-05,
      "loss": 2.1531,
      "step": 645
    },
    {
      "epoch": 0.5155131264916468,
      "grad_norm": 1.7866030931472778,
      "learning_rate": 8.866616656076696e-05,
      "loss": 2.0549,
      "step": 648
    },
    {
      "epoch": 0.5178997613365155,
      "grad_norm": 1.8742599487304688,
      "learning_rate": 8.854075223853508e-05,
      "loss": 2.1491,
      "step": 651
    },
    {
      "epoch": 0.5202863961813843,
      "grad_norm": 2.051337242126465,
      "learning_rate": 8.841473758189854e-05,
      "loss": 2.0055,
      "step": 654
    },
    {
      "epoch": 0.522673031026253,
      "grad_norm": 2.0818710327148438,
      "learning_rate": 8.828812455373891e-05,
      "loss": 2.1989,
      "step": 657
    },
    {
      "epoch": 0.5250596658711217,
      "grad_norm": 1.859626293182373,
      "learning_rate": 8.816091512625843e-05,
      "loss": 2.129,
      "step": 660
    },
    {
      "epoch": 0.5274463007159904,
      "grad_norm": 2.454052686691284,
      "learning_rate": 8.803311128094918e-05,
      "loss": 2.1351,
      "step": 663
    },
    {
      "epoch": 0.5298329355608592,
      "grad_norm": 2.2742836475372314,
      "learning_rate": 8.790471500856228e-05,
      "loss": 2.2181,
      "step": 666
    },
    {
      "epoch": 0.5322195704057279,
      "grad_norm": 1.7958076000213623,
      "learning_rate": 8.777572830907684e-05,
      "loss": 2.2685,
      "step": 669
    },
    {
      "epoch": 0.5346062052505967,
      "grad_norm": 1.7897841930389404,
      "learning_rate": 8.764615319166886e-05,
      "loss": 2.1735,
      "step": 672
    },
    {
      "epoch": 0.5369928400954654,
      "grad_norm": 1.8910062313079834,
      "learning_rate": 8.751599167467985e-05,
      "loss": 1.9897,
      "step": 675
    },
    {
      "epoch": 0.5393794749403341,
      "grad_norm": 1.875690221786499,
      "learning_rate": 8.738524578558547e-05,
      "loss": 2.05,
      "step": 678
    },
    {
      "epoch": 0.5417661097852029,
      "grad_norm": 1.935346007347107,
      "learning_rate": 8.72539175609639e-05,
      "loss": 1.9948,
      "step": 681
    },
    {
      "epoch": 0.5441527446300716,
      "grad_norm": 1.7295023202896118,
      "learning_rate": 8.712200904646416e-05,
      "loss": 1.9492,
      "step": 684
    },
    {
      "epoch": 0.5465393794749404,
      "grad_norm": 1.7163113355636597,
      "learning_rate": 8.698952229677422e-05,
      "loss": 2.0111,
      "step": 687
    },
    {
      "epoch": 0.548926014319809,
      "grad_norm": 2.101062536239624,
      "learning_rate": 8.685645937558896e-05,
      "loss": 2.2221,
      "step": 690
    },
    {
      "epoch": 0.5513126491646778,
      "grad_norm": 1.937011480331421,
      "learning_rate": 8.67228223555781e-05,
      "loss": 2.0276,
      "step": 693
    },
    {
      "epoch": 0.5536992840095465,
      "grad_norm": 1.9230318069458008,
      "learning_rate": 8.658861331835385e-05,
      "loss": 2.0541,
      "step": 696
    },
    {
      "epoch": 0.5560859188544153,
      "grad_norm": 1.91681969165802,
      "learning_rate": 8.645383435443852e-05,
      "loss": 2.0046,
      "step": 699
    },
    {
      "epoch": 0.5584725536992841,
      "grad_norm": 1.711011528968811,
      "learning_rate": 8.631848756323197e-05,
      "loss": 2.0765,
      "step": 702
    },
    {
      "epoch": 0.5608591885441527,
      "grad_norm": 1.817881464958191,
      "learning_rate": 8.618257505297886e-05,
      "loss": 2.0712,
      "step": 705
    },
    {
      "epoch": 0.5632458233890215,
      "grad_norm": 1.8954594135284424,
      "learning_rate": 8.604609894073584e-05,
      "loss": 1.8753,
      "step": 708
    },
    {
      "epoch": 0.5656324582338902,
      "grad_norm": 1.7681952714920044,
      "learning_rate": 8.590906135233854e-05,
      "loss": 2.005,
      "step": 711
    },
    {
      "epoch": 0.568019093078759,
      "grad_norm": 1.9686832427978516,
      "learning_rate": 8.577146442236857e-05,
      "loss": 2.0602,
      "step": 714
    },
    {
      "epoch": 0.5704057279236276,
      "grad_norm": 1.82900071144104,
      "learning_rate": 8.563331029412012e-05,
      "loss": 2.1201,
      "step": 717
    },
    {
      "epoch": 0.5727923627684964,
      "grad_norm": 1.9767824411392212,
      "learning_rate": 8.549460111956664e-05,
      "loss": 2.0324,
      "step": 720
    },
    {
      "epoch": 0.5751789976133651,
      "grad_norm": 1.8764835596084595,
      "learning_rate": 8.535533905932738e-05,
      "loss": 2.0167,
      "step": 723
    },
    {
      "epoch": 0.5775656324582339,
      "grad_norm": 2.0710604190826416,
      "learning_rate": 8.521552628263362e-05,
      "loss": 2.0242,
      "step": 726
    },
    {
      "epoch": 0.5799522673031027,
      "grad_norm": 1.8737850189208984,
      "learning_rate": 8.507516496729495e-05,
      "loss": 1.9975,
      "step": 729
    },
    {
      "epoch": 0.5823389021479713,
      "grad_norm": 1.7871778011322021,
      "learning_rate": 8.493425729966534e-05,
      "loss": 2.0898,
      "step": 732
    },
    {
      "epoch": 0.5847255369928401,
      "grad_norm": 1.8007279634475708,
      "learning_rate": 8.479280547460907e-05,
      "loss": 1.9798,
      "step": 735
    },
    {
      "epoch": 0.5871121718377088,
      "grad_norm": 1.6072956323623657,
      "learning_rate": 8.465081169546659e-05,
      "loss": 2.1404,
      "step": 738
    },
    {
      "epoch": 0.5894988066825776,
      "grad_norm": 1.7656550407409668,
      "learning_rate": 8.450827817402011e-05,
      "loss": 2.1144,
      "step": 741
    },
    {
      "epoch": 0.5918854415274463,
      "grad_norm": 2.1177773475646973,
      "learning_rate": 8.436520713045922e-05,
      "loss": 1.9443,
      "step": 744
    },
    {
      "epoch": 0.594272076372315,
      "grad_norm": 1.5557399988174438,
      "learning_rate": 8.422160079334628e-05,
      "loss": 1.9939,
      "step": 747
    },
    {
      "epoch": 0.5966587112171837,
      "grad_norm": 2.5375919342041016,
      "learning_rate": 8.40774613995817e-05,
      "loss": 2.0561,
      "step": 750
    },
    {
      "epoch": 0.5990453460620525,
      "grad_norm": 2.0415713787078857,
      "learning_rate": 8.393279119436912e-05,
      "loss": 2.0833,
      "step": 753
    },
    {
      "epoch": 0.6014319809069213,
      "grad_norm": 1.6955374479293823,
      "learning_rate": 8.378759243118044e-05,
      "loss": 2.2045,
      "step": 756
    },
    {
      "epoch": 0.60381861575179,
      "grad_norm": 1.6944271326065063,
      "learning_rate": 8.364186737172068e-05,
      "loss": 1.9618,
      "step": 759
    },
    {
      "epoch": 0.6062052505966588,
      "grad_norm": 1.7665637731552124,
      "learning_rate": 8.349561828589277e-05,
      "loss": 2.1562,
      "step": 762
    },
    {
      "epoch": 0.6085918854415274,
      "grad_norm": 1.9578077793121338,
      "learning_rate": 8.33488474517622e-05,
      "loss": 1.9817,
      "step": 765
    },
    {
      "epoch": 0.6109785202863962,
      "grad_norm": 1.7413527965545654,
      "learning_rate": 8.320155715552155e-05,
      "loss": 1.9573,
      "step": 768
    },
    {
      "epoch": 0.6133651551312649,
      "grad_norm": 2.020991325378418,
      "learning_rate": 8.305374969145488e-05,
      "loss": 2.107,
      "step": 771
    },
    {
      "epoch": 0.6157517899761337,
      "grad_norm": 1.8580269813537598,
      "learning_rate": 8.290542736190188e-05,
      "loss": 1.8968,
      "step": 774
    },
    {
      "epoch": 0.6181384248210023,
      "grad_norm": 1.6956955194473267,
      "learning_rate": 8.275659247722222e-05,
      "loss": 1.9221,
      "step": 777
    },
    {
      "epoch": 0.6205250596658711,
      "grad_norm": 1.873700499534607,
      "learning_rate": 8.260724735575933e-05,
      "loss": 2.0205,
      "step": 780
    },
    {
      "epoch": 0.6229116945107399,
      "grad_norm": 1.998443603515625,
      "learning_rate": 8.24573943238045e-05,
      "loss": 2.0767,
      "step": 783
    },
    {
      "epoch": 0.6252983293556086,
      "grad_norm": 1.5726591348648071,
      "learning_rate": 8.230703571556048e-05,
      "loss": 1.887,
      "step": 786
    },
    {
      "epoch": 0.6276849642004774,
      "grad_norm": 1.5653208494186401,
      "learning_rate": 8.215617387310524e-05,
      "loss": 1.9488,
      "step": 789
    },
    {
      "epoch": 0.630071599045346,
      "grad_norm": 1.8755910396575928,
      "learning_rate": 8.200481114635536e-05,
      "loss": 1.9843,
      "step": 792
    },
    {
      "epoch": 0.6324582338902148,
      "grad_norm": 1.6157357692718506,
      "learning_rate": 8.185294989302958e-05,
      "loss": 1.9286,
      "step": 795
    },
    {
      "epoch": 0.6348448687350835,
      "grad_norm": 1.6673681735992432,
      "learning_rate": 8.170059247861194e-05,
      "loss": 1.9185,
      "step": 798
    },
    {
      "epoch": 0.6372315035799523,
      "grad_norm": 2.0018393993377686,
      "learning_rate": 8.154774127631501e-05,
      "loss": 1.8868,
      "step": 801
    },
    {
      "epoch": 0.639618138424821,
      "grad_norm": 1.6845289468765259,
      "learning_rate": 8.139439866704293e-05,
      "loss": 1.973,
      "step": 804
    },
    {
      "epoch": 0.6420047732696897,
      "grad_norm": 2.231600522994995,
      "learning_rate": 8.124056703935423e-05,
      "loss": 1.9579,
      "step": 807
    },
    {
      "epoch": 0.6443914081145584,
      "grad_norm": 1.8767783641815186,
      "learning_rate": 8.108624878942477e-05,
      "loss": 1.9769,
      "step": 810
    },
    {
      "epoch": 0.6467780429594272,
      "grad_norm": 1.685530424118042,
      "learning_rate": 8.093144632101026e-05,
      "loss": 1.8638,
      "step": 813
    },
    {
      "epoch": 0.649164677804296,
      "grad_norm": 1.8997292518615723,
      "learning_rate": 8.077616204540897e-05,
      "loss": 1.955,
      "step": 816
    },
    {
      "epoch": 0.6515513126491647,
      "grad_norm": 1.982273817062378,
      "learning_rate": 8.062039838142402e-05,
      "loss": 1.9263,
      "step": 819
    },
    {
      "epoch": 0.6539379474940334,
      "grad_norm": 2.659235954284668,
      "learning_rate": 8.046415775532585e-05,
      "loss": 1.9621,
      "step": 822
    },
    {
      "epoch": 0.6563245823389021,
      "grad_norm": 1.7202818393707275,
      "learning_rate": 8.030744260081426e-05,
      "loss": 1.9706,
      "step": 825
    },
    {
      "epoch": 0.6587112171837709,
      "grad_norm": 1.7414380311965942,
      "learning_rate": 8.015025535898073e-05,
      "loss": 2.0403,
      "step": 828
    },
    {
      "epoch": 0.6610978520286396,
      "grad_norm": 1.7690259218215942,
      "learning_rate": 7.999259847827015e-05,
      "loss": 2.0361,
      "step": 831
    },
    {
      "epoch": 0.6634844868735084,
      "grad_norm": 1.6158711910247803,
      "learning_rate": 7.983447441444281e-05,
      "loss": 1.8728,
      "step": 834
    },
    {
      "epoch": 0.665871121718377,
      "grad_norm": 2.032346487045288,
      "learning_rate": 7.967588563053616e-05,
      "loss": 1.9029,
      "step": 837
    },
    {
      "epoch": 0.6682577565632458,
      "grad_norm": 1.7840015888214111,
      "learning_rate": 7.951683459682641e-05,
      "loss": 1.9341,
      "step": 840
    },
    {
      "epoch": 0.6706443914081146,
      "grad_norm": 1.7461919784545898,
      "learning_rate": 7.935732379079008e-05,
      "loss": 2.1495,
      "step": 843
    },
    {
      "epoch": 0.6730310262529833,
      "grad_norm": 1.7609453201293945,
      "learning_rate": 7.919735569706533e-05,
      "loss": 2.0172,
      "step": 846
    },
    {
      "epoch": 0.6754176610978521,
      "grad_norm": 1.7367274761199951,
      "learning_rate": 7.903693280741331e-05,
      "loss": 1.9978,
      "step": 849
    },
    {
      "epoch": 0.6778042959427207,
      "grad_norm": 1.6769988536834717,
      "learning_rate": 7.887605762067945e-05,
      "loss": 1.871,
      "step": 852
    },
    {
      "epoch": 0.6801909307875895,
      "grad_norm": 1.6532725095748901,
      "learning_rate": 7.871473264275429e-05,
      "loss": 1.8663,
      "step": 855
    },
    {
      "epoch": 0.6825775656324582,
      "grad_norm": 1.6891016960144043,
      "learning_rate": 7.855296038653475e-05,
      "loss": 2.1671,
      "step": 858
    },
    {
      "epoch": 0.684964200477327,
      "grad_norm": 1.6832776069641113,
      "learning_rate": 7.83907433718847e-05,
      "loss": 1.9273,
      "step": 861
    },
    {
      "epoch": 0.6873508353221957,
      "grad_norm": 1.6840589046478271,
      "learning_rate": 7.82280841255959e-05,
      "loss": 1.8862,
      "step": 864
    },
    {
      "epoch": 0.6897374701670644,
      "grad_norm": 1.644808292388916,
      "learning_rate": 7.80649851813486e-05,
      "loss": 2.0491,
      "step": 867
    },
    {
      "epoch": 0.6921241050119332,
      "grad_norm": 1.915778636932373,
      "learning_rate": 7.790144907967201e-05,
      "loss": 1.9142,
      "step": 870
    },
    {
      "epoch": 0.6945107398568019,
      "grad_norm": 1.957671880722046,
      "learning_rate": 7.773747836790481e-05,
      "loss": 2.1069,
      "step": 873
    },
    {
      "epoch": 0.6968973747016707,
      "grad_norm": 1.8935115337371826,
      "learning_rate": 7.757307560015538e-05,
      "loss": 1.9058,
      "step": 876
    },
    {
      "epoch": 0.6992840095465394,
      "grad_norm": 2.2118947505950928,
      "learning_rate": 7.740824333726213e-05,
      "loss": 1.8754,
      "step": 879
    },
    {
      "epoch": 0.7016706443914081,
      "grad_norm": 1.820563793182373,
      "learning_rate": 7.724298414675353e-05,
      "loss": 1.9056,
      "step": 882
    },
    {
      "epoch": 0.7040572792362768,
      "grad_norm": 2.0297231674194336,
      "learning_rate": 7.707730060280812e-05,
      "loss": 2.043,
      "step": 885
    },
    {
      "epoch": 0.7064439140811456,
      "grad_norm": 1.5307203531265259,
      "learning_rate": 7.691119528621444e-05,
      "loss": 1.9592,
      "step": 888
    },
    {
      "epoch": 0.7088305489260143,
      "grad_norm": 1.9287526607513428,
      "learning_rate": 7.674467078433081e-05,
      "loss": 2.0573,
      "step": 891
    },
    {
      "epoch": 0.711217183770883,
      "grad_norm": 1.9057129621505737,
      "learning_rate": 7.657772969104508e-05,
      "loss": 1.78,
      "step": 894
    },
    {
      "epoch": 0.7136038186157518,
      "grad_norm": 1.8515708446502686,
      "learning_rate": 7.641037460673412e-05,
      "loss": 1.7595,
      "step": 897
    },
    {
      "epoch": 0.7159904534606205,
      "grad_norm": 1.5945055484771729,
      "learning_rate": 7.624260813822342e-05,
      "loss": 1.8427,
      "step": 900
    },
    {
      "epoch": 0.7183770883054893,
      "grad_norm": 1.8130457401275635,
      "learning_rate": 7.607443289874642e-05,
      "loss": 1.9802,
      "step": 903
    },
    {
      "epoch": 0.720763723150358,
      "grad_norm": 1.740313172340393,
      "learning_rate": 7.590585150790389e-05,
      "loss": 2.0377,
      "step": 906
    },
    {
      "epoch": 0.7231503579952268,
      "grad_norm": 1.4544378519058228,
      "learning_rate": 7.573686659162293e-05,
      "loss": 1.9641,
      "step": 909
    },
    {
      "epoch": 0.7255369928400954,
      "grad_norm": 1.7715884447097778,
      "learning_rate": 7.556748078211635e-05,
      "loss": 2.0572,
      "step": 912
    },
    {
      "epoch": 0.7279236276849642,
      "grad_norm": 1.5698533058166504,
      "learning_rate": 7.53976967178414e-05,
      "loss": 1.9866,
      "step": 915
    },
    {
      "epoch": 0.7303102625298329,
      "grad_norm": 1.4722906351089478,
      "learning_rate": 7.522751704345887e-05,
      "loss": 1.9815,
      "step": 918
    },
    {
      "epoch": 0.7326968973747017,
      "grad_norm": 1.628419280052185,
      "learning_rate": 7.505694440979178e-05,
      "loss": 1.9551,
      "step": 921
    },
    {
      "epoch": 0.7350835322195705,
      "grad_norm": 1.8405951261520386,
      "learning_rate": 7.488598147378416e-05,
      "loss": 1.82,
      "step": 924
    },
    {
      "epoch": 0.7374701670644391,
      "grad_norm": 1.9501157999038696,
      "learning_rate": 7.471463089845956e-05,
      "loss": 1.8727,
      "step": 927
    },
    {
      "epoch": 0.7398568019093079,
      "grad_norm": 1.797590970993042,
      "learning_rate": 7.454289535287968e-05,
      "loss": 1.8462,
      "step": 930
    },
    {
      "epoch": 0.7422434367541766,
      "grad_norm": 1.7332159280776978,
      "learning_rate": 7.437077751210279e-05,
      "loss": 2.0425,
      "step": 933
    },
    {
      "epoch": 0.7446300715990454,
      "grad_norm": 1.8471993207931519,
      "learning_rate": 7.419828005714194e-05,
      "loss": 1.9414,
      "step": 936
    },
    {
      "epoch": 0.747016706443914,
      "grad_norm": 1.7861772775650024,
      "learning_rate": 7.402540567492337e-05,
      "loss": 1.9029,
      "step": 939
    },
    {
      "epoch": 0.7494033412887828,
      "grad_norm": 1.5510649681091309,
      "learning_rate": 7.385215705824449e-05,
      "loss": 2.1046,
      "step": 942
    },
    {
      "epoch": 0.7517899761336515,
      "grad_norm": 1.687177300453186,
      "learning_rate": 7.367853690573208e-05,
      "loss": 1.7673,
      "step": 945
    },
    {
      "epoch": 0.7541766109785203,
      "grad_norm": 1.8639237880706787,
      "learning_rate": 7.350454792180016e-05,
      "loss": 1.859,
      "step": 948
    },
    {
      "epoch": 0.7565632458233891,
      "grad_norm": 1.7479451894760132,
      "learning_rate": 7.333019281660789e-05,
      "loss": 2.043,
      "step": 951
    },
    {
      "epoch": 0.7589498806682577,
      "grad_norm": 1.6814374923706055,
      "learning_rate": 7.31554743060174e-05,
      "loss": 1.8431,
      "step": 954
    },
    {
      "epoch": 0.7613365155131265,
      "grad_norm": 1.6872406005859375,
      "learning_rate": 7.298039511155138e-05,
      "loss": 1.9233,
      "step": 957
    },
    {
      "epoch": 0.7637231503579952,
      "grad_norm": 1.6230123043060303,
      "learning_rate": 7.280495796035079e-05,
      "loss": 1.9329,
      "step": 960
    },
    {
      "epoch": 0.766109785202864,
      "grad_norm": 1.5747705698013306,
      "learning_rate": 7.262916558513237e-05,
      "loss": 1.7736,
      "step": 963
    },
    {
      "epoch": 0.7684964200477327,
      "grad_norm": 1.9275933504104614,
      "learning_rate": 7.245302072414601e-05,
      "loss": 1.9087,
      "step": 966
    },
    {
      "epoch": 0.7708830548926014,
      "grad_norm": 1.7358119487762451,
      "learning_rate": 7.227652612113213e-05,
      "loss": 1.7724,
      "step": 969
    },
    {
      "epoch": 0.7732696897374701,
      "grad_norm": 1.5825779438018799,
      "learning_rate": 7.209968452527896e-05,
      "loss": 1.8674,
      "step": 972
    },
    {
      "epoch": 0.7756563245823389,
      "grad_norm": 1.8964987993240356,
      "learning_rate": 7.192249869117971e-05,
      "loss": 1.9597,
      "step": 975
    },
    {
      "epoch": 0.7780429594272077,
      "grad_norm": 1.873633861541748,
      "learning_rate": 7.174497137878966e-05,
      "loss": 1.8599,
      "step": 978
    },
    {
      "epoch": 0.7804295942720764,
      "grad_norm": 1.6681768894195557,
      "learning_rate": 7.156710535338312e-05,
      "loss": 2.0096,
      "step": 981
    },
    {
      "epoch": 0.7828162291169452,
      "grad_norm": 1.7456769943237305,
      "learning_rate": 7.138890338551048e-05,
      "loss": 1.9242,
      "step": 984
    },
    {
      "epoch": 0.7852028639618138,
      "grad_norm": 1.683982491493225,
      "learning_rate": 7.121036825095492e-05,
      "loss": 1.8168,
      "step": 987
    },
    {
      "epoch": 0.7875894988066826,
      "grad_norm": 1.7995425462722778,
      "learning_rate": 7.103150273068921e-05,
      "loss": 1.8701,
      "step": 990
    },
    {
      "epoch": 0.7899761336515513,
      "grad_norm": 1.594572901725769,
      "learning_rate": 7.085230961083249e-05,
      "loss": 1.9488,
      "step": 993
    },
    {
      "epoch": 0.7923627684964201,
      "grad_norm": 2.3273961544036865,
      "learning_rate": 7.067279168260671e-05,
      "loss": 1.9518,
      "step": 996
    },
    {
      "epoch": 0.7947494033412887,
      "grad_norm": 1.6921088695526123,
      "learning_rate": 7.04929517422933e-05,
      "loss": 1.8953,
      "step": 999
    },
    {
      "epoch": 0.7971360381861575,
      "grad_norm": 1.571298360824585,
      "learning_rate": 7.031279259118946e-05,
      "loss": 1.7606,
      "step": 1002
    },
    {
      "epoch": 0.7995226730310262,
      "grad_norm": 2.253617286682129,
      "learning_rate": 7.013231703556471e-05,
      "loss": 1.9815,
      "step": 1005
    },
    {
      "epoch": 0.801909307875895,
      "grad_norm": 1.8123805522918701,
      "learning_rate": 6.995152788661705e-05,
      "loss": 1.9012,
      "step": 1008
    },
    {
      "epoch": 0.8042959427207638,
      "grad_norm": 1.5949262380599976,
      "learning_rate": 6.977042796042917e-05,
      "loss": 1.862,
      "step": 1011
    },
    {
      "epoch": 0.8066825775656324,
      "grad_norm": 1.508523941040039,
      "learning_rate": 6.958902007792466e-05,
      "loss": 1.8586,
      "step": 1014
    },
    {
      "epoch": 0.8090692124105012,
      "grad_norm": 2.1280345916748047,
      "learning_rate": 6.940730706482399e-05,
      "loss": 1.9284,
      "step": 1017
    },
    {
      "epoch": 0.8114558472553699,
      "grad_norm": 1.6476510763168335,
      "learning_rate": 6.922529175160054e-05,
      "loss": 1.8232,
      "step": 1020
    },
    {
      "epoch": 0.8138424821002387,
      "grad_norm": 1.483221173286438,
      "learning_rate": 6.904297697343655e-05,
      "loss": 2.0177,
      "step": 1023
    },
    {
      "epoch": 0.8162291169451074,
      "grad_norm": 1.637446403503418,
      "learning_rate": 6.886036557017881e-05,
      "loss": 1.9592,
      "step": 1026
    },
    {
      "epoch": 0.8186157517899761,
      "grad_norm": 1.7579176425933838,
      "learning_rate": 6.867746038629462e-05,
      "loss": 2.0381,
      "step": 1029
    },
    {
      "epoch": 0.8210023866348448,
      "grad_norm": 1.6292965412139893,
      "learning_rate": 6.849426427082735e-05,
      "loss": 1.8949,
      "step": 1032
    },
    {
      "epoch": 0.8233890214797136,
      "grad_norm": 1.5425759553909302,
      "learning_rate": 6.83107800773521e-05,
      "loss": 1.9462,
      "step": 1035
    },
    {
      "epoch": 0.8257756563245824,
      "grad_norm": 1.5830105543136597,
      "learning_rate": 6.812701066393124e-05,
      "loss": 1.8403,
      "step": 1038
    },
    {
      "epoch": 0.8281622911694511,
      "grad_norm": 2.093899726867676,
      "learning_rate": 6.79429588930699e-05,
      "loss": 1.8663,
      "step": 1041
    },
    {
      "epoch": 0.8305489260143198,
      "grad_norm": 2.133967161178589,
      "learning_rate": 6.775862763167142e-05,
      "loss": 1.8503,
      "step": 1044
    },
    {
      "epoch": 0.8329355608591885,
      "grad_norm": 1.640627145767212,
      "learning_rate": 6.757401975099262e-05,
      "loss": 1.7844,
      "step": 1047
    },
    {
      "epoch": 0.8353221957040573,
      "grad_norm": 3.505713939666748,
      "learning_rate": 6.738913812659912e-05,
      "loss": 2.0046,
      "step": 1050
    },
    {
      "epoch": 0.837708830548926,
      "grad_norm": 1.8108558654785156,
      "learning_rate": 6.720398563832055e-05,
      "loss": 1.8705,
      "step": 1053
    },
    {
      "epoch": 0.8400954653937948,
      "grad_norm": 1.6313164234161377,
      "learning_rate": 6.701856517020565e-05,
      "loss": 1.9745,
      "step": 1056
    },
    {
      "epoch": 0.8424821002386634,
      "grad_norm": 1.5467928647994995,
      "learning_rate": 6.683287961047742e-05,
      "loss": 2.0668,
      "step": 1059
    },
    {
      "epoch": 0.8448687350835322,
      "grad_norm": 1.6917724609375,
      "learning_rate": 6.664693185148807e-05,
      "loss": 1.8494,
      "step": 1062
    },
    {
      "epoch": 0.847255369928401,
      "grad_norm": 1.695573329925537,
      "learning_rate": 6.646072478967397e-05,
      "loss": 2.0077,
      "step": 1065
    },
    {
      "epoch": 0.8496420047732697,
      "grad_norm": 1.6147156953811646,
      "learning_rate": 6.627426132551058e-05,
      "loss": 1.8638,
      "step": 1068
    },
    {
      "epoch": 0.8520286396181385,
      "grad_norm": 1.6783572435379028,
      "learning_rate": 6.608754436346725e-05,
      "loss": 1.8168,
      "step": 1071
    },
    {
      "epoch": 0.8544152744630071,
      "grad_norm": 2.0277252197265625,
      "learning_rate": 6.590057681196191e-05,
      "loss": 1.7963,
      "step": 1074
    },
    {
      "epoch": 0.8568019093078759,
      "grad_norm": 1.7208962440490723,
      "learning_rate": 6.571336158331589e-05,
      "loss": 2.0736,
      "step": 1077
    },
    {
      "epoch": 0.8591885441527446,
      "grad_norm": 1.736999750137329,
      "learning_rate": 6.552590159370844e-05,
      "loss": 1.7492,
      "step": 1080
    },
    {
      "epoch": 0.8615751789976134,
      "grad_norm": 1.6817790269851685,
      "learning_rate": 6.53381997631314e-05,
      "loss": 1.9629,
      "step": 1083
    },
    {
      "epoch": 0.863961813842482,
      "grad_norm": 1.6716388463974,
      "learning_rate": 6.515025901534364e-05,
      "loss": 1.8712,
      "step": 1086
    },
    {
      "epoch": 0.8663484486873508,
      "grad_norm": 1.500181794166565,
      "learning_rate": 6.496208227782556e-05,
      "loss": 1.8531,
      "step": 1089
    },
    {
      "epoch": 0.8687350835322196,
      "grad_norm": 1.6236196756362915,
      "learning_rate": 6.477367248173352e-05,
      "loss": 1.8949,
      "step": 1092
    },
    {
      "epoch": 0.8711217183770883,
      "grad_norm": 1.5496482849121094,
      "learning_rate": 6.458503256185404e-05,
      "loss": 1.8663,
      "step": 1095
    },
    {
      "epoch": 0.8735083532219571,
      "grad_norm": 1.5112017393112183,
      "learning_rate": 6.439616545655834e-05,
      "loss": 1.7794,
      "step": 1098
    },
    {
      "epoch": 0.8758949880668258,
      "grad_norm": 1.5853941440582275,
      "learning_rate": 6.420707410775626e-05,
      "loss": 1.9137,
      "step": 1101
    },
    {
      "epoch": 0.8782816229116945,
      "grad_norm": 1.7348613739013672,
      "learning_rate": 6.401776146085072e-05,
      "loss": 2.181,
      "step": 1104
    },
    {
      "epoch": 0.8806682577565632,
      "grad_norm": 1.8379839658737183,
      "learning_rate": 6.382823046469167e-05,
      "loss": 1.9456,
      "step": 1107
    },
    {
      "epoch": 0.883054892601432,
      "grad_norm": 1.6896251440048218,
      "learning_rate": 6.363848407153016e-05,
      "loss": 1.7248,
      "step": 1110
    },
{ |
|
"epoch": 0.8854415274463007, |
|
"grad_norm": 1.8158091306686401, |
|
"learning_rate": 6.344852523697247e-05, |
|
"loss": 1.9658, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.8878281622911695, |
|
"grad_norm": 1.7598294019699097, |
|
"learning_rate": 6.325835691993394e-05, |
|
"loss": 1.6611, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.8902147971360382, |
|
"grad_norm": 1.7450791597366333, |
|
"learning_rate": 6.306798208259297e-05, |
|
"loss": 1.8354, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.8926014319809069, |
|
"grad_norm": 1.71793532371521, |
|
"learning_rate": 6.287740369034485e-05, |
|
"loss": 1.6597, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.8949880668257757, |
|
"grad_norm": 1.8093537092208862, |
|
"learning_rate": 6.26866247117555e-05, |
|
"loss": 1.716, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.8973747016706444, |
|
"grad_norm": 1.7055341005325317, |
|
"learning_rate": 6.249564811851543e-05, |
|
"loss": 1.9221, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.8997613365155132, |
|
"grad_norm": 1.5543915033340454, |
|
"learning_rate": 6.230447688539316e-05, |
|
"loss": 1.7363, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.9021479713603818, |
|
"grad_norm": 1.7169188261032104, |
|
"learning_rate": 6.211311399018916e-05, |
|
"loss": 1.8639, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.9045346062052506, |
|
"grad_norm": 1.5219560861587524, |
|
"learning_rate": 6.192156241368929e-05, |
|
"loss": 1.8671, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.9069212410501193, |
|
"grad_norm": 1.6446306705474854, |
|
"learning_rate": 6.172982513961845e-05, |
|
"loss": 1.8123, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.9093078758949881, |
|
"grad_norm": 1.7986334562301636, |
|
"learning_rate": 6.153790515459404e-05, |
|
"loss": 1.7454, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.9116945107398569, |
|
"grad_norm": 1.6070222854614258, |
|
"learning_rate": 6.13458054480795e-05, |
|
"loss": 1.9428, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.9140811455847255, |
|
"grad_norm": 1.6163593530654907, |
|
"learning_rate": 6.115352901233779e-05, |
|
"loss": 1.9039, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.9164677804295943, |
|
"grad_norm": 1.5768218040466309, |
|
"learning_rate": 6.096107884238458e-05, |
|
"loss": 1.7472, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.918854415274463, |
|
"grad_norm": 1.4955110549926758, |
|
"learning_rate": 6.0768457935941817e-05, |
|
"loss": 1.8869, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.9212410501193318, |
|
"grad_norm": 1.939584493637085, |
|
"learning_rate": 6.0575669293390954e-05, |
|
"loss": 1.908, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.9236276849642004, |
|
"grad_norm": 1.7790099382400513, |
|
"learning_rate": 6.038271591772615e-05, |
|
"loss": 1.9309, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.9260143198090692, |
|
"grad_norm": 1.8958290815353394, |
|
"learning_rate": 6.0189600814507604e-05, |
|
"loss": 1.9888, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.9284009546539379, |
|
"grad_norm": 1.4420616626739502, |
|
"learning_rate": 5.9996326991814654e-05, |
|
"loss": 1.7128, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.9307875894988067, |
|
"grad_norm": 1.9376964569091797, |
|
"learning_rate": 5.980289746019892e-05, |
|
"loss": 1.9666, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.9331742243436754, |
|
"grad_norm": 1.6446843147277832, |
|
"learning_rate": 5.9609315232637483e-05, |
|
"loss": 1.6969, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.9355608591885441, |
|
"grad_norm": 1.5497177839279175, |
|
"learning_rate": 5.941558332448589e-05, |
|
"loss": 1.7452, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.9379474940334129, |
|
"grad_norm": 1.5692472457885742, |
|
"learning_rate": 5.922170475343125e-05, |
|
"loss": 1.7873, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.9403341288782816, |
|
"grad_norm": 1.8818715810775757, |
|
"learning_rate": 5.9027682539445104e-05, |
|
"loss": 1.8012, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.9427207637231504, |
|
"grad_norm": 1.5524557828903198, |
|
"learning_rate": 5.883351970473654e-05, |
|
"loss": 1.9377, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.9451073985680191, |
|
"grad_norm": 1.6942998170852661, |
|
"learning_rate": 5.863921927370498e-05, |
|
"loss": 1.8297, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.9474940334128878, |
|
"grad_norm": 1.5411245822906494, |
|
"learning_rate": 5.8444784272893175e-05, |
|
"loss": 1.7801, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.9498806682577565, |
|
"grad_norm": 1.609163522720337, |
|
"learning_rate": 5.8250217730939973e-05, |
|
"loss": 1.7861, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.9522673031026253, |
|
"grad_norm": 1.5381650924682617, |
|
"learning_rate": 5.8055522678533225e-05, |
|
"loss": 1.7624, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.954653937947494, |
|
"grad_norm": 1.7099480628967285, |
|
"learning_rate": 5.786070214836254e-05, |
|
"loss": 1.732, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9570405727923628, |
|
"grad_norm": 1.4979294538497925, |
|
"learning_rate": 5.7665759175072034e-05, |
|
"loss": 1.9665, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.9594272076372315, |
|
"grad_norm": 1.5802431106567383, |
|
"learning_rate": 5.747069679521305e-05, |
|
"loss": 1.8585, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.9618138424821002, |
|
"grad_norm": 1.6456499099731445, |
|
"learning_rate": 5.727551804719693e-05, |
|
"loss": 1.8085, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.964200477326969, |
|
"grad_norm": 1.7286982536315918, |
|
"learning_rate": 5.708022597124758e-05, |
|
"loss": 1.7829, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.9665871121718377, |
|
"grad_norm": 1.9187157154083252, |
|
"learning_rate": 5.688482360935423e-05, |
|
"loss": 1.9609, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.9689737470167065, |
|
"grad_norm": 1.4411349296569824, |
|
"learning_rate": 5.668931400522396e-05, |
|
"loss": 1.8859, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.9713603818615751, |
|
"grad_norm": 1.7264066934585571, |
|
"learning_rate": 5.649370020423431e-05, |
|
"loss": 1.7933, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.9737470167064439, |
|
"grad_norm": 1.6054697036743164, |
|
"learning_rate": 5.629798525338589e-05, |
|
"loss": 1.8167, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.9761336515513126, |
|
"grad_norm": 1.7534525394439697, |
|
"learning_rate": 5.6102172201254835e-05, |
|
"loss": 1.8407, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.9785202863961814, |
|
"grad_norm": 1.4455212354660034, |
|
"learning_rate": 5.5906264097945407e-05, |
|
"loss": 1.8722, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.9809069212410502, |
|
"grad_norm": 1.5002778768539429, |
|
"learning_rate": 5.5710263995042434e-05, |
|
"loss": 1.9403, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.9832935560859188, |
|
"grad_norm": 1.5572600364685059, |
|
"learning_rate": 5.551417494556376e-05, |
|
"loss": 1.8539, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.9856801909307876, |
|
"grad_norm": 1.4347857236862183, |
|
"learning_rate": 5.531800000391275e-05, |
|
"loss": 1.8457, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.9880668257756563, |
|
"grad_norm": 1.5811883211135864, |
|
"learning_rate": 5.5121742225830665e-05, |
|
"loss": 1.9307, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.9904534606205251, |
|
"grad_norm": 1.734519124031067, |
|
"learning_rate": 5.4925404668349076e-05, |
|
"loss": 1.8851, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.9928400954653938, |
|
"grad_norm": 1.4903361797332764, |
|
"learning_rate": 5.472899038974225e-05, |
|
"loss": 1.8052, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.9952267303102625, |
|
"grad_norm": 1.638721227645874, |
|
"learning_rate": 5.45325024494795e-05, |
|
"loss": 1.8556, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.9976133651551312, |
|
"grad_norm": 1.70747971534729, |
|
"learning_rate": 5.433594390817756e-05, |
|
"loss": 1.9593, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.315592050552368, |
|
"learning_rate": 5.413931782755283e-05, |
|
"loss": 1.9452, |
|
"step": 1257 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 2514, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1257, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.127117763309732e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |