{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 4200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0004761904761904762,
      "grad_norm": 40.45813118239569,
      "learning_rate": 4.7619047619047627e-08,
      "loss": 2.1387,
      "step": 1
    },
    {
      "epoch": 0.002380952380952381,
      "grad_norm": 39.63780477662549,
      "learning_rate": 2.3809523809523811e-07,
      "loss": 2.0871,
      "step": 5
    },
    {
      "epoch": 0.004761904761904762,
      "grad_norm": 43.499844517947736,
      "learning_rate": 4.7619047619047623e-07,
      "loss": 2.0652,
      "step": 10
    },
    {
      "epoch": 0.007142857142857143,
      "grad_norm": 10.896090871742048,
      "learning_rate": 7.142857142857143e-07,
      "loss": 1.9004,
      "step": 15
    },
    {
      "epoch": 0.009523809523809525,
      "grad_norm": 6.2002952611415845,
      "learning_rate": 9.523809523809525e-07,
      "loss": 1.7859,
      "step": 20
    },
    {
      "epoch": 0.011904761904761904,
      "grad_norm": 4.97841465233645,
      "learning_rate": 1.1904761904761906e-06,
      "loss": 1.6739,
      "step": 25
    },
    {
      "epoch": 0.014285714285714285,
      "grad_norm": 4.293746378401411,
      "learning_rate": 1.4285714285714286e-06,
      "loss": 1.4907,
      "step": 30
    },
    {
      "epoch": 0.016666666666666666,
      "grad_norm": 2.299524492927895,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 1.3909,
      "step": 35
    },
    {
      "epoch": 0.01904761904761905,
      "grad_norm": 2.3149107855519064,
      "learning_rate": 1.904761904761905e-06,
      "loss": 1.2509,
      "step": 40
    },
    {
      "epoch": 0.02142857142857143,
      "grad_norm": 2.251875691331579,
      "learning_rate": 2.1428571428571427e-06,
      "loss": 1.1418,
      "step": 45
    },
    {
      "epoch": 0.023809523809523808,
      "grad_norm": 1.293593518327437,
      "learning_rate": 2.380952380952381e-06,
      "loss": 1.0812,
      "step": 50
    },
    {
      "epoch": 0.02619047619047619,
      "grad_norm": 1.1337840513174344,
      "learning_rate": 2.6190476190476192e-06,
      "loss": 1.0113,
      "step": 55
    },
    {
      "epoch": 0.02857142857142857,
      "grad_norm": 1.1685369096623774,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 0.9634,
      "step": 60
    },
    {
      "epoch": 0.030952380952380953,
      "grad_norm": 1.070331563673874,
      "learning_rate": 3.0952380952380957e-06,
      "loss": 0.9291,
      "step": 65
    },
    {
      "epoch": 0.03333333333333333,
      "grad_norm": 1.059586065170695,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.91,
      "step": 70
    },
    {
      "epoch": 0.03571428571428571,
      "grad_norm": 1.131151685372533,
      "learning_rate": 3.5714285714285718e-06,
      "loss": 0.8976,
      "step": 75
    },
    {
      "epoch": 0.0380952380952381,
      "grad_norm": 1.193443615687326,
      "learning_rate": 3.80952380952381e-06,
      "loss": 0.8781,
      "step": 80
    },
    {
      "epoch": 0.04047619047619048,
      "grad_norm": 1.0875818440706093,
      "learning_rate": 4.047619047619048e-06,
      "loss": 0.8631,
      "step": 85
    },
    {
      "epoch": 0.04285714285714286,
      "grad_norm": 1.1503140744906168,
      "learning_rate": 4.2857142857142855e-06,
      "loss": 0.8646,
      "step": 90
    },
    {
      "epoch": 0.04523809523809524,
      "grad_norm": 1.7469535134849759,
      "learning_rate": 4.523809523809524e-06,
      "loss": 0.8426,
      "step": 95
    },
    {
      "epoch": 0.047619047619047616,
      "grad_norm": 1.299723199427286,
      "learning_rate": 4.761904761904762e-06,
      "loss": 0.8305,
      "step": 100
    },
    {
      "epoch": 0.05,
      "grad_norm": 1.40840933352402,
      "learning_rate": 5e-06,
      "loss": 0.8316,
      "step": 105
    },
    {
      "epoch": 0.05238095238095238,
      "grad_norm": 1.0810746876875799,
      "learning_rate": 5.2380952380952384e-06,
      "loss": 0.8225,
      "step": 110
    },
    {
      "epoch": 0.05476190476190476,
      "grad_norm": 1.1168753096844857,
      "learning_rate": 5.476190476190477e-06,
      "loss": 0.8141,
      "step": 115
    },
    {
      "epoch": 0.05714285714285714,
      "grad_norm": 1.117741427127342,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 0.8128,
      "step": 120
    },
    {
      "epoch": 0.05952380952380952,
      "grad_norm": 1.117824418032963,
      "learning_rate": 5.9523809523809525e-06,
      "loss": 0.8104,
      "step": 125
    },
    {
      "epoch": 0.06190476190476191,
      "grad_norm": 1.1765989152315592,
      "learning_rate": 6.1904761904761914e-06,
      "loss": 0.8186,
      "step": 130
    },
    {
      "epoch": 0.06428571428571428,
      "grad_norm": 1.14607344297389,
      "learning_rate": 6.4285714285714295e-06,
      "loss": 0.8094,
      "step": 135
    },
    {
      "epoch": 0.06666666666666667,
      "grad_norm": 1.0689364794376695,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.7889,
      "step": 140
    },
    {
      "epoch": 0.06904761904761905,
      "grad_norm": 1.0802475624680903,
      "learning_rate": 6.9047619047619055e-06,
      "loss": 0.8072,
      "step": 145
    },
    {
      "epoch": 0.07142857142857142,
      "grad_norm": 1.1977014078487442,
      "learning_rate": 7.1428571428571436e-06,
      "loss": 0.7951,
      "step": 150
    },
    {
      "epoch": 0.07380952380952381,
      "grad_norm": 1.0956749755790958,
      "learning_rate": 7.380952380952382e-06,
      "loss": 0.7961,
      "step": 155
    },
    {
      "epoch": 0.0761904761904762,
      "grad_norm": 1.0269565395277456,
      "learning_rate": 7.61904761904762e-06,
      "loss": 0.7883,
      "step": 160
    },
    {
      "epoch": 0.07857142857142857,
      "grad_norm": 1.2693412905779187,
      "learning_rate": 7.857142857142858e-06,
      "loss": 0.7941,
      "step": 165
    },
    {
      "epoch": 0.08095238095238096,
      "grad_norm": 1.4270103378714147,
      "learning_rate": 8.095238095238097e-06,
      "loss": 0.7976,
      "step": 170
    },
    {
      "epoch": 0.08333333333333333,
      "grad_norm": 1.3965101517157619,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.7907,
      "step": 175
    },
    {
      "epoch": 0.08571428571428572,
      "grad_norm": 1.1737877049103937,
      "learning_rate": 8.571428571428571e-06,
      "loss": 0.7772,
      "step": 180
    },
    {
      "epoch": 0.0880952380952381,
      "grad_norm": 0.9620328652517623,
      "learning_rate": 8.80952380952381e-06,
      "loss": 0.7895,
      "step": 185
    },
    {
      "epoch": 0.09047619047619047,
      "grad_norm": 0.9954694445859312,
      "learning_rate": 9.047619047619049e-06,
      "loss": 0.7728,
      "step": 190
    },
    {
      "epoch": 0.09285714285714286,
      "grad_norm": 0.9933982715674028,
      "learning_rate": 9.285714285714288e-06,
      "loss": 0.7784,
      "step": 195
    },
    {
      "epoch": 0.09523809523809523,
      "grad_norm": 1.2380225457608798,
      "learning_rate": 9.523809523809525e-06,
      "loss": 0.762,
      "step": 200
    },
    {
      "epoch": 0.09761904761904762,
      "grad_norm": 1.1645854540151788,
      "learning_rate": 9.761904761904762e-06,
      "loss": 0.7708,
      "step": 205
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0347109854779557,
      "learning_rate": 1e-05,
      "loss": 0.7777,
      "step": 210
    },
    {
      "epoch": 0.10238095238095238,
      "grad_norm": 1.007801398524834,
      "learning_rate": 1.0238095238095238e-05,
      "loss": 0.7696,
      "step": 215
    },
    {
      "epoch": 0.10476190476190476,
      "grad_norm": 1.1084444980598738,
      "learning_rate": 1.0476190476190477e-05,
      "loss": 0.7667,
      "step": 220
    },
    {
      "epoch": 0.10714285714285714,
      "grad_norm": 0.9232282482964133,
      "learning_rate": 1.0714285714285714e-05,
      "loss": 0.7682,
      "step": 225
    },
    {
      "epoch": 0.10952380952380952,
      "grad_norm": 1.0266526312590731,
      "learning_rate": 1.0952380952380955e-05,
      "loss": 0.76,
      "step": 230
    },
    {
      "epoch": 0.11190476190476191,
      "grad_norm": 0.9800538664923668,
      "learning_rate": 1.1190476190476192e-05,
      "loss": 0.7787,
      "step": 235
    },
    {
      "epoch": 0.11428571428571428,
      "grad_norm": 1.0852968890165064,
      "learning_rate": 1.1428571428571429e-05,
      "loss": 0.7584,
      "step": 240
    },
    {
      "epoch": 0.11666666666666667,
      "grad_norm": 1.101079132960328,
      "learning_rate": 1.1666666666666668e-05,
      "loss": 0.7532,
      "step": 245
    },
    {
      "epoch": 0.11904761904761904,
      "grad_norm": 1.0434818420770975,
      "learning_rate": 1.1904761904761905e-05,
      "loss": 0.766,
      "step": 250
    },
    {
      "epoch": 0.12142857142857143,
      "grad_norm": 0.8949846922479692,
      "learning_rate": 1.2142857142857142e-05,
      "loss": 0.764,
      "step": 255
    },
    {
      "epoch": 0.12380952380952381,
      "grad_norm": 0.9144371936075102,
      "learning_rate": 1.2380952380952383e-05,
      "loss": 0.7518,
      "step": 260
    },
    {
      "epoch": 0.1261904761904762,
      "grad_norm": 1.098930660048903,
      "learning_rate": 1.261904761904762e-05,
      "loss": 0.7549,
      "step": 265
    },
    {
      "epoch": 0.12857142857142856,
      "grad_norm": 1.0213006883333104,
      "learning_rate": 1.2857142857142859e-05,
      "loss": 0.7514,
      "step": 270
    },
    {
      "epoch": 0.13095238095238096,
      "grad_norm": 0.9116166496696912,
      "learning_rate": 1.3095238095238096e-05,
      "loss": 0.7515,
      "step": 275
    },
    {
      "epoch": 0.13333333333333333,
      "grad_norm": 1.0474312553231273,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 0.7663,
      "step": 280
    },
    {
      "epoch": 0.1357142857142857,
      "grad_norm": 0.8710985201179252,
      "learning_rate": 1.3571428571428574e-05,
      "loss": 0.7608,
      "step": 285
    },
    {
      "epoch": 0.1380952380952381,
      "grad_norm": 1.3401884340352725,
      "learning_rate": 1.3809523809523811e-05,
      "loss": 0.7554,
      "step": 290
    },
    {
      "epoch": 0.14047619047619048,
      "grad_norm": 1.0740872828901855,
      "learning_rate": 1.4047619047619048e-05,
      "loss": 0.7582,
      "step": 295
    },
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 0.9126548421949322,
      "learning_rate": 1.4285714285714287e-05,
      "loss": 0.7434,
      "step": 300
    },
    {
      "epoch": 0.14523809523809525,
      "grad_norm": 1.1120651258567102,
      "learning_rate": 1.4523809523809524e-05,
      "loss": 0.7388,
      "step": 305
    },
    {
      "epoch": 0.14761904761904762,
      "grad_norm": 0.9950599559753186,
      "learning_rate": 1.4761904761904763e-05,
      "loss": 0.7585,
      "step": 310
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.8107934316232559,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 0.7355,
      "step": 315
    },
    {
      "epoch": 0.1523809523809524,
      "grad_norm": 0.808619303284316,
      "learning_rate": 1.523809523809524e-05,
      "loss": 0.7504,
      "step": 320
    },
    {
      "epoch": 0.15476190476190477,
      "grad_norm": 0.8677410171473149,
      "learning_rate": 1.5476190476190476e-05,
      "loss": 0.7607,
      "step": 325
    },
    {
      "epoch": 0.15714285714285714,
      "grad_norm": 0.8160199476236252,
      "learning_rate": 1.5714285714285715e-05,
      "loss": 0.7464,
      "step": 330
    },
    {
      "epoch": 0.1595238095238095,
      "grad_norm": 0.8264103314451071,
      "learning_rate": 1.5952380952380954e-05,
      "loss": 0.7416,
      "step": 335
    },
    {
      "epoch": 0.1619047619047619,
      "grad_norm": 0.9284321857373765,
      "learning_rate": 1.6190476190476193e-05,
      "loss": 0.7649,
      "step": 340
    },
    {
      "epoch": 0.16428571428571428,
      "grad_norm": 0.9711426336534976,
      "learning_rate": 1.642857142857143e-05,
      "loss": 0.7388,
      "step": 345
    },
    {
      "epoch": 0.16666666666666666,
      "grad_norm": 1.0029532403604327,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.7495,
      "step": 350
    },
    {
      "epoch": 0.16904761904761906,
      "grad_norm": 0.8655401816761288,
      "learning_rate": 1.6904761904761906e-05,
      "loss": 0.7431,
      "step": 355
    },
    {
      "epoch": 0.17142857142857143,
      "grad_norm": 0.811823602075138,
      "learning_rate": 1.7142857142857142e-05,
      "loss": 0.7409,
      "step": 360
    },
    {
      "epoch": 0.1738095238095238,
      "grad_norm": 0.7850380081345122,
      "learning_rate": 1.7380952380952384e-05,
      "loss": 0.7496,
      "step": 365
    },
    {
      "epoch": 0.1761904761904762,
      "grad_norm": 0.8871490424351564,
      "learning_rate": 1.761904761904762e-05,
      "loss": 0.7502,
      "step": 370
    },
    {
      "epoch": 0.17857142857142858,
      "grad_norm": 0.857041601013508,
      "learning_rate": 1.785714285714286e-05,
      "loss": 0.7344,
      "step": 375
    },
    {
      "epoch": 0.18095238095238095,
      "grad_norm": 0.8798387422023826,
      "learning_rate": 1.8095238095238097e-05,
      "loss": 0.7451,
      "step": 380
    },
    {
      "epoch": 0.18333333333333332,
      "grad_norm": 0.8208318602053298,
      "learning_rate": 1.8333333333333333e-05,
      "loss": 0.7519,
      "step": 385
    },
    {
      "epoch": 0.18571428571428572,
      "grad_norm": 0.9816478125661463,
      "learning_rate": 1.8571428571428575e-05,
      "loss": 0.7438,
      "step": 390
    },
    {
      "epoch": 0.1880952380952381,
      "grad_norm": 0.8587183984289388,
      "learning_rate": 1.880952380952381e-05,
      "loss": 0.74,
      "step": 395
    },
    {
      "epoch": 0.19047619047619047,
      "grad_norm": 0.8766655919062369,
      "learning_rate": 1.904761904761905e-05,
      "loss": 0.7506,
      "step": 400
    },
    {
      "epoch": 0.19285714285714287,
      "grad_norm": 0.8309437973338848,
      "learning_rate": 1.928571428571429e-05,
      "loss": 0.745,
      "step": 405
    },
    {
      "epoch": 0.19523809523809524,
      "grad_norm": 0.8106184766065911,
      "learning_rate": 1.9523809523809524e-05,
      "loss": 0.7475,
      "step": 410
    },
    {
      "epoch": 0.1976190476190476,
      "grad_norm": 0.809891965368119,
      "learning_rate": 1.9761904761904763e-05,
      "loss": 0.7335,
      "step": 415
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.7555036664771403,
      "learning_rate": 2e-05,
      "loss": 0.7424,
      "step": 420
    },
    {
      "epoch": 0.20238095238095238,
      "grad_norm": 0.7650413836527804,
      "learning_rate": 1.999991365731819e-05,
      "loss": 0.7501,
      "step": 425
    },
    {
      "epoch": 0.20476190476190476,
      "grad_norm": 0.7312208748548112,
      "learning_rate": 1.999965463076377e-05,
      "loss": 0.7334,
      "step": 430
    },
    {
      "epoch": 0.20714285714285716,
      "grad_norm": 0.821378907440853,
      "learning_rate": 1.999922292480975e-05,
      "loss": 0.7398,
      "step": 435
    },
    {
      "epoch": 0.20952380952380953,
      "grad_norm": 0.7961564796311429,
      "learning_rate": 1.999861854691106e-05,
      "loss": 0.7383,
      "step": 440
    },
    {
      "epoch": 0.2119047619047619,
      "grad_norm": 0.864941128195607,
      "learning_rate": 1.999784150750442e-05,
      "loss": 0.7453,
      "step": 445
    },
    {
      "epoch": 0.21428571428571427,
      "grad_norm": 0.9432973729407097,
      "learning_rate": 1.9996891820008165e-05,
      "loss": 0.7428,
      "step": 450
    },
    {
      "epoch": 0.21666666666666667,
      "grad_norm": 0.744201840220319,
      "learning_rate": 1.9995769500822007e-05,
      "loss": 0.732,
      "step": 455
    },
    {
      "epoch": 0.21904761904761905,
      "grad_norm": 0.8051356121636954,
      "learning_rate": 1.999447456932676e-05,
      "loss": 0.7452,
      "step": 460
    },
    {
      "epoch": 0.22142857142857142,
      "grad_norm": 0.7446879893905328,
      "learning_rate": 1.9993007047883988e-05,
      "loss": 0.7406,
      "step": 465
    },
    {
      "epoch": 0.22380952380952382,
      "grad_norm": 0.8200085500457187,
      "learning_rate": 1.9991366961835643e-05,
      "loss": 0.7342,
      "step": 470
    },
    {
      "epoch": 0.2261904761904762,
      "grad_norm": 0.6879793430211231,
      "learning_rate": 1.9989554339503612e-05,
      "loss": 0.737,
      "step": 475
    },
    {
      "epoch": 0.22857142857142856,
      "grad_norm": 0.6767209429381076,
      "learning_rate": 1.9987569212189224e-05,
      "loss": 0.7365,
      "step": 480
    },
    {
      "epoch": 0.23095238095238096,
      "grad_norm": 0.7417349665434714,
      "learning_rate": 1.9985411614172728e-05,
      "loss": 0.7173,
      "step": 485
    },
    {
      "epoch": 0.23333333333333334,
      "grad_norm": 0.7446106009527734,
      "learning_rate": 1.9983081582712684e-05,
      "loss": 0.7423,
      "step": 490
    },
    {
      "epoch": 0.2357142857142857,
      "grad_norm": 0.7442452133783487,
      "learning_rate": 1.9980579158045322e-05,
      "loss": 0.7165,
      "step": 495
    },
    {
      "epoch": 0.23809523809523808,
      "grad_norm": 0.7769424686532669,
      "learning_rate": 1.997790438338385e-05,
      "loss": 0.749,
      "step": 500
    },
    {
      "epoch": 0.24047619047619048,
      "grad_norm": 0.8034839265796356,
      "learning_rate": 1.997505730491772e-05,
      "loss": 0.7385,
      "step": 505
    },
    {
      "epoch": 0.24285714285714285,
      "grad_norm": 0.7676614166297908,
      "learning_rate": 1.9972037971811802e-05,
      "loss": 0.7383,
      "step": 510
    },
    {
      "epoch": 0.24523809523809523,
      "grad_norm": 0.9359149520411593,
      "learning_rate": 1.9968846436205566e-05,
      "loss": 0.7498,
      "step": 515
    },
    {
      "epoch": 0.24761904761904763,
      "grad_norm": 0.7783563085530937,
      "learning_rate": 1.9965482753212154e-05,
      "loss": 0.7466,
      "step": 520
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.7058199644697823,
      "learning_rate": 1.9961946980917457e-05,
      "loss": 0.7327,
      "step": 525
    },
    {
      "epoch": 0.2523809523809524,
      "grad_norm": 0.6768276735212928,
      "learning_rate": 1.995823918037908e-05,
      "loss": 0.7396,
      "step": 530
    },
    {
      "epoch": 0.25476190476190474,
      "grad_norm": 0.7530681690630258,
      "learning_rate": 1.9954359415625313e-05,
      "loss": 0.736,
      "step": 535
    },
    {
      "epoch": 0.2571428571428571,
      "grad_norm": 0.790729948461572,
      "learning_rate": 1.9950307753654016e-05,
      "loss": 0.7367,
      "step": 540
    },
    {
      "epoch": 0.25952380952380955,
      "grad_norm": 0.8098472943164579,
      "learning_rate": 1.994608426443146e-05,
      "loss": 0.7484,
      "step": 545
    },
    {
      "epoch": 0.2619047619047619,
      "grad_norm": 0.6594040663378262,
      "learning_rate": 1.994168902089112e-05,
      "loss": 0.7256,
      "step": 550
    },
    {
      "epoch": 0.2642857142857143,
      "grad_norm": 0.7061644477917397,
      "learning_rate": 1.9937122098932428e-05,
      "loss": 0.7293,
      "step": 555
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 0.6552145622334516,
      "learning_rate": 1.9932383577419432e-05,
      "loss": 0.7235,
      "step": 560
    },
    {
      "epoch": 0.26904761904761904,
      "grad_norm": 0.7850549934752342,
      "learning_rate": 1.9927473538179467e-05,
      "loss": 0.7242,
      "step": 565
    },
    {
      "epoch": 0.2714285714285714,
      "grad_norm": 0.6946715346854551,
      "learning_rate": 1.9922392066001724e-05,
      "loss": 0.7352,
      "step": 570
    },
    {
      "epoch": 0.27380952380952384,
      "grad_norm": 0.6701958153898289,
      "learning_rate": 1.9917139248635788e-05,
      "loss": 0.7358,
      "step": 575
    },
    {
      "epoch": 0.2761904761904762,
      "grad_norm": 0.6957543084857959,
      "learning_rate": 1.991171517679013e-05,
      "loss": 0.7287,
      "step": 580
    },
    {
      "epoch": 0.2785714285714286,
      "grad_norm": 0.7412462140647854,
      "learning_rate": 1.9906119944130527e-05,
      "loss": 0.7314,
      "step": 585
    },
    {
      "epoch": 0.28095238095238095,
      "grad_norm": 0.7180123780939238,
      "learning_rate": 1.9900353647278466e-05,
      "loss": 0.7049,
      "step": 590
    },
    {
      "epoch": 0.2833333333333333,
      "grad_norm": 0.6346311822511415,
      "learning_rate": 1.9894416385809444e-05,
      "loss": 0.7371,
      "step": 595
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 0.9181856171310567,
      "learning_rate": 1.9888308262251286e-05,
      "loss": 0.7537,
      "step": 600
    },
    {
      "epoch": 0.28809523809523807,
      "grad_norm": 0.7646478108344241,
      "learning_rate": 1.9882029382082342e-05,
      "loss": 0.724,
      "step": 605
    },
    {
      "epoch": 0.2904761904761905,
      "grad_norm": 0.7332726331013449,
      "learning_rate": 1.9875579853729677e-05,
      "loss": 0.7273,
      "step": 610
    },
    {
      "epoch": 0.29285714285714287,
      "grad_norm": 0.8337211626177949,
      "learning_rate": 1.9868959788567213e-05,
      "loss": 0.7359,
      "step": 615
    },
    {
      "epoch": 0.29523809523809524,
      "grad_norm": 0.72291541850261,
      "learning_rate": 1.9862169300913784e-05,
      "loss": 0.7246,
      "step": 620
    },
    {
      "epoch": 0.2976190476190476,
      "grad_norm": 0.6775385452101815,
      "learning_rate": 1.9855208508031173e-05,
      "loss": 0.719,
      "step": 625
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.6899659929159091,
      "learning_rate": 1.9848077530122083e-05,
      "loss": 0.7311,
      "step": 630
    },
    {
      "epoch": 0.30238095238095236,
      "grad_norm": 0.6092353332947169,
      "learning_rate": 1.9840776490328067e-05,
      "loss": 0.7283,
      "step": 635
    },
    {
      "epoch": 0.3047619047619048,
      "grad_norm": 0.6170832705569598,
      "learning_rate": 1.9833305514727396e-05,
      "loss": 0.7341,
      "step": 640
    },
    {
      "epoch": 0.30714285714285716,
      "grad_norm": 0.6299433950672038,
      "learning_rate": 1.9825664732332886e-05,
      "loss": 0.7224,
      "step": 645
    },
    {
      "epoch": 0.30952380952380953,
      "grad_norm": 0.620304145431242,
      "learning_rate": 1.981785427508966e-05,
      "loss": 0.7325,
      "step": 650
    },
    {
      "epoch": 0.3119047619047619,
      "grad_norm": 0.6551690352072297,
      "learning_rate": 1.9809874277872886e-05,
      "loss": 0.7042,
      "step": 655
    },
    {
      "epoch": 0.3142857142857143,
      "grad_norm": 0.6965123962977487,
      "learning_rate": 1.9801724878485438e-05,
      "loss": 0.7142,
      "step": 660
    },
    {
      "epoch": 0.31666666666666665,
      "grad_norm": 0.6522958895192698,
      "learning_rate": 1.9793406217655516e-05,
      "loss": 0.7188,
      "step": 665
    },
    {
      "epoch": 0.319047619047619,
      "grad_norm": 0.6710935693138489,
      "learning_rate": 1.9784918439034216e-05,
      "loss": 0.7127,
      "step": 670
    },
    {
      "epoch": 0.32142857142857145,
      "grad_norm": 0.6394392122127832,
      "learning_rate": 1.977626168919305e-05,
      "loss": 0.7301,
      "step": 675
    },
    {
      "epoch": 0.3238095238095238,
      "grad_norm": 0.7388339763922518,
      "learning_rate": 1.9767436117621416e-05,
      "loss": 0.7217,
      "step": 680
    },
    {
      "epoch": 0.3261904761904762,
      "grad_norm": 0.6684391122808601,
      "learning_rate": 1.975844187672402e-05,
      "loss": 0.6975,
      "step": 685
    },
    {
      "epoch": 0.32857142857142857,
      "grad_norm": 0.6933743953258287,
      "learning_rate": 1.9749279121818235e-05,
      "loss": 0.712,
      "step": 690
    },
    {
      "epoch": 0.33095238095238094,
      "grad_norm": 0.6999429639549631,
      "learning_rate": 1.9739948011131438e-05,
      "loss": 0.7275,
      "step": 695
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 0.6381718516637829,
      "learning_rate": 1.973044870579824e-05,
      "loss": 0.7269,
      "step": 700
    },
    {
      "epoch": 0.3357142857142857,
      "grad_norm": 0.6640845851267309,
      "learning_rate": 1.9720781369857747e-05,
      "loss": 0.7254,
      "step": 705
    },
    {
      "epoch": 0.3380952380952381,
      "grad_norm": 0.6592163354528279,
      "learning_rate": 1.9710946170250702e-05,
      "loss": 0.7071,
      "step": 710
    },
    {
      "epoch": 0.3404761904761905,
      "grad_norm": 0.7655182235424155,
      "learning_rate": 1.9700943276816602e-05,
      "loss": 0.7241,
      "step": 715
    },
    {
      "epoch": 0.34285714285714286,
      "grad_norm": 0.643273448107574,
      "learning_rate": 1.969077286229078e-05,
      "loss": 0.7286,
      "step": 720
    },
    {
      "epoch": 0.34523809523809523,
      "grad_norm": 0.6857609061991075,
      "learning_rate": 1.9680435102301412e-05,
      "loss": 0.7079,
      "step": 725
    },
    {
      "epoch": 0.3476190476190476,
      "grad_norm": 0.8672880237155081,
      "learning_rate": 1.9669930175366474e-05,
      "loss": 0.7112,
      "step": 730
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.6654257223083666,
      "learning_rate": 1.9659258262890683e-05,
      "loss": 0.726,
      "step": 735
    },
    {
      "epoch": 0.3523809523809524,
      "grad_norm": 0.6635633178878109,
      "learning_rate": 1.964841954916235e-05,
      "loss": 0.7095,
      "step": 740
    },
    {
      "epoch": 0.3547619047619048,
      "grad_norm": 0.7692774931160247,
      "learning_rate": 1.9637414221350198e-05,
      "loss": 0.7078,
      "step": 745
    },
    {
      "epoch": 0.35714285714285715,
      "grad_norm": 0.6290937699685107,
      "learning_rate": 1.962624246950012e-05,
      "loss": 0.7228,
      "step": 750
    },
    {
      "epoch": 0.3595238095238095,
      "grad_norm": 0.645738278667474,
      "learning_rate": 1.9614904486531935e-05,
      "loss": 0.7134,
      "step": 755
    },
    {
      "epoch": 0.3619047619047619,
      "grad_norm": 0.6234306399886239,
      "learning_rate": 1.9603400468236e-05,
      "loss": 0.7132,
      "step": 760
    },
    {
      "epoch": 0.36428571428571427,
      "grad_norm": 0.6027074648266817,
      "learning_rate": 1.9591730613269878e-05,
      "loss": 0.7019,
      "step": 765
    },
    {
      "epoch": 0.36666666666666664,
      "grad_norm": 0.5952054087646503,
      "learning_rate": 1.957989512315489e-05,
      "loss": 0.7134,
      "step": 770
    },
    {
      "epoch": 0.36904761904761907,
      "grad_norm": 0.5954424676547249,
      "learning_rate": 1.9567894202272623e-05,
      "loss": 0.7102,
      "step": 775
    },
    {
      "epoch": 0.37142857142857144,
      "grad_norm": 0.6146466190073,
      "learning_rate": 1.955572805786141e-05,
      "loss": 0.7054,
      "step": 780
    },
    {
      "epoch": 0.3738095238095238,
      "grad_norm": 0.6196401686238979,
      "learning_rate": 1.9543396900012763e-05,
      "loss": 0.7223,
      "step": 785
    },
    {
      "epoch": 0.3761904761904762,
      "grad_norm": 0.6376202895536227,
      "learning_rate": 1.9530900941667733e-05,
      "loss": 0.7227,
      "step": 790
    },
    {
      "epoch": 0.37857142857142856,
      "grad_norm": 0.6600915780771018,
      "learning_rate": 1.9518240398613226e-05,
      "loss": 0.6997,
      "step": 795
    },
    {
      "epoch": 0.38095238095238093,
      "grad_norm": 0.5882182414716866,
      "learning_rate": 1.9505415489478293e-05,
      "loss": 0.7268,
      "step": 800
    },
    {
      "epoch": 0.38333333333333336,
      "grad_norm": 0.6384408692158889,
      "learning_rate": 1.949242643573034e-05,
      "loss": 0.712,
      "step": 805
    },
    {
      "epoch": 0.38571428571428573,
      "grad_norm": 0.6036619986439133,
      "learning_rate": 1.947927346167132e-05,
      "loss": 0.7175,
      "step": 810
    },
    {
      "epoch": 0.3880952380952381,
      "grad_norm": 0.7029032826216544,
      "learning_rate": 1.9465956794433837e-05,
      "loss": 0.707,
      "step": 815
    },
    {
      "epoch": 0.3904761904761905,
      "grad_norm": 0.6457510326949268,
      "learning_rate": 1.945247666397725e-05,
      "loss": 0.7036,
      "step": 820
    },
    {
      "epoch": 0.39285714285714285,
      "grad_norm": 0.6878440257755464,
      "learning_rate": 1.9438833303083677e-05,
      "loss": 0.6989,
      "step": 825
    },
    {
      "epoch": 0.3952380952380952,
      "grad_norm": 0.6188605951379363,
      "learning_rate": 1.9425026947353994e-05,
      "loss": 0.7105,
      "step": 830
    },
    {
      "epoch": 0.3976190476190476,
      "grad_norm": 0.5840120067803837,
      "learning_rate": 1.9411057835203756e-05,
      "loss": 0.7058,
      "step": 835
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.5937069559439255,
      "learning_rate": 1.9396926207859085e-05,
      "loss": 0.695,
      "step": 840
    },
    {
      "epoch": 0.4023809523809524,
      "grad_norm": 0.7110875706843621,
      "learning_rate": 1.9382632309352503e-05,
      "loss": 0.7227,
      "step": 845
    },
    {
      "epoch": 0.40476190476190477,
      "grad_norm": 0.6311314888265864,
      "learning_rate": 1.936817638651871e-05,
      "loss": 0.7011,
      "step": 850
    },
    {
      "epoch": 0.40714285714285714,
      "grad_norm": 0.6286300385263309,
      "learning_rate": 1.935355868899034e-05,
      "loss": 0.6979,
      "step": 855
    },
    {
      "epoch": 0.4095238095238095,
      "grad_norm": 0.6209403160626118,
      "learning_rate": 1.9338779469193638e-05,
      "loss": 0.6996,
      "step": 860
    },
    {
      "epoch": 0.4119047619047619,
      "grad_norm": 0.5727631219976955,
      "learning_rate": 1.9323838982344092e-05,
      "loss": 0.7114,
      "step": 865
    },
    {
      "epoch": 0.4142857142857143,
      "grad_norm": 0.6417775935264546,
      "learning_rate": 1.9308737486442045e-05,
      "loss": 0.702,
      "step": 870
    },
    {
      "epoch": 0.4166666666666667,
      "grad_norm": 0.6620199236263371,
      "learning_rate": 1.9293475242268224e-05,
      "loss": 0.7226,
      "step": 875
    },
    {
      "epoch": 0.41904761904761906,
      "grad_norm": 0.6399880589166196,
      "learning_rate": 1.9278052513379256e-05,
      "loss": 0.6925,
      "step": 880
    },
    {
      "epoch": 0.42142857142857143,
      "grad_norm": 0.6138453722448008,
      "learning_rate": 1.926246956610309e-05,
      "loss": 0.7118,
      "step": 885
    },
    {
      "epoch": 0.4238095238095238,
      "grad_norm": 0.597989080002243,
      "learning_rate": 1.9246726669534416e-05,
      "loss": 0.73,
      "step": 890
    },
    {
      "epoch": 0.4261904761904762,
      "grad_norm": 0.5901338899915719,
      "learning_rate": 1.923082409553002e-05,
      "loss": 0.7067,
      "step": 895
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 0.6016114619725854,
      "learning_rate": 1.921476211870408e-05,
      "loss": 0.7103,
      "step": 900
    },
    {
      "epoch": 0.430952380952381,
      "grad_norm": 0.5950314108298304,
      "learning_rate": 1.919854101642342e-05,
      "loss": 0.7052,
      "step": 905
    },
    {
      "epoch": 0.43333333333333335,
      "grad_norm": 0.5879370937942204,
      "learning_rate": 1.9182161068802742e-05,
      "loss": 0.6995,
      "step": 910
    },
    {
      "epoch": 0.4357142857142857,
      "grad_norm": 0.6266968125559099,
      "learning_rate": 1.9165622558699763e-05,
      "loss": 0.7097,
      "step": 915
    },
    {
      "epoch": 0.4380952380952381,
      "grad_norm": 0.6639659528952871,
      "learning_rate": 1.9148925771710347e-05,
      "loss": 0.7009,
      "step": 920
    },
    {
      "epoch": 0.44047619047619047,
      "grad_norm": 0.6321493695976013,
      "learning_rate": 1.913207099616357e-05,
      "loss": 0.7053,
      "step": 925
    },
    {
      "epoch": 0.44285714285714284,
      "grad_norm": 0.6782903937955044,
      "learning_rate": 1.9115058523116734e-05,
      "loss": 0.7004,
      "step": 930
    },
    {
      "epoch": 0.4452380952380952,
      "grad_norm": 0.6716356876066842,
      "learning_rate": 1.9097888646350347e-05,
      "loss": 0.7058,
      "step": 935
    },
    {
      "epoch": 0.44761904761904764,
      "grad_norm": 0.5994163783124282,
      "learning_rate": 1.908056166236305e-05,
      "loss": 0.6965,
      "step": 940
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.7011305510255346,
      "learning_rate": 1.9063077870366504e-05,
      "loss": 0.6973,
      "step": 945
    },
    {
      "epoch": 0.4523809523809524,
      "grad_norm": 0.6382034265421508,
      "learning_rate": 1.9045437572280193e-05,
      "loss": 0.7122,
      "step": 950
    },
    {
      "epoch": 0.45476190476190476,
      "grad_norm": 0.652835650762035,
      "learning_rate": 1.902764107272626e-05,
      "loss": 0.7154,
      "step": 955
    },
    {
      "epoch": 0.45714285714285713,
      "grad_norm": 0.5737401205474331,
      "learning_rate": 1.900968867902419e-05,
      "loss": 0.7094,
      "step": 960
    },
    {
      "epoch": 0.4595238095238095,
      "grad_norm": 0.6510894452271442,
      "learning_rate": 1.8991580701185564e-05,
      "loss": 0.7126,
      "step": 965
    },
    {
      "epoch": 0.46190476190476193,
      "grad_norm": 0.7386017741154395,
      "learning_rate": 1.8973317451908642e-05,
      "loss": 0.7007,
      "step": 970
    },
    {
      "epoch": 0.4642857142857143,
      "grad_norm": 0.6752036021573017,
      "learning_rate": 1.895489924657301e-05,
      "loss": 0.7041,
      "step": 975
    },
    {
      "epoch": 0.4666666666666667,
      "grad_norm": 0.597408051286458,
      "learning_rate": 1.8936326403234125e-05,
      "loss": 0.7212,
      "step": 980
    },
    {
      "epoch": 0.46904761904761905,
      "grad_norm": 0.5796714555019369,
      "learning_rate": 1.8917599242617796e-05,
      "loss": 0.7159,
      "step": 985
    },
    {
      "epoch": 0.4714285714285714,
      "grad_norm": 0.5688480490870661,
      "learning_rate": 1.8898718088114688e-05,
      "loss": 0.6997,
      "step": 990
    },
    {
      "epoch": 0.4738095238095238,
      "grad_norm": 0.6090212361351263,
      "learning_rate": 1.8879683265774695e-05,
      "loss": 0.7152,
      "step": 995
    },
    {
      "epoch": 0.47619047619047616,
      "grad_norm": 0.5638933903277263,
      "learning_rate": 1.8860495104301346e-05,
      "loss": 0.689,
      "step": 1000
    },
    {
      "epoch": 0.4785714285714286,
      "grad_norm": 0.5775879905939127,
      "learning_rate": 1.8841153935046098e-05,
      "loss": 0.7071,
      "step": 1005
    },
    {
      "epoch": 0.48095238095238096,
      "grad_norm": 0.5955667805199039,
      "learning_rate": 1.8821660092002642e-05,
      "loss": 0.718,
      "step": 1010
    },
    {
      "epoch": 0.48333333333333334,
      "grad_norm": 0.6578844006654452,
      "learning_rate": 1.880201391180111e-05,
      "loss": 0.7203,
      "step": 1015
    },
    {
      "epoch": 0.4857142857142857,
      "grad_norm": 0.6288999781322553,
      "learning_rate": 1.8782215733702286e-05,
      "loss": 0.7005,
      "step": 1020
    },
    {
      "epoch": 0.4880952380952381,
      "grad_norm": 0.6154852254510217,
      "learning_rate": 1.8762265899591724e-05,
      "loss": 0.7112,
      "step": 1025
    },
    {
      "epoch": 0.49047619047619045,
      "grad_norm": 0.5946678081221346,
      "learning_rate": 1.874216475397386e-05,
      "loss": 0.7072,
      "step": 1030
    },
    {
      "epoch": 0.4928571428571429,
      "grad_norm": 0.5542144431999712,
      "learning_rate": 1.8721912643966055e-05,
      "loss": 0.7234,
      "step": 1035
    },
    {
      "epoch": 0.49523809523809526,
      "grad_norm": 0.6327874278600627,
      "learning_rate": 1.870150991929261e-05,
      "loss": 0.7199,
      "step": 1040
    },
    {
      "epoch": 0.4976190476190476,
      "grad_norm": 0.6050382016312242,
      "learning_rate": 1.868095693227872e-05,
      "loss": 0.6989,
      "step": 1045
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.6060484644475487,
      "learning_rate": 1.866025403784439e-05,
      "loss": 0.7094,
      "step": 1050
    },
    {
      "epoch": 0.5023809523809524,
      "grad_norm": 0.5578204676405346,
      "learning_rate": 1.86394015934983e-05,
      "loss": 0.6998,
      "step": 1055
    },
    {
      "epoch": 0.5047619047619047,
      "grad_norm": 0.5912489553089566,
      "learning_rate": 1.8618399959331642e-05,
      "loss": 0.704,
      "step": 1060
    },
    {
      "epoch": 0.5071428571428571,
      "grad_norm": 0.6302255111379419,
      "learning_rate": 1.8597249498011906e-05,
      "loss": 0.7273,
      "step": 1065
    },
    {
      "epoch": 0.5095238095238095,
      "grad_norm": 0.6377098555634427,
      "learning_rate": 1.8575950574776595e-05,
      "loss": 0.7118,
      "step": 1070
    },
    {
      "epoch": 0.5119047619047619,
      "grad_norm": 0.6306289974408017,
      "learning_rate": 1.855450355742695e-05,
      "loss": 0.6975,
      "step": 1075
    },
    {
      "epoch": 0.5142857142857142,
      "grad_norm": 0.6118126960406806,
      "learning_rate": 1.8532908816321557e-05,
      "loss": 0.7089,
      "step": 1080
    },
    {
      "epoch": 0.5166666666666667,
      "grad_norm": 0.5631635531801148,
      "learning_rate": 1.8511166724369997e-05,
      "loss": 0.7027,
      "step": 1085
    },
    {
      "epoch": 0.5190476190476191,
      "grad_norm": 0.5712800757429177,
      "learning_rate": 1.8489277657026377e-05,
      "loss": 0.7068,
      "step": 1090
    },
    {
      "epoch": 0.5214285714285715,
      "grad_norm": 0.560805851835222,
      "learning_rate": 1.8467241992282842e-05,
      "loss": 0.7012,
      "step": 1095
    },
    {
      "epoch": 0.5238095238095238,
      "grad_norm": 0.6607109669389528,
      "learning_rate": 1.844506011066308e-05,
      "loss": 0.6919,
      "step": 1100
    },
    {
      "epoch": 0.5261904761904762,
      "grad_norm": 0.5900866880976501,
      "learning_rate": 1.8422732395215717e-05,
      "loss": 0.6903,
      "step": 1105
    },
    {
      "epoch": 0.5285714285714286,
      "grad_norm": 0.5897342260968936,
      "learning_rate": 1.8400259231507716e-05,
      "loss": 0.6908,
      "step": 1110
    },
    {
      "epoch": 0.530952380952381,
      "grad_norm": 0.5723672657261824,
      "learning_rate": 1.8377641007617724e-05,
      "loss": 0.6954,
      "step": 1115
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 0.611058093387684,
      "learning_rate": 1.8354878114129368e-05,
      "loss": 0.7017,
      "step": 1120
    },
    {
      "epoch": 0.5357142857142857,
      "grad_norm": 0.5318902267208334,
      "learning_rate": 1.833197094412449e-05,
      "loss": 0.6953,
      "step": 1125
    },
    {
      "epoch": 0.5380952380952381,
      "grad_norm": 0.5570664080835552,
      "learning_rate": 1.8308919893176397e-05,
      "loss": 0.7066,
      "step": 1130
    },
    {
      "epoch": 0.5404761904761904,
      "grad_norm": 0.5885337969920328,
      "learning_rate": 1.8285725359343e-05,
      "loss": 0.706,
      "step": 1135
    },
    {
      "epoch": 0.5428571428571428,
      "grad_norm": 0.5855314615908318,
      "learning_rate": 1.826238774315995e-05,
      "loss": 0.701,
      "step": 1140
    },
    {
      "epoch": 0.5452380952380952,
      "grad_norm": 0.5851946825105788,
      "learning_rate": 1.8238907447633716e-05,
      "loss": 0.692,
      "step": 1145
    },
    {
      "epoch": 0.5476190476190477,
      "grad_norm": 0.5927970007531321,
      "learning_rate": 1.8215284878234644e-05,
      "loss": 0.716,
      "step": 1150
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.5263874307074303,
      "learning_rate": 1.819152044288992e-05,
      "loss": 0.6904,
      "step": 1155
    },
    {
      "epoch": 0.5523809523809524,
      "grad_norm": 0.6124793068078697,
      "learning_rate": 1.816761455197657e-05,
      "loss": 0.6864,
      "step": 1160
    },
    {
      "epoch": 0.5547619047619048,
      "grad_norm": 0.5828934954007933,
      "learning_rate": 1.8143567618314336e-05,
      "loss": 0.6904,
      "step": 1165
    },
    {
      "epoch": 0.5571428571428572,
      "grad_norm": 0.5831980381697426,
      "learning_rate": 1.811938005715857e-05,
      "loss": 0.7086,
      "step": 1170
    },
    {
      "epoch": 0.5595238095238095,
      "grad_norm": 0.6435462278644365,
      "learning_rate": 1.8095052286193044e-05,
      "loss": 0.6976,
      "step": 1175
    },
    {
      "epoch": 0.5619047619047619,
      "grad_norm": 0.5815432160069361,
      "learning_rate": 1.8070584725522763e-05,
      "loss": 0.6925,
      "step": 1180
    },
    {
      "epoch": 0.5642857142857143,
      "grad_norm": 0.5401285846482883,
      "learning_rate": 1.8045977797666685e-05,
      "loss": 0.6999,
      "step": 1185
    },
    {
      "epoch": 0.5666666666666667,
      "grad_norm": 0.5677019239930577,
      "learning_rate": 1.802123192755044e-05,
      "loss": 0.6923,
      "step": 1190
    },
    {
      "epoch": 0.569047619047619,
      "grad_norm": 0.5683306800833698,
      "learning_rate": 1.7996347542498983e-05,
      "loss": 0.7017,
      "step": 1195
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 0.5113013942929165,
      "learning_rate": 1.7971325072229227e-05,
      "loss": 0.6921,
      "step": 1200
    },
    {
      "epoch": 0.5738095238095238,
      "grad_norm": 0.5841499935734249,
      "learning_rate": 1.7946164948842604e-05,
      "loss": 0.6915,
      "step": 1205
    },
    {
      "epoch": 0.5761904761904761,
      "grad_norm": 0.6279630088060286,
      "learning_rate": 1.7920867606817625e-05,
      "loss": 0.7061,
      "step": 1210
    },
    {
      "epoch": 0.5785714285714286,
      "grad_norm": 0.5599484344769574,
      "learning_rate": 1.7895433483002356e-05,
      "loss": 0.715,
      "step": 1215
    },
    {
      "epoch": 0.580952380952381,
      "grad_norm": 0.5661003553169632,
      "learning_rate": 1.7869863016606893e-05,
      "loss": 0.6986,
      "step": 1220
    },
    {
      "epoch": 0.5833333333333334,
      "grad_norm": 0.5716166266125108,
      "learning_rate": 1.784415664919576e-05,
      "loss": 0.7031,
      "step": 1225
    },
    {
      "epoch": 0.5857142857142857,
      "grad_norm": 0.5567338032160342,
      "learning_rate": 1.78183148246803e-05,
      "loss": 0.7068,
      "step": 1230
    },
    {
      "epoch": 0.5880952380952381,
      "grad_norm": 0.5683883961420689,
      "learning_rate": 1.7792337989311e-05,
      "loss": 0.689,
      "step": 1235
    },
    {
      "epoch": 0.5904761904761905,
      "grad_norm": 0.5477282163005394,
      "learning_rate": 1.7766226591669787e-05,
      "loss": 0.689,
      "step": 1240
    },
    {
      "epoch": 0.5928571428571429,
      "grad_norm": 0.5311691713580732,
      "learning_rate": 1.7739981082662275e-05,
      "loss": 0.7215,
      "step": 1245
    },
    {
      "epoch": 0.5952380952380952,
      "grad_norm": 0.598618505069467,
      "learning_rate": 1.771360191551e-05,
      "loss": 0.7151,
      "step": 1250
    },
    {
      "epoch": 0.5976190476190476,
      "grad_norm": 0.5543663594273839,
      "learning_rate": 1.768708954574256e-05,
      "loss": 0.701,
      "step": 1255
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.5321278870334822,
      "learning_rate": 1.766044443118978e-05,
      "loss": 0.6935,
      "step": 1260
    },
    {
      "epoch": 0.6023809523809524,
      "grad_norm": 0.5564736107581606,
      "learning_rate": 1.7633667031973793e-05,
      "loss": 0.6925,
      "step": 1265
    },
    {
      "epoch": 0.6047619047619047,
      "grad_norm": 0.623003830977377,
      "learning_rate": 1.760675781050109e-05,
      "loss": 0.6904,
      "step": 1270
    },
    {
      "epoch": 0.6071428571428571,
      "grad_norm": 0.5425831672559595,
      "learning_rate": 1.757971723145453e-05,
      "loss": 0.6941,
      "step": 1275
    },
    {
      "epoch": 0.6095238095238096,
      "grad_norm": 0.5691326789130872,
      "learning_rate": 1.755254576178535e-05,
      "loss": 0.6943,
      "step": 1280
    },
    {
      "epoch": 0.611904761904762,
      "grad_norm": 0.5472161245788499,
      "learning_rate": 1.7525243870705052e-05,
      "loss": 0.7019,
      "step": 1285
    },
    {
      "epoch": 0.6142857142857143,
      "grad_norm": 0.5126814767906358,
      "learning_rate": 1.7497812029677344e-05,
      "loss": 0.6979,
      "step": 1290
    },
    {
      "epoch": 0.6166666666666667,
      "grad_norm": 0.5808568867050167,
      "learning_rate": 1.7470250712409963e-05,
      "loss": 0.7017,
      "step": 1295
    },
    {
      "epoch": 0.6190476190476191,
      "grad_norm": 0.5731714166045258,
      "learning_rate": 1.7442560394846518e-05,
      "loss": 0.6896,
      "step": 1300
    },
    {
      "epoch": 0.6214285714285714,
      "grad_norm": 0.5416282781910543,
      "learning_rate": 1.741474155515827e-05,
      "loss": 0.6881,
      "step": 1305
    },
    {
      "epoch": 0.6238095238095238,
      "grad_norm": 0.5989028487744685,
      "learning_rate": 1.738679467373586e-05,
      "loss": 0.6853,
      "step": 1310
    },
    {
      "epoch": 0.6261904761904762,
      "grad_norm": 0.5928143358170742,
      "learning_rate": 1.7358720233181023e-05,
      "loss": 0.6945,
      "step": 1315
    },
    {
      "epoch": 0.6285714285714286,
      "grad_norm": 0.5917504797279695,
      "learning_rate": 1.7330518718298263e-05,
      "loss": 0.708,
      "step": 1320
    },
    {
      "epoch": 0.6309523809523809,
      "grad_norm": 0.5735050094781514,
      "learning_rate": 1.7302190616086464e-05,
      "loss": 0.7048,
      "step": 1325
    },
    {
      "epoch": 0.6333333333333333,
      "grad_norm": 0.57827020923432,
      "learning_rate": 1.7273736415730488e-05,
      "loss": 0.6793,
      "step": 1330
    },
    {
      "epoch": 0.6357142857142857,
      "grad_norm": 0.5880932620123437,
      "learning_rate": 1.7245156608592727e-05,
      "loss": 0.6915,
      "step": 1335
    },
    {
      "epoch": 0.638095238095238,
      "grad_norm": 0.5890769914796811,
      "learning_rate": 1.7216451688204623e-05,
      "loss": 0.69,
      "step": 1340
    },
    {
      "epoch": 0.6404761904761904,
      "grad_norm": 0.527942811449849,
      "learning_rate": 1.718762215025813e-05,
      "loss": 0.682,
      "step": 1345
    },
    {
      "epoch": 0.6428571428571429,
      "grad_norm": 0.6301973133824051,
      "learning_rate": 1.7158668492597186e-05,
      "loss": 0.6934,
      "step": 1350
    },
    {
      "epoch": 0.6452380952380953,
      "grad_norm": 0.5410524060403492,
      "learning_rate": 1.712959121520907e-05,
      "loss": 0.6892,
      "step": 1355
    },
    {
      "epoch": 0.6476190476190476,
      "grad_norm": 0.7801149059212953,
      "learning_rate": 1.7100390820215805e-05,
      "loss": 0.6999,
      "step": 1360
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.5546640495559166,
      "learning_rate": 1.7071067811865477e-05,
      "loss": 0.6742,
      "step": 1365
    },
    {
      "epoch": 0.6523809523809524,
      "grad_norm": 0.5722087188570253,
      "learning_rate": 1.704162269652352e-05,
      "loss": 0.7179,
      "step": 1370
    },
    {
      "epoch": 0.6547619047619048,
      "grad_norm": 0.5249847693462395,
      "learning_rate": 1.701205598266398e-05,
      "loss": 0.6945,
      "step": 1375
    },
    {
      "epoch": 0.6571428571428571,
      "grad_norm": 0.5639053989400045,
      "learning_rate": 1.698236818086073e-05,
      "loss": 0.6925,
      "step": 1380
    },
    {
      "epoch": 0.6595238095238095,
      "grad_norm": 0.5897582949134731,
      "learning_rate": 1.6952559803778656e-05,
      "loss": 0.6958,
      "step": 1385
    },
    {
      "epoch": 0.6619047619047619,
      "grad_norm": 0.5506537272905166,
      "learning_rate": 1.6922631366164795e-05,
      "loss": 0.6744,
      "step": 1390
    },
    {
      "epoch": 0.6642857142857143,
      "grad_norm": 0.5613120451869654,
      "learning_rate": 1.689258338483947e-05,
      "loss": 0.687,
      "step": 1395
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.5687059713184212,
      "learning_rate": 1.686241637868734e-05,
      "loss": 0.7011,
      "step": 1400
    },
    {
      "epoch": 0.669047619047619,
      "grad_norm": 0.5267403463210103,
      "learning_rate": 1.683213086864843e-05,
      "loss": 0.6903,
      "step": 1405
    },
    {
      "epoch": 0.6714285714285714,
      "grad_norm": 0.5655713255912641,
      "learning_rate": 1.6801727377709195e-05,
      "loss": 0.6896,
      "step": 1410
    },
    {
      "epoch": 0.6738095238095239,
      "grad_norm": 0.5285552000123795,
      "learning_rate": 1.6771206430893408e-05,
      "loss": 0.6946,
      "step": 1415
    },
    {
      "epoch": 0.6761904761904762,
      "grad_norm": 0.5761004876016846,
      "learning_rate": 1.6740568555253153e-05,
      "loss": 0.6895,
      "step": 1420
    },
    {
      "epoch": 0.6785714285714286,
      "grad_norm": 0.5390507685841307,
      "learning_rate": 1.67098142798597e-05,
      "loss": 0.6942,
      "step": 1425
    },
    {
      "epoch": 0.680952380952381,
      "grad_norm": 0.5132897821784491,
      "learning_rate": 1.6678944135794375e-05,
      "loss": 0.6948,
      "step": 1430
    },
    {
      "epoch": 0.6833333333333333,
      "grad_norm": 0.5469471858786907,
      "learning_rate": 1.6647958656139377e-05,
      "loss": 0.6944,
      "step": 1435
    },
    {
      "epoch": 0.6857142857142857,
      "grad_norm": 0.553201497920686,
      "learning_rate": 1.6616858375968596e-05,
      "loss": 0.6788,
      "step": 1440
    },
    {
      "epoch": 0.6880952380952381,
      "grad_norm": 0.5623963206097778,
      "learning_rate": 1.6585643832338342e-05,
      "loss": 0.6815,
      "step": 1445
    },
    {
      "epoch": 0.6904761904761905,
      "grad_norm": 0.5778391136516833,
      "learning_rate": 1.6554315564278102e-05,
      "loss": 0.6843,
      "step": 1450
    },
    {
      "epoch": 0.6928571428571428,
      "grad_norm": 0.5551499013662371,
      "learning_rate": 1.6522874112781213e-05,
      "loss": 0.6872,
      "step": 1455
    },
    {
      "epoch": 0.6952380952380952,
      "grad_norm": 0.5309204279264464,
      "learning_rate": 1.649132002079552e-05,
      "loss": 0.6873,
      "step": 1460
    },
    {
      "epoch": 0.6976190476190476,
      "grad_norm": 0.508876104576006,
      "learning_rate": 1.645965383321401e-05,
      "loss": 0.6792,
      "step": 1465
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.5802541953819853,
      "learning_rate": 1.6427876096865394e-05,
      "loss": 0.6881,
      "step": 1470
    },
    {
      "epoch": 0.7023809523809523,
      "grad_norm": 0.5641741696009784,
      "learning_rate": 1.6395987360504667e-05,
      "loss": 0.6789,
      "step": 1475
    },
    {
      "epoch": 0.7047619047619048,
      "grad_norm": 0.5366027622698923,
      "learning_rate": 1.6363988174803638e-05,
      "loss": 0.686,
      "step": 1480
    },
    {
      "epoch": 0.7071428571428572,
      "grad_norm": 0.5234128778556285,
      "learning_rate": 1.6331879092341402e-05,
      "loss": 0.7134,
      "step": 1485
    },
    {
      "epoch": 0.7095238095238096,
      "grad_norm": 0.5759728577947257,
      "learning_rate": 1.6299660667594814e-05,
      "loss": 0.701,
      "step": 1490
    },
    {
      "epoch": 0.7119047619047619,
      "grad_norm": 0.5756551282837324,
      "learning_rate": 1.626733345692892e-05,
      "loss": 0.6843,
      "step": 1495
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 0.5426722958339261,
      "learning_rate": 1.6234898018587336e-05,
      "loss": 0.6906,
      "step": 1500
    },
    {
      "epoch": 0.7166666666666667,
      "grad_norm": 0.48554563250860505,
      "learning_rate": 1.6202354912682602e-05,
      "loss": 0.6824,
      "step": 1505
    },
    {
      "epoch": 0.719047619047619,
      "grad_norm": 0.5744429442854848,
      "learning_rate": 1.6169704701186528e-05,
      "loss": 0.6979,
      "step": 1510
    },
    {
      "epoch": 0.7214285714285714,
      "grad_norm": 0.5277461906754672,
      "learning_rate": 1.6136947947920477e-05,
      "loss": 0.682,
      "step": 1515
    },
    {
      "epoch": 0.7238095238095238,
      "grad_norm": 0.5681961605460194,
      "learning_rate": 1.6104085218545633e-05,
      "loss": 0.6893,
      "step": 1520
    },
    {
      "epoch": 0.7261904761904762,
      "grad_norm": 0.58856327424583,
      "learning_rate": 1.6071117080553236e-05,
      "loss": 0.694,
      "step": 1525
    },
    {
      "epoch": 0.7285714285714285,
      "grad_norm": 0.5585907608313626,
      "learning_rate": 1.6038044103254775e-05,
      "loss": 0.6807,
      "step": 1530
    },
    {
      "epoch": 0.7309523809523809,
      "grad_norm": 0.4975987871579692,
      "learning_rate": 1.600486685777216e-05,
      "loss": 0.6839,
      "step": 1535
    },
    {
      "epoch": 0.7333333333333333,
      "grad_norm": 0.5683357922839659,
      "learning_rate": 1.5971585917027864e-05,
      "loss": 0.6924,
      "step": 1540
    },
    {
      "epoch": 0.7357142857142858,
      "grad_norm": 0.5507550407504584,
      "learning_rate": 1.5938201855735017e-05,
      "loss": 0.6952,
      "step": 1545
    },
    {
      "epoch": 0.7380952380952381,
      "grad_norm": 0.5184672194625306,
      "learning_rate": 1.5904715250387498e-05,
      "loss": 0.6864,
      "step": 1550
    },
    {
      "epoch": 0.7404761904761905,
      "grad_norm": 0.5315141830604754,
      "learning_rate": 1.5871126679249977e-05,
      "loss": 0.686,
      "step": 1555
    },
    {
      "epoch": 0.7428571428571429,
      "grad_norm": 0.5292902163348473,
      "learning_rate": 1.5837436722347902e-05,
      "loss": 0.6826,
      "step": 1560
    },
    {
      "epoch": 0.7452380952380953,
      "grad_norm": 0.570110548449842,
      "learning_rate": 1.5803645961457522e-05,
      "loss": 0.6792,
      "step": 1565
    },
    {
      "epoch": 0.7476190476190476,
      "grad_norm": 0.5543717733657283,
      "learning_rate": 1.576975498009583e-05,
      "loss": 0.6754,
      "step": 1570
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.5222973089292835,
      "learning_rate": 1.573576436351046e-05,
      "loss": 0.6902,
      "step": 1575
    },
    {
      "epoch": 0.7523809523809524,
      "grad_norm": 0.5112696664324252,
      "learning_rate": 1.570167469866962e-05,
      "loss": 0.6972,
      "step": 1580
    },
    {
      "epoch": 0.7547619047619047,
      "grad_norm": 0.5807388645411898,
      "learning_rate": 1.5667486574251916e-05,
      "loss": 0.6863,
      "step": 1585
    },
    {
      "epoch": 0.7571428571428571,
      "grad_norm": 0.5522196746267701,
      "learning_rate": 1.563320058063622e-05,
      "loss": 0.6705,
      "step": 1590
    },
    {
      "epoch": 0.7595238095238095,
      "grad_norm": 0.5302665914230218,
      "learning_rate": 1.5598817309891466e-05,
      "loss": 0.6842,
      "step": 1595
    },
    {
      "epoch": 0.7619047619047619,
      "grad_norm": 0.5286809702848861,
      "learning_rate": 1.5564337355766412e-05,
      "loss": 0.6976,
      "step": 1600
    },
    {
      "epoch": 0.7642857142857142,
      "grad_norm": 0.5393439786479364,
      "learning_rate": 1.5529761313679396e-05,
      "loss": 0.7063,
      "step": 1605
    },
    {
      "epoch": 0.7666666666666667,
      "grad_norm": 0.479895247668522,
      "learning_rate": 1.5495089780708062e-05,
      "loss": 0.6737,
      "step": 1610
    },
    {
      "epoch": 0.7690476190476191,
      "grad_norm": 0.5401818745358772,
      "learning_rate": 1.5460323355579035e-05,
      "loss": 0.6808,
      "step": 1615
    },
    {
      "epoch": 0.7714285714285715,
      "grad_norm": 0.6132409197814304,
      "learning_rate": 1.5425462638657597e-05,
      "loss": 0.6912,
      "step": 1620
    },
    {
      "epoch": 0.7738095238095238,
      "grad_norm": 0.5605359051745877,
      "learning_rate": 1.53905082319373e-05,
      "loss": 0.6912,
      "step": 1625
    },
    {
      "epoch": 0.7761904761904762,
      "grad_norm": 0.5095591675298641,
      "learning_rate": 1.5355460739029585e-05,
      "loss": 0.6854,
      "step": 1630
    },
    {
      "epoch": 0.7785714285714286,
      "grad_norm": 0.5124981468976912,
      "learning_rate": 1.5320320765153367e-05,
      "loss": 0.6805,
      "step": 1635
    },
    {
      "epoch": 0.780952380952381,
      "grad_norm": 0.5543805272032538,
      "learning_rate": 1.5285088917124555e-05,
      "loss": 0.6872,
      "step": 1640
    },
    {
      "epoch": 0.7833333333333333,
      "grad_norm": 0.5240863656017781,
      "learning_rate": 1.5249765803345602e-05,
      "loss": 0.6812,
      "step": 1645
    },
    {
      "epoch": 0.7857142857142857,
      "grad_norm": 0.5597831529794077,
      "learning_rate": 1.5214352033794981e-05,
      "loss": 0.6838,
      "step": 1650
    },
    {
      "epoch": 0.7880952380952381,
      "grad_norm": 0.5239372373145886,
      "learning_rate": 1.517884822001666e-05,
      "loss": 0.6884,
      "step": 1655
    },
    {
      "epoch": 0.7904761904761904,
      "grad_norm": 0.5776231529708008,
      "learning_rate": 1.5143254975109538e-05,
      "loss": 0.6728,
      "step": 1660
    },
    {
      "epoch": 0.7928571428571428,
      "grad_norm": 0.5215371586345835,
      "learning_rate": 1.5107572913716859e-05,
      "loss": 0.6748,
      "step": 1665
    },
    {
      "epoch": 0.7952380952380952,
      "grad_norm": 0.5282942517891518,
      "learning_rate": 1.5071802652015592e-05,
      "loss": 0.6713,
      "step": 1670
    },
    {
      "epoch": 0.7976190476190477,
      "grad_norm": 0.5541374243554975,
      "learning_rate": 1.503594480770581e-05,
      "loss": 0.6597,
      "step": 1675
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.5521158876521693,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 0.686,
      "step": 1680
    },
    {
      "epoch": 0.8023809523809524,
      "grad_norm": 0.5683308647861297,
      "learning_rate": 1.496396884961238e-05,
      "loss": 0.6693,
      "step": 1685
    },
    {
      "epoch": 0.8047619047619048,
      "grad_norm": 0.573228337305753,
      "learning_rate": 1.4927851978748177e-05,
      "loss": 0.6926,
      "step": 1690
    },
    {
      "epoch": 0.8071428571428572,
      "grad_norm": 0.521492632745778,
      "learning_rate": 1.4891650011092896e-05,
      "loss": 0.6947,
      "step": 1695
    },
    {
      "epoch": 0.8095238095238095,
      "grad_norm": 0.5236798235457268,
      "learning_rate": 1.4855363571801523e-05,
      "loss": 0.679,
      "step": 1700
    },
    {
      "epoch": 0.8119047619047619,
      "grad_norm": 0.5183500676344015,
      "learning_rate": 1.481899328748776e-05,
      "loss": 0.6657,
      "step": 1705
    },
    {
      "epoch": 0.8142857142857143,
      "grad_norm": 0.5018602569821992,
      "learning_rate": 1.4782539786213184e-05,
      "loss": 0.6836,
      "step": 1710
    },
    {
      "epoch": 0.8166666666666667,
      "grad_norm": 0.5469580528625957,
      "learning_rate": 1.4746003697476406e-05,
      "loss": 0.6841,
      "step": 1715
    },
    {
      "epoch": 0.819047619047619,
      "grad_norm": 0.5610880961409519,
      "learning_rate": 1.4709385652202204e-05,
      "loss": 0.6864,
      "step": 1720
    },
    {
      "epoch": 0.8214285714285714,
      "grad_norm": 0.5376175612680147,
      "learning_rate": 1.4672686282730622e-05,
      "loss": 0.6709,
      "step": 1725
    },
    {
      "epoch": 0.8238095238095238,
      "grad_norm": 0.49458630831613376,
      "learning_rate": 1.4635906222806058e-05,
      "loss": 0.6832,
      "step": 1730
    },
    {
      "epoch": 0.8261904761904761,
      "grad_norm": 0.5502214264070843,
      "learning_rate": 1.4599046107566314e-05,
      "loss": 0.68,
      "step": 1735
    },
    {
      "epoch": 0.8285714285714286,
      "grad_norm": 0.5611151303400349,
      "learning_rate": 1.4562106573531632e-05,
      "loss": 0.698,
      "step": 1740
    },
    {
      "epoch": 0.830952380952381,
      "grad_norm": 0.5735560840092161,
      "learning_rate": 1.4525088258593695e-05,
      "loss": 0.6839,
      "step": 1745
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 0.5510543712119496,
      "learning_rate": 1.4487991802004625e-05,
      "loss": 0.6911,
      "step": 1750
    },
    {
      "epoch": 0.8357142857142857,
      "grad_norm": 0.5018246480371149,
      "learning_rate": 1.4450817844365924e-05,
      "loss": 0.6752,
      "step": 1755
    },
    {
      "epoch": 0.8380952380952381,
      "grad_norm": 0.5016576788767521,
      "learning_rate": 1.4413567027617442e-05,
      "loss": 0.683,
      "step": 1760
    },
    {
      "epoch": 0.8404761904761905,
      "grad_norm": 0.5384327340092925,
      "learning_rate": 1.4376239995026254e-05,
      "loss": 0.6941,
      "step": 1765
    },
    {
      "epoch": 0.8428571428571429,
      "grad_norm": 0.5344821729910321,
      "learning_rate": 1.4338837391175582e-05,
      "loss": 0.6754,
      "step": 1770
    },
    {
      "epoch": 0.8452380952380952,
      "grad_norm": 0.529015386728389,
      "learning_rate": 1.4301359861953652e-05,
      "loss": 0.6718,
      "step": 1775
    },
    {
      "epoch": 0.8476190476190476,
      "grad_norm": 0.5467267886434187,
      "learning_rate": 1.4263808054542541e-05,
      "loss": 0.6885,
      "step": 1780
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.5209380288088742,
      "learning_rate": 1.4226182617406996e-05,
      "loss": 0.6725,
      "step": 1785
    },
    {
      "epoch": 0.8523809523809524,
      "grad_norm": 0.5120625589068878,
      "learning_rate": 1.418848420028325e-05,
      "loss": 0.678,
      "step": 1790
    },
    {
      "epoch": 0.8547619047619047,
      "grad_norm": 0.5177286158188428,
      "learning_rate": 1.4150713454167788e-05,
      "loss": 0.6795,
      "step": 1795
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 0.5180241473157509,
      "learning_rate": 1.4112871031306118e-05,
      "loss": 0.685,
      "step": 1800
    },
    {
      "epoch": 0.8595238095238096,
      "grad_norm": 0.5376670786030856,
      "learning_rate": 1.4074957585181488e-05,
      "loss": 0.6823,
      "step": 1805
    },
    {
      "epoch": 0.861904761904762,
      "grad_norm": 0.5159889497243569,
      "learning_rate": 1.4036973770503623e-05,
      "loss": 0.6684,
      "step": 1810
    },
    {
      "epoch": 0.8642857142857143,
      "grad_norm": 0.5328385205015794,
      "learning_rate": 1.3998920243197408e-05,
      "loss": 0.6785,
      "step": 1815
    },
    {
      "epoch": 0.8666666666666667,
      "grad_norm": 0.5596052359171065,
      "learning_rate": 1.396079766039157e-05,
      "loss": 0.6971,
      "step": 1820
    },
    {
      "epoch": 0.8690476190476191,
      "grad_norm": 0.5476844927900769,
      "learning_rate": 1.3922606680407307e-05,
      "loss": 0.6843,
      "step": 1825
    },
    {
      "epoch": 0.8714285714285714,
      "grad_norm": 0.49918261945442965,
      "learning_rate": 1.3884347962746949e-05,
      "loss": 0.6761,
      "step": 1830
    },
    {
      "epoch": 0.8738095238095238,
      "grad_norm": 0.5116149514821221,
      "learning_rate": 1.3846022168082553e-05,
      "loss": 0.6763,
      "step": 1835
    },
    {
      "epoch": 0.8761904761904762,
      "grad_norm": 0.5280902250134181,
      "learning_rate": 1.3807629958244498e-05,
|
"loss": 0.667, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.8785714285714286, |
|
"grad_norm": 0.48857909294423746, |
|
"learning_rate": 1.3769171996210053e-05, |
|
"loss": 0.6707, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.8809523809523809, |
|
"grad_norm": 0.4975153014854871, |
|
"learning_rate": 1.373064894609194e-05, |
|
"loss": 0.688, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.8833333333333333, |
|
"grad_norm": 0.5206570618857133, |
|
"learning_rate": 1.3692061473126845e-05, |
|
"loss": 0.687, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.8857142857142857, |
|
"grad_norm": 0.47088196958418377, |
|
"learning_rate": 1.3653410243663953e-05, |
|
"loss": 0.6659, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.888095238095238, |
|
"grad_norm": 0.5098589712869, |
|
"learning_rate": 1.361469592515342e-05, |
|
"loss": 0.6784, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.8904761904761904, |
|
"grad_norm": 0.47648371702756814, |
|
"learning_rate": 1.3575919186134862e-05, |
|
"loss": 0.682, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.8928571428571429, |
|
"grad_norm": 0.5003536067341952, |
|
"learning_rate": 1.3537080696225815e-05, |
|
"loss": 0.6671, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.8952380952380953, |
|
"grad_norm": 0.519430135596028, |
|
"learning_rate": 1.349818112611015e-05, |
|
"loss": 0.6885, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.8976190476190476, |
|
"grad_norm": 0.49741488661881644, |
|
"learning_rate": 1.3459221147526504e-05, |
|
"loss": 0.6754, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.5408088216797241, |
|
"learning_rate": 1.342020143325669e-05, |
|
"loss": 0.6757, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9023809523809524, |
|
"grad_norm": 0.502061784820922, |
|
"learning_rate": 1.3381122657114059e-05, |
|
"loss": 0.669, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.9047619047619048, |
|
"grad_norm": 0.5031558920583495, |
|
"learning_rate": 1.3341985493931877e-05, |
|
"loss": 0.6756, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9071428571428571, |
|
"grad_norm": 0.45651178296082845, |
|
"learning_rate": 1.3302790619551673e-05, |
|
"loss": 0.6629, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.9095238095238095, |
|
"grad_norm": 0.5111760063303825, |
|
"learning_rate": 1.3263538710811559e-05, |
|
"loss": 0.6787, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.9119047619047619, |
|
"grad_norm": 0.5038700388575827, |
|
"learning_rate": 1.3224230445534544e-05, |
|
"loss": 0.6814, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.9142857142857143, |
|
"grad_norm": 0.5023414779956755, |
|
"learning_rate": 1.3184866502516846e-05, |
|
"loss": 0.6859, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.9166666666666666, |
|
"grad_norm": 0.5154019438212942, |
|
"learning_rate": 1.3145447561516138e-05, |
|
"loss": 0.6717, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.919047619047619, |
|
"grad_norm": 0.5621900790890961, |
|
"learning_rate": 1.3105974303239838e-05, |
|
"loss": 0.7009, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.9214285714285714, |
|
"grad_norm": 0.5137612757351608, |
|
"learning_rate": 1.3066447409333345e-05, |
|
"loss": 0.6957, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.9238095238095239, |
|
"grad_norm": 0.509045076292474, |
|
"learning_rate": 1.3026867562368262e-05, |
|
"loss": 0.6804, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.9261904761904762, |
|
"grad_norm": 0.5067359253184963, |
|
"learning_rate": 1.2987235445830612e-05, |
|
"loss": 0.6707, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.9285714285714286, |
|
"grad_norm": 0.5488540810447187, |
|
"learning_rate": 1.2947551744109044e-05, |
|
"loss": 0.6784, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.930952380952381, |
|
"grad_norm": 0.49916198703229403, |
|
"learning_rate": 1.2907817142483002e-05, |
|
"loss": 0.6533, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.9333333333333333, |
|
"grad_norm": 0.5306836509898268, |
|
"learning_rate": 1.2868032327110904e-05, |
|
"loss": 0.6694, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.9357142857142857, |
|
"grad_norm": 0.5192454887104005, |
|
"learning_rate": 1.2828197985018276e-05, |
|
"loss": 0.6814, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.9380952380952381, |
|
"grad_norm": 0.48011848457133666, |
|
"learning_rate": 1.2788314804085904e-05, |
|
"loss": 0.6656, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.9404761904761905, |
|
"grad_norm": 0.501551598406461, |
|
"learning_rate": 1.2748383473037948e-05, |
|
"loss": 0.6709, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.9428571428571428, |
|
"grad_norm": 0.4798488372067313, |
|
"learning_rate": 1.2708404681430054e-05, |
|
"loss": 0.6884, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.9452380952380952, |
|
"grad_norm": 0.4954430271705226, |
|
"learning_rate": 1.266837911963743e-05, |
|
"loss": 0.6789, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.9476190476190476, |
|
"grad_norm": 0.5134775709739979, |
|
"learning_rate": 1.2628307478842955e-05, |
|
"loss": 0.6743, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.4932466173893802, |
|
"learning_rate": 1.2588190451025209e-05, |
|
"loss": 0.6741, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 0.49375841486555283, |
|
"learning_rate": 1.2548028728946548e-05, |
|
"loss": 0.682, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.9547619047619048, |
|
"grad_norm": 0.553381199639126, |
|
"learning_rate": 1.2507823006141128e-05, |
|
"loss": 0.704, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.9571428571428572, |
|
"grad_norm": 0.5286463574396035, |
|
"learning_rate": 1.2467573976902936e-05, |
|
"loss": 0.6642, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.9595238095238096, |
|
"grad_norm": 0.5330908591194152, |
|
"learning_rate": 1.24272823362738e-05, |
|
"loss": 0.6973, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.9619047619047619, |
|
"grad_norm": 0.5338462915299015, |
|
"learning_rate": 1.238694878003138e-05, |
|
"loss": 0.6608, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.9642857142857143, |
|
"grad_norm": 0.5626548093456667, |
|
"learning_rate": 1.2346574004677154e-05, |
|
"loss": 0.6807, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.9666666666666667, |
|
"grad_norm": 0.5363264212587037, |
|
"learning_rate": 1.2306158707424402e-05, |
|
"loss": 0.6742, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.969047619047619, |
|
"grad_norm": 0.5223504672217075, |
|
"learning_rate": 1.2265703586186158e-05, |
|
"loss": 0.6756, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.9714285714285714, |
|
"grad_norm": 0.5026492923180158, |
|
"learning_rate": 1.2225209339563144e-05, |
|
"loss": 0.6857, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.9738095238095238, |
|
"grad_norm": 0.4958319271212678, |
|
"learning_rate": 1.2184676666831741e-05, |
|
"loss": 0.6638, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.9761904761904762, |
|
"grad_norm": 0.49120488962765346, |
|
"learning_rate": 1.2144106267931877e-05, |
|
"loss": 0.6767, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.9785714285714285, |
|
"grad_norm": 0.56338802960644, |
|
"learning_rate": 1.210349884345496e-05, |
|
"loss": 0.6657, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.9809523809523809, |
|
"grad_norm": 0.5506153013895527, |
|
"learning_rate": 1.2062855094631777e-05, |
|
"loss": 0.6708, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.9833333333333333, |
|
"grad_norm": 0.4911898234196216, |
|
"learning_rate": 1.2022175723320382e-05, |
|
"loss": 0.6736, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.9857142857142858, |
|
"grad_norm": 0.5037996502262528, |
|
"learning_rate": 1.1981461431993978e-05, |
|
"loss": 0.6613, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.9880952380952381, |
|
"grad_norm": 0.48983503414857615, |
|
"learning_rate": 1.1940712923728784e-05, |
|
"loss": 0.6705, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.9904761904761905, |
|
"grad_norm": 0.5089658881380381, |
|
"learning_rate": 1.1899930902191904e-05, |
|
"loss": 0.6554, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.9928571428571429, |
|
"grad_norm": 0.5353432851904248, |
|
"learning_rate": 1.1859116071629148e-05, |
|
"loss": 0.6713, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.9952380952380953, |
|
"grad_norm": 0.5083284406725272, |
|
"learning_rate": 1.181826913685291e-05, |
|
"loss": 0.6593, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.9976190476190476, |
|
"grad_norm": 0.5338566827047485, |
|
"learning_rate": 1.1777390803229964e-05, |
|
"loss": 0.6771, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.5259573856012386, |
|
"learning_rate": 1.1736481776669307e-05, |
|
"loss": 0.6603, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.7571888566017151, |
|
"eval_runtime": 110.7511, |
|
"eval_samples_per_second": 77.299, |
|
"eval_steps_per_second": 1.21, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.0023809523809524, |
|
"grad_norm": 0.5901038052706317, |
|
"learning_rate": 1.1695542763609944e-05, |
|
"loss": 0.5957, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 1.0047619047619047, |
|
"grad_norm": 0.5438749703677144, |
|
"learning_rate": 1.1654574471008712e-05, |
|
"loss": 0.5972, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.0071428571428571, |
|
"grad_norm": 0.5432897226752162, |
|
"learning_rate": 1.1613577606328068e-05, |
|
"loss": 0.595, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 1.0095238095238095, |
|
"grad_norm": 0.5692170924338006, |
|
"learning_rate": 1.1572552877523855e-05, |
|
"loss": 0.5869, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.0119047619047619, |
|
"grad_norm": 0.5633601882768045, |
|
"learning_rate": 1.1531500993033094e-05, |
|
"loss": 0.59, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.0142857142857142, |
|
"grad_norm": 0.5402220697166501, |
|
"learning_rate": 1.1490422661761744e-05, |
|
"loss": 0.5928, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.0166666666666666, |
|
"grad_norm": 0.6014408487693759, |
|
"learning_rate": 1.1449318593072468e-05, |
|
"loss": 0.5943, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 1.019047619047619, |
|
"grad_norm": 0.48300700922294626, |
|
"learning_rate": 1.1408189496772369e-05, |
|
"loss": 0.593, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.0214285714285714, |
|
"grad_norm": 0.5461515536108149, |
|
"learning_rate": 1.1367036083100735e-05, |
|
"loss": 0.6118, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 1.0238095238095237, |
|
"grad_norm": 0.5225774711520927, |
|
"learning_rate": 1.1325859062716795e-05, |
|
"loss": 0.5934, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.026190476190476, |
|
"grad_norm": 0.5652770991249514, |
|
"learning_rate": 1.1284659146687416e-05, |
|
"loss": 0.5906, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 1.0285714285714285, |
|
"grad_norm": 0.49282124739115296, |
|
"learning_rate": 1.1243437046474854e-05, |
|
"loss": 0.6029, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.0309523809523808, |
|
"grad_norm": 0.517411950290186, |
|
"learning_rate": 1.120219347392444e-05, |
|
"loss": 0.6105, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 1.0333333333333334, |
|
"grad_norm": 0.5547803815907365, |
|
"learning_rate": 1.1160929141252303e-05, |
|
"loss": 0.5888, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.0357142857142858, |
|
"grad_norm": 0.5448805389067126, |
|
"learning_rate": 1.1119644761033079e-05, |
|
"loss": 0.606, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.0380952380952382, |
|
"grad_norm": 0.5270587508515787, |
|
"learning_rate": 1.1078341046187588e-05, |
|
"loss": 0.5978, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.0404761904761906, |
|
"grad_norm": 0.5470808129733004, |
|
"learning_rate": 1.1037018709970528e-05, |
|
"loss": 0.6079, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 1.042857142857143, |
|
"grad_norm": 0.5185275354171291, |
|
"learning_rate": 1.0995678465958168e-05, |
|
"loss": 0.5998, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.0452380952380953, |
|
"grad_norm": 0.5222569126782542, |
|
"learning_rate": 1.0954321028036013e-05, |
|
"loss": 0.6065, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 1.0476190476190477, |
|
"grad_norm": 0.5902869820958465, |
|
"learning_rate": 1.0912947110386484e-05, |
|
"loss": 0.5952, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.5049079899744102, |
|
"learning_rate": 1.0871557427476585e-05, |
|
"loss": 0.5984, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 1.0523809523809524, |
|
"grad_norm": 0.5211587946973806, |
|
"learning_rate": 1.0830152694045553e-05, |
|
"loss": 0.5822, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.0547619047619048, |
|
"grad_norm": 0.5411611115879803, |
|
"learning_rate": 1.078873362509254e-05, |
|
"loss": 0.5882, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 1.0571428571428572, |
|
"grad_norm": 0.5442183494774795, |
|
"learning_rate": 1.0747300935864245e-05, |
|
"loss": 0.5996, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.0595238095238095, |
|
"grad_norm": 0.5612089724718119, |
|
"learning_rate": 1.0705855341842564e-05, |
|
"loss": 0.6, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.061904761904762, |
|
"grad_norm": 0.5293855485137681, |
|
"learning_rate": 1.0664397558732245e-05, |
|
"loss": 0.5918, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.0642857142857143, |
|
"grad_norm": 0.5907188005151464, |
|
"learning_rate": 1.0622928302448523e-05, |
|
"loss": 0.608, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 1.0666666666666667, |
|
"grad_norm": 0.5251701766583902, |
|
"learning_rate": 1.0581448289104759e-05, |
|
"loss": 0.6058, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.069047619047619, |
|
"grad_norm": 0.5250031426883407, |
|
"learning_rate": 1.0539958235000075e-05, |
|
"loss": 0.6064, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 1.0714285714285714, |
|
"grad_norm": 0.5217021022246795, |
|
"learning_rate": 1.0498458856606972e-05, |
|
"loss": 0.6098, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.0738095238095238, |
|
"grad_norm": 0.5671230242631112, |
|
"learning_rate": 1.0456950870558982e-05, |
|
"loss": 0.5985, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 1.0761904761904761, |
|
"grad_norm": 0.5640807879300845, |
|
"learning_rate": 1.0415434993638269e-05, |
|
"loss": 0.5992, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.0785714285714285, |
|
"grad_norm": 0.5349112926063765, |
|
"learning_rate": 1.037391194276326e-05, |
|
"loss": 0.6061, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 1.0809523809523809, |
|
"grad_norm": 0.5592160403982473, |
|
"learning_rate": 1.0332382434976267e-05, |
|
"loss": 0.5864, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.0833333333333333, |
|
"grad_norm": 0.5235545437475504, |
|
"learning_rate": 1.0290847187431115e-05, |
|
"loss": 0.592, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.0857142857142856, |
|
"grad_norm": 0.556896126496801, |
|
"learning_rate": 1.0249306917380731e-05, |
|
"loss": 0.6005, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.088095238095238, |
|
"grad_norm": 0.522494265444706, |
|
"learning_rate": 1.0207762342164778e-05, |
|
"loss": 0.5958, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 1.0904761904761904, |
|
"grad_norm": 0.5307880240675681, |
|
"learning_rate": 1.0166214179197265e-05, |
|
"loss": 0.5911, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.092857142857143, |
|
"grad_norm": 0.5364461659449388, |
|
"learning_rate": 1.0124663145954152e-05, |
|
"loss": 0.6018, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 1.0952380952380953, |
|
"grad_norm": 0.5520100652972162, |
|
"learning_rate": 1.0083109959960974e-05, |
|
"loss": 0.5955, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.0976190476190477, |
|
"grad_norm": 0.5020623533166694, |
|
"learning_rate": 1.0041555338780427e-05, |
|
"loss": 0.5961, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.52041207867341, |
|
"learning_rate": 1e-05, |
|
"loss": 0.5954, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.1023809523809525, |
|
"grad_norm": 0.569101456193797, |
|
"learning_rate": 9.958444661219578e-06, |
|
"loss": 0.6046, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 1.1047619047619048, |
|
"grad_norm": 0.5290911882364042, |
|
"learning_rate": 9.916890040039031e-06, |
|
"loss": 0.5891, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.1071428571428572, |
|
"grad_norm": 0.5305645713582501, |
|
"learning_rate": 9.87533685404585e-06, |
|
"loss": 0.5988, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.1095238095238096, |
|
"grad_norm": 0.5469543228815086, |
|
"learning_rate": 9.833785820802739e-06, |
|
"loss": 0.5957, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.111904761904762, |
|
"grad_norm": 0.5324948019005903, |
|
"learning_rate": 9.792237657835225e-06, |
|
"loss": 0.5953, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 1.1142857142857143, |
|
"grad_norm": 0.5304279403221299, |
|
"learning_rate": 9.750693082619274e-06, |
|
"loss": 0.5937, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.1166666666666667, |
|
"grad_norm": 0.5485078647852204, |
|
"learning_rate": 9.709152812568886e-06, |
|
"loss": 0.5987, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 1.119047619047619, |
|
"grad_norm": 0.552749009152491, |
|
"learning_rate": 9.667617565023734e-06, |
|
"loss": 0.5978, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.1214285714285714, |
|
"grad_norm": 0.5177836960191152, |
|
"learning_rate": 9.626088057236745e-06, |
|
"loss": 0.5973, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 1.1238095238095238, |
|
"grad_norm": 0.6109837673102856, |
|
"learning_rate": 9.584565006361735e-06, |
|
"loss": 0.595, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.1261904761904762, |
|
"grad_norm": 0.5167360036746549, |
|
"learning_rate": 9.543049129441021e-06, |
|
"loss": 0.5891, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 1.1285714285714286, |
|
"grad_norm": 0.5297524492676912, |
|
"learning_rate": 9.501541143393028e-06, |
|
"loss": 0.606, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.130952380952381, |
|
"grad_norm": 0.5411957992836974, |
|
"learning_rate": 9.460041764999929e-06, |
|
"loss": 0.5973, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.1333333333333333, |
|
"grad_norm": 0.5287203316976509, |
|
"learning_rate": 9.418551710895243e-06, |
|
"loss": 0.5848, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.1357142857142857, |
|
"grad_norm": 0.6710026001993966, |
|
"learning_rate": 9.377071697551479e-06, |
|
"loss": 0.6086, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 1.138095238095238, |
|
"grad_norm": 0.5503478041423361, |
|
"learning_rate": 9.33560244126776e-06, |
|
"loss": 0.5941, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.1404761904761904, |
|
"grad_norm": 0.5347482917383127, |
|
"learning_rate": 9.294144658157443e-06, |
|
"loss": 0.5974, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 1.1428571428571428, |
|
"grad_norm": 0.5497165323598144, |
|
"learning_rate": 9.252699064135759e-06, |
|
"loss": 0.6046, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.1452380952380952, |
|
"grad_norm": 0.5190090017461467, |
|
"learning_rate": 9.211266374907463e-06, |
|
"loss": 0.5869, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 1.1476190476190475, |
|
"grad_norm": 0.5719299143043652, |
|
"learning_rate": 9.169847305954448e-06, |
|
"loss": 0.6048, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.5672551417094259, |
|
"learning_rate": 9.128442572523418e-06, |
|
"loss": 0.6068, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 1.1523809523809523, |
|
"grad_norm": 0.5811171033851283, |
|
"learning_rate": 9.087052889613519e-06, |
|
"loss": 0.5982, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.1547619047619047, |
|
"grad_norm": 0.5227291415650476, |
|
"learning_rate": 9.045678971963988e-06, |
|
"loss": 0.591, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.157142857142857, |
|
"grad_norm": 0.5413206931558642, |
|
"learning_rate": 9.004321534041836e-06, |
|
"loss": 0.5916, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.1595238095238094, |
|
"grad_norm": 0.5298903577250825, |
|
"learning_rate": 8.962981290029475e-06, |
|
"loss": 0.5883, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 1.161904761904762, |
|
"grad_norm": 0.4876508870345226, |
|
"learning_rate": 8.921658953812416e-06, |
|
"loss": 0.5938, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.1642857142857144, |
|
"grad_norm": 0.5198015834539063, |
|
"learning_rate": 8.880355238966923e-06, |
|
"loss": 0.5975, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 1.1666666666666667, |
|
"grad_norm": 0.5097940796805619, |
|
"learning_rate": 8.839070858747697e-06, |
|
"loss": 0.5868, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.1690476190476191, |
|
"grad_norm": 0.5663643871944489, |
|
"learning_rate": 8.797806526075566e-06, |
|
"loss": 0.5902, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 1.1714285714285715, |
|
"grad_norm": 0.5396345415398657, |
|
"learning_rate": 8.756562953525151e-06, |
|
"loss": 0.5958, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.1738095238095239, |
|
"grad_norm": 0.5577630837652126, |
|
"learning_rate": 8.715340853312586e-06, |
|
"loss": 0.5833, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 1.1761904761904762, |
|
"grad_norm": 0.5518263276547959, |
|
"learning_rate": 8.674140937283208e-06, |
|
"loss": 0.5935, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.1785714285714286, |
|
"grad_norm": 0.5084066290422407, |
|
"learning_rate": 8.632963916899268e-06, |
|
"loss": 0.5835, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.180952380952381, |
|
"grad_norm": 0.5098303646711122, |
|
"learning_rate": 8.591810503227634e-06, |
|
"loss": 0.6024, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.1833333333333333, |
|
"grad_norm": 0.5399045936688955, |
|
"learning_rate": 8.550681406927534e-06, |
|
"loss": 0.5916, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 1.1857142857142857, |
|
"grad_norm": 0.5139645415633143, |
|
"learning_rate": 8.509577338238255e-06, |
|
"loss": 0.5867, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.188095238095238, |
|
"grad_norm": 0.5670585522900756, |
|
"learning_rate": 8.46849900696691e-06, |
|
"loss": 0.6006, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 1.1904761904761905, |
|
"grad_norm": 0.5457106741839642, |
|
"learning_rate": 8.427447122476148e-06, |
|
"loss": 0.597, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.1928571428571428, |
|
"grad_norm": 0.5571733334835492, |
|
"learning_rate": 8.386422393671934e-06, |
|
"loss": 0.5913, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 1.1952380952380952, |
|
"grad_norm": 0.529527429611943, |
|
"learning_rate": 8.34542552899129e-06, |
|
"loss": 0.6006, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.1976190476190476, |
|
"grad_norm": 0.5235732448086102, |
|
"learning_rate": 8.304457236390062e-06, |
|
"loss": 0.5819, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.5332920937716661, |
|
"learning_rate": 8.263518223330698e-06, |
|
"loss": 0.6024, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.2023809523809523, |
|
"grad_norm": 0.5137113000682924, |
|
"learning_rate": 8.222609196770037e-06, |
|
"loss": 0.6001, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.2047619047619047, |
|
"grad_norm": 0.5000093032244755, |
|
"learning_rate": 8.181730863147094e-06, |
|
"loss": 0.5839, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.207142857142857, |
|
"grad_norm": 0.5084439515294099, |
|
"learning_rate": 8.140883928370855e-06, |
|
"loss": 0.5987, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 1.2095238095238094, |
|
"grad_norm": 0.5420324353647344, |
|
"learning_rate": 8.100069097808103e-06, |
|
"loss": 0.5918, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.2119047619047618, |
|
"grad_norm": 0.5051962888682799, |
|
"learning_rate": 8.059287076271216e-06, |
|
"loss": 0.5929, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 1.2142857142857142, |
|
"grad_norm": 0.5112423923421076, |
|
"learning_rate": 8.018538568006027e-06, |
|
"loss": 0.5851, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.2166666666666668, |
|
"grad_norm": 0.5404164861280687, |
|
"learning_rate": 7.977824276679623e-06, |
|
"loss": 0.6097, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 1.2190476190476192, |
|
"grad_norm": 0.5318602860411683, |
|
"learning_rate": 7.937144905368226e-06, |
|
"loss": 0.5952, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.2214285714285715, |
|
"grad_norm": 0.5521690395673133, |
|
"learning_rate": 7.896501156545044e-06, |
|
"loss": 0.594, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 1.223809523809524, |
|
"grad_norm": 0.4999490189546094, |
|
"learning_rate": 7.855893732068124e-06, |
|
"loss": 0.5934, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.2261904761904763, |
|
"grad_norm": 0.5048993618252382, |
|
"learning_rate": 7.815323333168262e-06, |
|
"loss": 0.5959, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.2285714285714286, |
|
"grad_norm": 0.5109949262663191, |
|
"learning_rate": 7.774790660436857e-06, |
|
"loss": 0.5988, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.230952380952381, |
|
"grad_norm": 0.5143824021233596, |
|
"learning_rate": 7.734296413813847e-06, |
|
"loss": 0.5844, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 1.2333333333333334, |
|
"grad_norm": 0.5337082713078726, |
|
"learning_rate": 7.6938412925756e-06, |
|
"loss": 0.5898, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.2357142857142858, |
|
"grad_norm": 0.5024058244542078, |
|
"learning_rate": 7.653425995322852e-06, |
|
"loss": 0.5959, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 1.2380952380952381, |
|
"grad_norm": 0.5089373868097246, |
|
"learning_rate": 7.613051219968624e-06, |
|
"loss": 0.6007, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.2404761904761905, |
|
"grad_norm": 0.5292691125658913, |
|
"learning_rate": 7.5727176637262034e-06, |
|
"loss": 0.5893, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 1.2428571428571429, |
|
"grad_norm": 0.5105236474902384, |
|
"learning_rate": 7.532426023097063e-06, |
|
"loss": 0.5991, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.2452380952380953, |
|
"grad_norm": 0.5289609790386512, |
|
"learning_rate": 7.492176993858873e-06, |
|
"loss": 0.5833, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 1.2476190476190476, |
|
"grad_norm": 0.5289162389327398, |
|
"learning_rate": 7.451971271053455e-06, |
|
"loss": 0.5852, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.570777487333234, |
|
"learning_rate": 7.411809548974792e-06, |
|
"loss": 0.5965, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.2523809523809524, |
|
"grad_norm": 0.5208585348103933, |
|
"learning_rate": 7.371692521157048e-06, |
|
"loss": 0.6133, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.2547619047619047, |
|
"grad_norm": 0.5279916869252734, |
|
"learning_rate": 7.331620880362571e-06, |
|
"loss": 0.5949, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 1.2571428571428571, |
|
"grad_norm": 0.5269636791495603, |
|
"learning_rate": 7.291595318569951e-06, |
|
"loss": 0.6078, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.2595238095238095, |
|
"grad_norm": 0.527352227349602, |
|
"learning_rate": 7.2516165269620534e-06, |
|
"loss": 0.5869, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 1.2619047619047619, |
|
"grad_norm": 0.5144295584353445, |
|
"learning_rate": 7.2116851959140965e-06, |
|
"loss": 0.5918, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.2642857142857142, |
|
"grad_norm": 0.5601926258663379, |
|
"learning_rate": 7.171802014981726e-06, |
|
"loss": 0.585, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 1.2666666666666666, |
|
"grad_norm": 0.5379381373225919, |
|
"learning_rate": 7.131967672889101e-06, |
|
"loss": 0.595, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.269047619047619, |
|
"grad_norm": 0.5251125215829732, |
|
"learning_rate": 7.092182857516998e-06, |
|
"loss": 0.5839, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 1.2714285714285714, |
|
"grad_norm": 0.4965704448975545, |
|
"learning_rate": 7.052448255890958e-06, |
|
"loss": 0.5991, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.2738095238095237, |
|
"grad_norm": 0.5450909049558729, |
|
"learning_rate": 7.012764554169393e-06, |
|
"loss": 0.6083, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.276190476190476, |
|
"grad_norm": 0.5915810150427402, |
|
"learning_rate": 6.973132437631743e-06, |
|
"loss": 0.5899, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.2785714285714285, |
|
"grad_norm": 0.5300752289151952, |
|
"learning_rate": 6.933552590666659e-06, |
|
"loss": 0.5876, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 1.2809523809523808, |
|
"grad_norm": 0.5026232106359814, |
|
"learning_rate": 6.8940256967601625e-06, |
|
"loss": 0.5853, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.2833333333333332, |
|
"grad_norm": 0.5197308254582468, |
|
"learning_rate": 6.854552438483866e-06, |
|
"loss": 0.5798, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 1.2857142857142856, |
|
"grad_norm": 0.5211709556282136, |
|
"learning_rate": 6.815133497483157e-06, |
|
"loss": 0.5708, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.288095238095238, |
|
"grad_norm": 0.4782326251158853, |
|
"learning_rate": 6.775769554465455e-06, |
|
"loss": 0.6023, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 1.2904761904761906, |
|
"grad_norm": 0.5151978950364201, |
|
"learning_rate": 6.736461289188445e-06, |
|
"loss": 0.6054, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.292857142857143, |
|
"grad_norm": 0.544144875536584, |
|
"learning_rate": 6.697209380448333e-06, |
|
"loss": 0.5983, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 1.2952380952380953, |
|
"grad_norm": 0.525706847543693, |
|
"learning_rate": 6.6580145060681255e-06, |
|
"loss": 0.5909, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.2976190476190477, |
|
"grad_norm": 0.5337499789796499, |
|
"learning_rate": 6.618877342885945e-06, |
|
"loss": 0.5932, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.4979999689542144, |
|
"learning_rate": 6.579798566743314e-06, |
|
"loss": 0.5872, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.3023809523809524, |
|
"grad_norm": 0.5293968795002583, |
|
"learning_rate": 6.540778852473497e-06, |
|
"loss": 0.595, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 1.3047619047619048, |
|
"grad_norm": 0.513270923657658, |
|
"learning_rate": 6.501818873889856e-06, |
|
"loss": 0.5951, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.3071428571428572, |
|
"grad_norm": 0.5284113230022897, |
|
"learning_rate": 6.462919303774186e-06, |
|
"loss": 0.5886, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 1.3095238095238095, |
|
"grad_norm": 0.511386553250273, |
|
"learning_rate": 6.424080813865139e-06, |
|
"loss": 0.5785, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.311904761904762, |
|
"grad_norm": 0.5612313704527047, |
|
"learning_rate": 6.3853040748465855e-06, |
|
"loss": 0.5934, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 1.3142857142857143, |
|
"grad_norm": 0.49635536234133354, |
|
"learning_rate": 6.34658975633605e-06, |
|
"loss": 0.5804, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.3166666666666667, |
|
"grad_norm": 0.5158067784575548, |
|
"learning_rate": 6.3079385268731575e-06, |
|
"loss": 0.5982, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 1.319047619047619, |
|
"grad_norm": 0.5424701107493409, |
|
"learning_rate": 6.269351053908061e-06, |
|
"loss": 0.5873, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.3214285714285714, |
|
"grad_norm": 0.5095803553683687, |
|
"learning_rate": 6.230828003789949e-06, |
|
"loss": 0.59, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.3238095238095238, |
|
"grad_norm": 0.5004483050726994, |
|
"learning_rate": 6.192370041755505e-06, |
|
"loss": 0.588, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.3261904761904761, |
|
"grad_norm": 0.5307962744979307, |
|
"learning_rate": 6.153977831917451e-06, |
|
"loss": 0.5879, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 1.3285714285714285, |
|
"grad_norm": 0.5520488950744148, |
|
"learning_rate": 6.115652037253054e-06, |
|
"loss": 0.5967, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.3309523809523809, |
|
"grad_norm": 0.4933954521187605, |
|
"learning_rate": 6.077393319592697e-06, |
|
"loss": 0.5922, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.5376192804239446, |
|
"learning_rate": 6.039202339608432e-06, |
|
"loss": 0.577, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.3357142857142856, |
|
"grad_norm": 0.5213896422824574, |
|
"learning_rate": 6.001079756802592e-06, |
|
"loss": 0.58, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 1.3380952380952382, |
|
"grad_norm": 0.5605679212505607, |
|
"learning_rate": 5.963026229496378e-06, |
|
"loss": 0.5801, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.3404761904761906, |
|
"grad_norm": 0.5500894406128793, |
|
"learning_rate": 5.925042414818514e-06, |
|
"loss": 0.5893, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 1.342857142857143, |
|
"grad_norm": 0.5051376225418218, |
|
"learning_rate": 5.887128968693887e-06, |
|
"loss": 0.5804, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.3452380952380953, |
|
"grad_norm": 0.5241811747282762, |
|
"learning_rate": 5.849286545832211e-06, |
|
"loss": 0.576, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.3476190476190477, |
|
"grad_norm": 0.5163613845049234, |
|
"learning_rate": 5.811515799716754e-06, |
|
"loss": 0.585, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.5236385208944061, |
|
"learning_rate": 5.773817382593008e-06, |
|
"loss": 0.6006, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 1.3523809523809525, |
|
"grad_norm": 0.532779005684654, |
|
"learning_rate": 5.736191945457463e-06, |
|
"loss": 0.5946, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.3547619047619048, |
|
"grad_norm": 0.5259472612299654, |
|
"learning_rate": 5.698640138046349e-06, |
|
"loss": 0.5974, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 1.3571428571428572, |
|
"grad_norm": 0.5109032533795757, |
|
"learning_rate": 5.66116260882442e-06, |
|
"loss": 0.5936, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.3595238095238096, |
|
"grad_norm": 0.5122818131471542, |
|
"learning_rate": 5.623760004973749e-06, |
|
"loss": 0.5795, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 1.361904761904762, |
|
"grad_norm": 0.4768696514947288, |
|
"learning_rate": 5.586432972382561e-06, |
|
"loss": 0.5758, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.3642857142857143, |
|
"grad_norm": 0.53080481784541, |
|
"learning_rate": 5.549182155634076e-06, |
|
"loss": 0.5767, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 1.3666666666666667, |
|
"grad_norm": 0.5038341903214718, |
|
"learning_rate": 5.512008197995379e-06, |
|
"loss": 0.5933, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.369047619047619, |
|
"grad_norm": 0.5229825704537906, |
|
"learning_rate": 5.47491174140631e-06, |
|
"loss": 0.5824, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.3714285714285714, |
|
"grad_norm": 0.49627233889726186, |
|
"learning_rate": 5.43789342646837e-06, |
|
"loss": 0.5988, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.3738095238095238, |
|
"grad_norm": 0.514904186125751, |
|
"learning_rate": 5.4009538924336864e-06, |
|
"loss": 0.5876, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 1.3761904761904762, |
|
"grad_norm": 0.5509458010595618, |
|
"learning_rate": 5.364093777193944e-06, |
|
"loss": 0.6045, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.3785714285714286, |
|
"grad_norm": 0.5039270525726298, |
|
"learning_rate": 5.32731371726938e-06, |
|
"loss": 0.585, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 1.380952380952381, |
|
"grad_norm": 0.5437109056015712, |
|
"learning_rate": 5.290614347797802e-06, |
|
"loss": 0.5924, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.3833333333333333, |
|
"grad_norm": 0.5280438662821184, |
|
"learning_rate": 5.253996302523596e-06, |
|
"loss": 0.5808, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 1.3857142857142857, |
|
"grad_norm": 0.5411985643744547, |
|
"learning_rate": 5.217460213786822e-06, |
|
"loss": 0.5909, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.388095238095238, |
|
"grad_norm": 0.4928419336681969, |
|
"learning_rate": 5.181006712512245e-06, |
|
"loss": 0.5969, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 1.3904761904761904, |
|
"grad_norm": 0.49801415174834524, |
|
"learning_rate": 5.144636428198477e-06, |
|
"loss": 0.5869, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.3928571428571428, |
|
"grad_norm": 0.5060212093373364, |
|
"learning_rate": 5.108349988907111e-06, |
|
"loss": 0.5827, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.3952380952380952, |
|
"grad_norm": 0.4959605026950924, |
|
"learning_rate": 5.072148021251822e-06, |
|
"loss": 0.5904, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.3976190476190475, |
|
"grad_norm": 0.5256166273733636, |
|
"learning_rate": 5.036031150387624e-06, |
|
"loss": 0.582, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.5644237371840894, |
|
"learning_rate": 5.000000000000003e-06, |
|
"loss": 0.5876, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.4023809523809523, |
|
"grad_norm": 0.5544605233225762, |
|
"learning_rate": 4.964055192294187e-06, |
|
"loss": 0.5736, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 1.4047619047619047, |
|
"grad_norm": 0.5111614229837703, |
|
"learning_rate": 4.92819734798441e-06, |
|
"loss": 0.5872, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.407142857142857, |
|
"grad_norm": 0.4853589383206433, |
|
"learning_rate": 4.892427086283147e-06, |
|
"loss": 0.5899, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 1.4095238095238094, |
|
"grad_norm": 0.5024366384928733, |
|
"learning_rate": 4.856745024890466e-06, |
|
"loss": 0.5739, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.4119047619047618, |
|
"grad_norm": 0.5165017541670609, |
|
"learning_rate": 4.821151779983343e-06, |
|
"loss": 0.5889, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 1.4142857142857144, |
|
"grad_norm": 0.5326057098439853, |
|
"learning_rate": 4.78564796620502e-06, |
|
"loss": 0.5984, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.4166666666666667, |
|
"grad_norm": 0.5578793439842268, |
|
"learning_rate": 4.7502341966544e-06, |
|
"loss": 0.5919, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.4190476190476191, |
|
"grad_norm": 0.5266644847974143, |
|
"learning_rate": 4.714911082875446e-06, |
|
"loss": 0.5811, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.4214285714285715, |
|
"grad_norm": 0.5405356548584016, |
|
"learning_rate": 4.679679234846636e-06, |
|
"loss": 0.5852, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 1.4238095238095239, |
|
"grad_norm": 0.5332823348361332, |
|
"learning_rate": 4.644539260970417e-06, |
|
"loss": 0.5904, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.4261904761904762, |
|
"grad_norm": 0.5289204371473316, |
|
"learning_rate": 4.609491768062705e-06, |
|
"loss": 0.5715, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 0.49718290349750854, |
|
"learning_rate": 4.5745373613424075e-06, |
|
"loss": 0.5875, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.430952380952381, |
|
"grad_norm": 0.5370577064296572, |
|
"learning_rate": 4.539676644420966e-06, |
|
"loss": 0.5838, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 1.4333333333333333, |
|
"grad_norm": 0.5318499526470056, |
|
"learning_rate": 4.504910219291941e-06, |
|
"loss": 0.6081, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.4357142857142857, |
|
"grad_norm": 0.5051255352622448, |
|
"learning_rate": 4.470238686320606e-06, |
|
"loss": 0.5796, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 1.438095238095238, |
|
"grad_norm": 0.5318507665411399, |
|
"learning_rate": 4.435662644233594e-06, |
|
"loss": 0.5763, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.4404761904761905, |
|
"grad_norm": 0.5337465779813616, |
|
"learning_rate": 4.4011826901085346e-06, |
|
"loss": 0.5856, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.4428571428571428, |
|
"grad_norm": 0.5165852420124566, |
|
"learning_rate": 4.3667994193637794e-06, |
|
"loss": 0.5879, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.4452380952380952, |
|
"grad_norm": 0.5155869722101444, |
|
"learning_rate": 4.3325134257480905e-06, |
|
"loss": 0.5831, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 1.4476190476190476, |
|
"grad_norm": 0.4775386551110262, |
|
"learning_rate": 4.298325301330383e-06, |
|
"loss": 0.5866, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.5054313057714112, |
|
"learning_rate": 4.264235636489542e-06, |
|
"loss": 0.5892, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 1.4523809523809523, |
|
"grad_norm": 0.527424288981721, |
|
"learning_rate": 4.23024501990417e-06, |
|
"loss": 0.581, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.4547619047619047, |
|
"grad_norm": 0.5089540127795982, |
|
"learning_rate": 4.196354038542476e-06, |
|
"loss": 0.5892, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 1.457142857142857, |
|
"grad_norm": 0.5207863838757609, |
|
"learning_rate": 4.162563277652104e-06, |
|
"loss": 0.5865, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.4595238095238094, |
|
"grad_norm": 0.5075766650861325, |
|
"learning_rate": 4.128873320750027e-06, |
|
"loss": 0.5756, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 1.461904761904762, |
|
"grad_norm": 0.5025228767994807, |
|
"learning_rate": 4.095284749612504e-06, |
|
"loss": 0.5872, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.4642857142857144, |
|
"grad_norm": 0.49216468636258304, |
|
"learning_rate": 4.061798144264986e-06, |
|
"loss": 0.5724, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.4666666666666668, |
|
"grad_norm": 0.5053765390136462, |
|
"learning_rate": 4.028414082972141e-06, |
|
"loss": 0.5725, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.4690476190476192, |
|
"grad_norm": 0.5698025824976778, |
|
"learning_rate": 3.995133142227843e-06, |
|
"loss": 0.5817, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 1.4714285714285715, |
|
"grad_norm": 0.510204796748791, |
|
"learning_rate": 3.961955896745224e-06, |
|
"loss": 0.5777, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.473809523809524, |
|
"grad_norm": 0.5091694662046772, |
|
"learning_rate": 3.928882919446767e-06, |
|
"loss": 0.5857, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 1.4761904761904763, |
|
"grad_norm": 0.5151359736079936, |
|
"learning_rate": 3.89591478145437e-06, |
|
"loss": 0.5801, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.4785714285714286, |
|
"grad_norm": 0.5182734669196073, |
|
"learning_rate": 3.8630520520795275e-06, |
|
"loss": 0.5693, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 1.480952380952381, |
|
"grad_norm": 0.5129439390544596, |
|
"learning_rate": 3.830295298813475e-06, |
|
"loss": 0.586, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.4833333333333334, |
|
"grad_norm": 0.5576172854485092, |
|
"learning_rate": 3.797645087317401e-06, |
|
"loss": 0.5753, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 1.4857142857142858, |
|
"grad_norm": 0.5067485290590222, |
|
"learning_rate": 3.7651019814126656e-06, |
|
"loss": 0.5958, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.4880952380952381, |
|
"grad_norm": 0.5319104622052202, |
|
"learning_rate": 3.7326665430710798e-06, |
|
"loss": 0.5868, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.4904761904761905, |
|
"grad_norm": 0.49375049435832247, |
|
"learning_rate": 3.7003393324051874e-06, |
|
"loss": 0.5757, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.4928571428571429, |
|
"grad_norm": 0.5074776283156045, |
|
"learning_rate": 3.6681209076586035e-06, |
|
"loss": 0.5845, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 1.4952380952380953, |
|
"grad_norm": 0.5065655047968505, |
|
"learning_rate": 3.636011825196365e-06, |
|
"loss": 0.5836, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.4976190476190476, |
|
"grad_norm": 0.517677275546858, |
|
"learning_rate": 3.6040126394953334e-06, |
|
"loss": 0.5794, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.4918393882322778, |
|
"learning_rate": 3.5721239031346067e-06, |
|
"loss": 0.5894, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.5023809523809524, |
|
"grad_norm": 0.5004981541944984, |
|
"learning_rate": 3.540346166785994e-06, |
|
"loss": 0.5731, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 1.5047619047619047, |
|
"grad_norm": 0.5117157105186525, |
|
"learning_rate": 3.5086799792044812e-06, |
|
"loss": 0.5856, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.5071428571428571, |
|
"grad_norm": 0.5239013800449769, |
|
"learning_rate": 3.4771258872187917e-06, |
|
"loss": 0.5784, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 1.5095238095238095, |
|
"grad_norm": 0.499421090386738, |
|
"learning_rate": 3.4456844357218977e-06, |
|
"loss": 0.5853, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.5119047619047619, |
|
"grad_norm": 0.49970944259407496, |
|
"learning_rate": 3.414356167661658e-06, |
|
"loss": 0.57, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.5142857142857142, |
|
"grad_norm": 0.5258093843059876, |
|
"learning_rate": 3.3831416240314085e-06, |
|
"loss": 0.5761, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.5166666666666666, |
|
"grad_norm": 0.5244353245311157, |
|
"learning_rate": 3.3520413438606215e-06, |
|
"loss": 0.5882, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 1.519047619047619, |
|
"grad_norm": 0.4977521350042896, |
|
"learning_rate": 3.3210558642056277e-06, |
|
"loss": 0.5901, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.5214285714285714, |
|
"grad_norm": 0.5122476763468953, |
|
"learning_rate": 3.290185720140301e-06, |
|
"loss": 0.5811, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 1.5238095238095237, |
|
"grad_norm": 0.49340352912131785, |
|
"learning_rate": 3.2594314447468457e-06, |
|
"loss": 0.5754, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.526190476190476, |
|
"grad_norm": 0.4874919671111681, |
|
"learning_rate": 3.228793569106594e-06, |
|
"loss": 0.5841, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 1.5285714285714285, |
|
"grad_norm": 0.5324961393719349, |
|
"learning_rate": 3.1982726222908046e-06, |
|
"loss": 0.5727, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.5309523809523808, |
|
"grad_norm": 0.4995381050758276, |
|
"learning_rate": 3.1678691313515688e-06, |
|
"loss": 0.579, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 1.5333333333333332, |
|
"grad_norm": 0.49736016607701533, |
|
"learning_rate": 3.1375836213126653e-06, |
|
"loss": 0.5847, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.5357142857142856, |
|
"grad_norm": 0.549492541961606, |
|
"learning_rate": 3.10741661516053e-06, |
|
"loss": 0.5814, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.538095238095238, |
|
"grad_norm": 0.49389981419744866, |
|
"learning_rate": 3.077368633835205e-06, |
|
"loss": 0.5737, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.5404761904761903, |
|
"grad_norm": 0.5564195244743101, |
|
"learning_rate": 3.0474401962213483e-06, |
|
"loss": 0.5769, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 1.5428571428571427, |
|
"grad_norm": 0.5123987478065914, |
|
"learning_rate": 3.017631819139273e-06, |
|
"loss": 0.591, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.545238095238095, |
|
"grad_norm": 0.5012262974081656, |
|
"learning_rate": 2.987944017336023e-06, |
|
"loss": 0.5888, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 1.5476190476190477, |
|
"grad_norm": 0.5152768019089213, |
|
"learning_rate": 2.958377303476483e-06, |
|
"loss": 0.5702, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.5066389868980488, |
|
"learning_rate": 2.9289321881345257e-06, |
|
"loss": 0.5868, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 1.5523809523809524, |
|
"grad_norm": 0.5058336453434157, |
|
"learning_rate": 2.8996091797841976e-06, |
|
"loss": 0.5702, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.5547619047619048, |
|
"grad_norm": 0.4984793009615145, |
|
"learning_rate": 2.8704087847909333e-06, |
|
"loss": 0.5662, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 1.5571428571428572, |
|
"grad_norm": 0.5108314631589933, |
|
"learning_rate": 2.8413315074028157e-06, |
|
"loss": 0.5707, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.5595238095238095, |
|
"grad_norm": 0.5031743279940161, |
|
"learning_rate": 2.8123778497418687e-06, |
|
"loss": 0.5872, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.561904761904762, |
|
"grad_norm": 0.5140205415498605, |
|
"learning_rate": 2.783548311795379e-06, |
|
"loss": 0.5814, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.5642857142857143, |
|
"grad_norm": 0.4951045700618797, |
|
"learning_rate": 2.7548433914072736e-06, |
|
"loss": 0.5688, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 1.5666666666666667, |
|
"grad_norm": 0.4809510784828599, |
|
"learning_rate": 2.726263584269513e-06, |
|
"loss": 0.5822, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.569047619047619, |
|
"grad_norm": 0.5144657250346224, |
|
"learning_rate": 2.6978093839135365e-06, |
|
"loss": 0.5924, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 1.5714285714285714, |
|
"grad_norm": 0.5069069605136209, |
|
"learning_rate": 2.669481281701739e-06, |
|
"loss": 0.5808, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.5738095238095238, |
|
"grad_norm": 0.5080243059955405, |
|
"learning_rate": 2.641279766818977e-06, |
|
"loss": 0.5826, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 1.5761904761904761, |
|
"grad_norm": 0.5233910641374417, |
|
"learning_rate": 2.6132053262641467e-06, |
|
"loss": 0.5862, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.5785714285714287, |
|
"grad_norm": 0.5114261441752712, |
|
"learning_rate": 2.5852584448417327e-06, |
|
"loss": 0.582, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 1.580952380952381, |
|
"grad_norm": 0.5249361251608791, |
|
"learning_rate": 2.5574396051534835e-06, |
|
"loss": 0.5812, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.5833333333333335, |
|
"grad_norm": 0.5014730717082388, |
|
"learning_rate": 2.529749287590042e-06, |
|
"loss": 0.5836, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.5857142857142859, |
|
"grad_norm": 0.5031355239363027, |
|
"learning_rate": 2.502187970322657e-06, |
|
"loss": 0.5713, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.5880952380952382, |
|
"grad_norm": 0.5267100809447453, |
|
"learning_rate": 2.4747561292949496e-06, |
|
"loss": 0.5871, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 1.5904761904761906, |
|
"grad_norm": 0.5066033968112823, |
|
"learning_rate": 2.447454238214654e-06, |
|
"loss": 0.5777, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.592857142857143, |
|
"grad_norm": 0.5249173659213104, |
|
"learning_rate": 2.420282768545469e-06, |
|
"loss": 0.5932, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 1.5952380952380953, |
|
"grad_norm": 0.48114876503065274, |
|
"learning_rate": 2.3932421894989167e-06, |
|
"loss": 0.5733, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.5976190476190477, |
|
"grad_norm": 0.48457566365078014, |
|
"learning_rate": 2.366332968026207e-06, |
|
"loss": 0.581, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.49253983378100513, |
|
"learning_rate": 2.339555568810221e-06, |
|
"loss": 0.569, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.6023809523809525, |
|
"grad_norm": 0.521391190119619, |
|
"learning_rate": 2.3129104542574433e-06, |
|
"loss": 0.575, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 1.6047619047619048, |
|
"grad_norm": 0.4986688245603456, |
|
"learning_rate": 2.2863980844900036e-06, |
|
"loss": 0.5853, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.6071428571428572, |
|
"grad_norm": 0.5270951766715323, |
|
"learning_rate": 2.2600189173377263e-06, |
|
"loss": 0.5898, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.6095238095238096, |
|
"grad_norm": 0.46626058041434404, |
|
"learning_rate": 2.2337734083302164e-06, |
|
"loss": 0.584, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.611904761904762, |
|
"grad_norm": 0.5313981173035822, |
|
"learning_rate": 2.207662010689002e-06, |
|
"loss": 0.5822, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 1.6142857142857143, |
|
"grad_norm": 0.5153471281514648, |
|
"learning_rate": 2.1816851753197023e-06, |
|
"loss": 0.5797, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.6166666666666667, |
|
"grad_norm": 0.5000763124278017, |
|
"learning_rate": 2.155843350804243e-06, |
|
"loss": 0.567, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 1.619047619047619, |
|
"grad_norm": 0.49402952120811466, |
|
"learning_rate": 2.130136983393112e-06, |
|
"loss": 0.5697, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.6214285714285714, |
|
"grad_norm": 0.48898166681541133, |
|
"learning_rate": 2.104566516997647e-06, |
|
"loss": 0.5779, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 1.6238095238095238, |
|
"grad_norm": 0.5391774389924573, |
|
"learning_rate": 2.0791323931823783e-06, |
|
"loss": 0.5914, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.6261904761904762, |
|
"grad_norm": 0.5080483298837424, |
|
"learning_rate": 2.053835051157397e-06, |
|
"loss": 0.5817, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 1.6285714285714286, |
|
"grad_norm": 0.49302417797140397, |
|
"learning_rate": 2.0286749277707783e-06, |
|
"loss": 0.5647, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.630952380952381, |
|
"grad_norm": 0.5241769643468105, |
|
"learning_rate": 2.0036524575010176e-06, |
|
"loss": 0.5853, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.6333333333333333, |
|
"grad_norm": 0.5265692131855949, |
|
"learning_rate": 1.9787680724495617e-06, |
|
"loss": 0.5817, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.6357142857142857, |
|
"grad_norm": 0.4902606126508354, |
|
"learning_rate": 1.9540222023333165e-06, |
|
"loss": 0.5807, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 1.638095238095238, |
|
"grad_norm": 0.5237480696892753, |
|
"learning_rate": 1.929415274477239e-06, |
|
"loss": 0.5701, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.6404761904761904, |
|
"grad_norm": 0.5357308687997984, |
|
"learning_rate": 1.9049477138069606e-06, |
|
"loss": 0.5804, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 1.6428571428571428, |
|
"grad_norm": 0.5263899257687392, |
|
"learning_rate": 1.880619942841435e-06, |
|
"loss": 0.5883, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.6452380952380952, |
|
"grad_norm": 0.5115574850941745, |
|
"learning_rate": 1.856432381685669e-06, |
|
"loss": 0.59, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 1.6476190476190475, |
|
"grad_norm": 0.5173104409710929, |
|
"learning_rate": 1.8323854480234348e-06, |
|
"loss": 0.5805, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 0.5011073792449935, |
|
"learning_rate": 1.808479557110081e-06, |
|
"loss": 0.5738, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 1.6523809523809523, |
|
"grad_norm": 0.4933932876019805, |
|
"learning_rate": 1.7847151217653624e-06, |
|
"loss": 0.5722, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.6547619047619047, |
|
"grad_norm": 0.5181601753202433, |
|
"learning_rate": 1.7610925523662836e-06, |
|
"loss": 0.5673, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.657142857142857, |
|
"grad_norm": 0.5054495843864125, |
|
"learning_rate": 1.7376122568400533e-06, |
|
"loss": 0.5771, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.6595238095238094, |
|
"grad_norm": 0.5277775744809468, |
|
"learning_rate": 1.714274640657001e-06, |
|
"loss": 0.5747, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 1.6619047619047618, |
|
"grad_norm": 0.48735398585218814, |
|
"learning_rate": 1.6910801068236015e-06, |
|
"loss": 0.5847, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.6642857142857141, |
|
"grad_norm": 0.5157125695595391, |
|
"learning_rate": 1.6680290558755119e-06, |
|
"loss": 0.5828, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.5089727599844109, |
|
"learning_rate": 1.6451218858706374e-06, |
|
"loss": 0.591, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.669047619047619, |
|
"grad_norm": 0.5026463229681872, |
|
"learning_rate": 1.6223589923822768e-06, |
|
"loss": 0.5817, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 1.6714285714285713, |
|
"grad_norm": 0.4931313873291854, |
|
"learning_rate": 1.599740768492286e-06, |
|
"loss": 0.5713, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.6738095238095239, |
|
"grad_norm": 0.5181473464694085, |
|
"learning_rate": 1.5772676047842862e-06, |
|
"loss": 0.5759, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 1.6761904761904762, |
|
"grad_norm": 0.4927232886574381, |
|
"learning_rate": 1.5549398893369216e-06, |
|
"loss": 0.5817, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.6785714285714286, |
|
"grad_norm": 0.5220587931111694, |
|
"learning_rate": 1.5327580077171589e-06, |
|
"loss": 0.5782, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.680952380952381, |
|
"grad_norm": 0.5333713690182812, |
|
"learning_rate": 1.5107223429736273e-06, |
|
"loss": 0.5883, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.6833333333333333, |
|
"grad_norm": 0.5150308121762561, |
|
"learning_rate": 1.4888332756300027e-06, |
|
"loss": 0.586, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 1.6857142857142857, |
|
"grad_norm": 0.4978412003341323, |
|
"learning_rate": 1.467091183678444e-06, |
|
"loss": 0.5872, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.688095238095238, |
|
"grad_norm": 0.4880778638366708, |
|
"learning_rate": 1.4454964425730533e-06, |
|
"loss": 0.5708, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 1.6904761904761905, |
|
"grad_norm": 0.5516107606480507, |
|
"learning_rate": 1.424049425223405e-06, |
|
"loss": 0.567, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.6928571428571428, |
|
"grad_norm": 0.5693622851862525, |
|
"learning_rate": 1.4027505019880972e-06, |
|
"loss": 0.5776, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 1.6952380952380952, |
|
"grad_norm": 0.5070794770825253, |
|
"learning_rate": 1.3816000406683604e-06, |
|
"loss": 0.5704, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.6976190476190476, |
|
"grad_norm": 0.49091161185672627, |
|
"learning_rate": 1.3605984065017074e-06, |
|
"loss": 0.5645, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 0.519545490092357, |
|
"learning_rate": 1.339745962155613e-06, |
|
"loss": 0.5866, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.7023809523809523, |
|
"grad_norm": 0.5212093423730666, |
|
"learning_rate": 1.3190430677212795e-06, |
|
"loss": 0.5864, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.704761904761905, |
|
"grad_norm": 0.531691412658331, |
|
"learning_rate": 1.2984900807073919e-06, |
|
"loss": 0.5789, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.7071428571428573, |
|
"grad_norm": 0.5456532742307001, |
|
"learning_rate": 1.278087356033947e-06, |
|
"loss": 0.5722, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 1.7095238095238097, |
|
"grad_norm": 0.5205925476159533, |
|
"learning_rate": 1.2578352460261456e-06, |
|
"loss": 0.5778, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.711904761904762, |
|
"grad_norm": 0.5062569568474926, |
|
"learning_rate": 1.2377341004082778e-06, |
|
"loss": 0.5741, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 0.5175248723743612, |
|
"learning_rate": 1.2177842662977136e-06, |
|
"loss": 0.5785, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.7166666666666668, |
|
"grad_norm": 0.5159262102883804, |
|
"learning_rate": 1.1979860881988903e-06, |
|
"loss": 0.5754, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 1.7190476190476192, |
|
"grad_norm": 0.5083274127105409, |
|
"learning_rate": 1.1783399079973578e-06, |
|
"loss": 0.5842, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.7214285714285715, |
|
"grad_norm": 0.5120999260869891, |
|
"learning_rate": 1.1588460649539036e-06, |
|
"loss": 0.5734, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 1.723809523809524, |
|
"grad_norm": 0.49029779483860925, |
|
"learning_rate": 1.1395048956986577e-06, |
|
"loss": 0.5899, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.7261904761904763, |
|
"grad_norm": 0.5065004286184338, |
|
"learning_rate": 1.1203167342253063e-06, |
|
"loss": 0.5728, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.7285714285714286, |
|
"grad_norm": 0.5162711801738049, |
|
"learning_rate": 1.1012819118853147e-06, |
|
"loss": 0.57, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.730952380952381, |
|
"grad_norm": 0.512398683483683, |
|
"learning_rate": 1.0824007573822025e-06, |
|
"loss": 0.573, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 1.7333333333333334, |
|
"grad_norm": 0.5060773003944762, |
|
"learning_rate": 1.0636735967658785e-06, |
|
"loss": 0.5758, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.7357142857142858, |
|
"grad_norm": 0.5719110243417397, |
|
"learning_rate": 1.0451007534269908e-06, |
|
"loss": 0.5706, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 1.7380952380952381, |
|
"grad_norm": 0.5159140197103158, |
|
"learning_rate": 1.026682548091361e-06, |
|
"loss": 0.5787, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.7404761904761905, |
|
"grad_norm": 0.5756689272489154, |
|
"learning_rate": 1.0084192988144392e-06, |
|
"loss": 0.589, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 1.7428571428571429, |
|
"grad_norm": 0.4987087820127243, |
|
"learning_rate": 9.903113209758098e-07, |
|
"loss": 0.5713, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.7452380952380953, |
|
"grad_norm": 0.49383352905457956, |
|
"learning_rate": 9.723589272737443e-07, |
|
"loss": 0.5692, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 1.7476190476190476, |
|
"grad_norm": 0.5341671518005138, |
|
"learning_rate": 9.545624277198085e-07, |
|
"loss": 0.5653, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.5264048094976569, |
|
"learning_rate": 9.369221296335007e-07, |
|
"loss": 0.5716, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.7523809523809524, |
|
"grad_norm": 0.49117865624067225, |
|
"learning_rate": 9.194383376369509e-07, |
|
"loss": 0.5755, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.7547619047619047, |
|
"grad_norm": 0.4802311797210391, |
|
"learning_rate": 9.021113536496551e-07, |
|
"loss": 0.5796, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 1.7571428571428571, |
|
"grad_norm": 0.5170954646484791, |
|
"learning_rate": 8.849414768832687e-07, |
|
"loss": 0.5575, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.7595238095238095, |
|
"grad_norm": 0.502792596764845, |
|
"learning_rate": 8.679290038364319e-07, |
|
"loss": 0.5833, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 1.7619047619047619, |
|
"grad_norm": 0.5049868760014847, |
|
"learning_rate": 8.510742282896545e-07, |
|
"loss": 0.577, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.7642857142857142, |
|
"grad_norm": 0.5037269478659716, |
|
"learning_rate": 8.343774413002382e-07, |
|
"loss": 0.5857, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 1.7666666666666666, |
|
"grad_norm": 0.531756805530802, |
|
"learning_rate": 8.178389311972612e-07, |
|
"loss": 0.5706, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.769047619047619, |
|
"grad_norm": 0.5354038647559776, |
|
"learning_rate": 8.014589835765807e-07, |
|
"loss": 0.5793, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 1.7714285714285714, |
|
"grad_norm": 0.5165889882026443, |
|
"learning_rate": 7.852378812959227e-07, |
|
"loss": 0.565, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.7738095238095237, |
|
"grad_norm": 0.5188950618246705, |
|
"learning_rate": 7.69175904469982e-07, |
|
"loss": 0.5681, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.776190476190476, |
|
"grad_norm": 0.5219125713505919, |
|
"learning_rate": 7.532733304655848e-07, |
|
"loss": 0.5823, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.7785714285714285, |
|
"grad_norm": 0.4842042459830578, |
|
"learning_rate": 7.375304338969135e-07, |
|
"loss": 0.5754, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 1.7809523809523808, |
|
"grad_norm": 0.5028634955697662, |
|
"learning_rate": 7.219474866207465e-07, |
|
"loss": 0.5655, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.7833333333333332, |
|
"grad_norm": 0.48467803772894236, |
|
"learning_rate": 7.065247577317747e-07, |
|
"loss": 0.5725, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 0.4918168156661026, |
|
"learning_rate": 6.912625135579587e-07, |
|
"loss": 0.5722, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.788095238095238, |
|
"grad_norm": 0.48464787354503214, |
|
"learning_rate": 6.761610176559086e-07, |
|
"loss": 0.5589, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 1.7904761904761903, |
|
"grad_norm": 0.5182220123762346, |
|
"learning_rate": 6.612205308063646e-07, |
|
"loss": 0.5684, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.7928571428571427, |
|
"grad_norm": 0.48991260111946716, |
|
"learning_rate": 6.464413110096601e-07, |
|
"loss": 0.5778, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 1.795238095238095, |
|
"grad_norm": 0.4667646370432281, |
|
"learning_rate": 6.318236134812917e-07, |
|
"loss": 0.5783, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.7976190476190477, |
|
"grad_norm": 0.5127849630517298, |
|
"learning_rate": 6.173676906475012e-07, |
|
"loss": 0.5793, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.5479174273289018, |
|
"learning_rate": 6.030737921409169e-07, |
|
"loss": 0.5789, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.8023809523809524, |
|
"grad_norm": 0.5027002463972741, |
|
"learning_rate": 5.889421647962456e-07, |
|
"loss": 0.5844, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 1.8047619047619048, |
|
"grad_norm": 0.47085642019543783, |
|
"learning_rate": 5.749730526460073e-07, |
|
"loss": 0.5675, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.8071428571428572, |
|
"grad_norm": 0.5072363123110816, |
|
"learning_rate": 5.611666969163243e-07, |
|
"loss": 0.5775, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 1.8095238095238095, |
|
"grad_norm": 0.4957008376046064, |
|
"learning_rate": 5.475233360227516e-07, |
|
"loss": 0.5811, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.811904761904762, |
|
"grad_norm": 0.5212409865496715, |
|
"learning_rate": 5.340432055661637e-07, |
|
"loss": 0.5761, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 1.8142857142857143, |
|
"grad_norm": 0.470997569919902, |
|
"learning_rate": 5.207265383286831e-07, |
|
"loss": 0.5727, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.8166666666666667, |
|
"grad_norm": 0.5180739672868397, |
|
"learning_rate": 5.075735642696611e-07, |
|
"loss": 0.5655, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 1.819047619047619, |
|
"grad_norm": 0.4888611854375848, |
|
"learning_rate": 4.945845105217118e-07, |
|
"loss": 0.5642, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.8214285714285714, |
|
"grad_norm": 0.5177332953584531, |
|
"learning_rate": 4.817596013867765e-07, |
|
"loss": 0.5845, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 1.8238095238095238, |
|
"grad_norm": 0.5279903730811234, |
|
"learning_rate": 4.6909905833226965e-07, |
|
"loss": 0.5775, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.8261904761904761, |
|
"grad_norm": 0.5151201728696789, |
|
"learning_rate": 4.566030999872384e-07, |
|
"loss": 0.5863, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 1.8285714285714287, |
|
"grad_norm": 0.5216946630712812, |
|
"learning_rate": 4.4427194213859216e-07, |
|
"loss": 0.5824, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.830952380952381, |
|
"grad_norm": 0.48997634713435234, |
|
"learning_rate": 4.3210579772738237e-07, |
|
"loss": 0.5633, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 1.8333333333333335, |
|
"grad_norm": 0.5266753228806161, |
|
"learning_rate": 4.2010487684511105e-07, |
|
"loss": 0.5861, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.8357142857142859, |
|
"grad_norm": 0.4963390005103402, |
|
"learning_rate": 4.082693867301224e-07, |
|
"loss": 0.5866, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 1.8380952380952382, |
|
"grad_norm": 0.5075963559582308, |
|
"learning_rate": 3.965995317640026e-07, |
|
"loss": 0.566, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.8404761904761906, |
|
"grad_norm": 0.5210567644268166, |
|
"learning_rate": 3.850955134680678e-07, |
|
"loss": 0.576, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 1.842857142857143, |
|
"grad_norm": 0.521115702642029, |
|
"learning_rate": 3.7375753049987974e-07, |
|
"loss": 0.5695, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.8452380952380953, |
|
"grad_norm": 0.5060214448757876, |
|
"learning_rate": 3.625857786498055e-07, |
|
"loss": 0.5704, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.8476190476190477, |
|
"grad_norm": 0.4875613223286847, |
|
"learning_rate": 3.515804508376508e-07, |
|
"loss": 0.5697, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.4832852448893152, |
|
"learning_rate": 3.4074173710931804e-07, |
|
"loss": 0.5768, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 1.8523809523809525, |
|
"grad_norm": 0.5331240685153525, |
|
"learning_rate": 3.3006982463352764e-07, |
|
"loss": 0.5737, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.8547619047619048, |
|
"grad_norm": 0.5001592000344391, |
|
"learning_rate": 3.1956489769859213e-07, |
|
"loss": 0.572, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 1.8571428571428572, |
|
"grad_norm": 0.48713909339424555, |
|
"learning_rate": 3.0922713770922155e-07, |
|
"loss": 0.5842, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.8595238095238096, |
|
"grad_norm": 0.505832290948494, |
|
"learning_rate": 2.9905672318339963e-07, |
|
"loss": 0.5794, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 1.861904761904762, |
|
"grad_norm": 0.5001487285158832, |
|
"learning_rate": 2.8905382974930173e-07, |
|
"loss": 0.5797, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.8642857142857143, |
|
"grad_norm": 0.5134685274388742, |
|
"learning_rate": 2.7921863014225504e-07, |
|
"loss": 0.5938, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 1.8666666666666667, |
|
"grad_norm": 0.48901171207964383, |
|
"learning_rate": 2.6955129420176193e-07, |
|
"loss": 0.5804, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.869047619047619, |
|
"grad_norm": 0.4928766933172695, |
|
"learning_rate": 2.6005198886856486e-07, |
|
"loss": 0.5738, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 1.8714285714285714, |
|
"grad_norm": 0.4929640596310142, |
|
"learning_rate": 2.507208781817638e-07, |
|
"loss": 0.5628, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.8738095238095238, |
|
"grad_norm": 0.5172714992148169, |
|
"learning_rate": 2.4155812327598337e-07, |
|
"loss": 0.5649, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 1.8761904761904762, |
|
"grad_norm": 0.5022968661364299, |
|
"learning_rate": 2.3256388237858806e-07, |
|
"loss": 0.5681, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.8785714285714286, |
|
"grad_norm": 0.5232052126416001, |
|
"learning_rate": 2.2373831080695463e-07, |
|
"loss": 0.5745, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 1.880952380952381, |
|
"grad_norm": 0.488579854673118, |
|
"learning_rate": 2.1508156096578748e-07, |
|
"loss": 0.5729, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.8833333333333333, |
|
"grad_norm": 0.5155005458528391, |
|
"learning_rate": 2.0659378234448524e-07, |
|
"loss": 0.5732, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 1.8857142857142857, |
|
"grad_norm": 0.5024985648134219, |
|
"learning_rate": 1.9827512151456175e-07, |
|
"loss": 0.5627, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.888095238095238, |
|
"grad_norm": 0.5210350946419919, |
|
"learning_rate": 1.9012572212711467e-07, |
|
"loss": 0.5766, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 1.8904761904761904, |
|
"grad_norm": 0.505351224359543, |
|
"learning_rate": 1.82145724910342e-07, |
|
"loss": 0.5787, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.8928571428571428, |
|
"grad_norm": 0.5042030458059477, |
|
"learning_rate": 1.7433526766711727e-07, |
|
"loss": 0.5756, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 1.8952380952380952, |
|
"grad_norm": 0.5196433352960352, |
|
"learning_rate": 1.6669448527260602e-07, |
|
"loss": 0.5787, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.8976190476190475, |
|
"grad_norm": 0.5042961399463299, |
|
"learning_rate": 1.5922350967193524e-07, |
|
"loss": 0.5828, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.49509327596749514, |
|
"learning_rate": 1.519224698779198e-07, |
|
"loss": 0.5666, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.9023809523809523, |
|
"grad_norm": 0.4993392687429765, |
|
"learning_rate": 1.447914919688298e-07, |
|
"loss": 0.5862, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 1.9047619047619047, |
|
"grad_norm": 0.5302407636844851, |
|
"learning_rate": 1.3783069908621772e-07, |
|
"loss": 0.5711, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.907142857142857, |
|
"grad_norm": 0.49474811360911164, |
|
"learning_rate": 1.3104021143278911e-07, |
|
"loss": 0.5876, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 1.9095238095238094, |
|
"grad_norm": 0.497576228240754, |
|
"learning_rate": 1.2442014627032318e-07, |
|
"loss": 0.5744, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.9119047619047618, |
|
"grad_norm": 0.4871944438237385, |
|
"learning_rate": 1.1797061791766207e-07, |
|
"loss": 0.5714, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 1.9142857142857141, |
|
"grad_norm": 0.4830245999351171, |
|
"learning_rate": 1.1169173774871478e-07, |
|
"loss": 0.5686, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.9166666666666665, |
|
"grad_norm": 0.5413142563859302, |
|
"learning_rate": 1.055836141905553e-07, |
|
"loss": 0.5888, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 1.919047619047619, |
|
"grad_norm": 0.46941478343612175, |
|
"learning_rate": 9.964635272153633e-08, |
|
"loss": 0.5786, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.9214285714285713, |
|
"grad_norm": 0.5140729728882855, |
|
"learning_rate": 9.388005586947191e-08, |
|
"loss": 0.565, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 1.9238095238095239, |
|
"grad_norm": 0.4991452996322733, |
|
"learning_rate": 8.82848232098732e-08, |
|
"loss": 0.5708, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.9261904761904762, |
|
"grad_norm": 0.505925510646528, |
|
"learning_rate": 8.286075136421435e-08, |
|
"loss": 0.5814, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 1.9285714285714286, |
|
"grad_norm": 0.5275663420688306, |
|
"learning_rate": 7.760793399827937e-08, |
|
"loss": 0.5635, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.930952380952381, |
|
"grad_norm": 0.5049137853663558, |
|
"learning_rate": 7.25264618205357e-08, |
|
"loss": 0.5547, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 1.9333333333333333, |
|
"grad_norm": 0.4878320213926508, |
|
"learning_rate": 6.761642258056977e-08, |
|
"loss": 0.5695, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.9357142857142857, |
|
"grad_norm": 0.5081138801093147, |
|
"learning_rate": 6.287790106757396e-08, |
|
"loss": 0.5683, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 1.938095238095238, |
|
"grad_norm": 0.4943589564870212, |
|
"learning_rate": 5.831097910887873e-08, |
|
"loss": 0.5658, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.9404761904761905, |
|
"grad_norm": 0.5302217136380152, |
|
"learning_rate": 5.391573556854157e-08, |
|
"loss": 0.5678, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 1.9428571428571428, |
|
"grad_norm": 0.5038390499507364, |
|
"learning_rate": 4.9692246345985905e-08, |
|
"loss": 0.5702, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.9452380952380952, |
|
"grad_norm": 0.5151211498515915, |
|
"learning_rate": 4.564058437468877e-08, |
|
"loss": 0.5784, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 1.9476190476190476, |
|
"grad_norm": 0.4789745893173457, |
|
"learning_rate": 4.176081962092182e-08, |
|
"loss": 0.5749, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.4997223839030282, |
|
"learning_rate": 3.805301908254455e-08, |
|
"loss": 0.5871, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 1.9523809523809523, |
|
"grad_norm": 0.49934046802623616, |
|
"learning_rate": 3.451724678784518e-08, |
|
"loss": 0.561, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.954761904761905, |
|
"grad_norm": 0.4974229497151692, |
|
"learning_rate": 3.115356379443601e-08, |
|
"loss": 0.5661, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 1.9571428571428573, |
|
"grad_norm": 0.5196515473348364, |
|
"learning_rate": 2.796202818819871e-08, |
|
"loss": 0.5856, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.9595238095238097, |
|
"grad_norm": 0.49970636623212594, |
|
"learning_rate": 2.4942695082281752e-08, |
|
"loss": 0.5797, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 1.961904761904762, |
|
"grad_norm": 0.5173329159698928, |
|
"learning_rate": 2.2095616616150117e-08, |
|
"loss": 0.5747, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.9642857142857144, |
|
"grad_norm": 0.49959590807990745, |
|
"learning_rate": 1.9420841954681525e-08, |
|
"loss": 0.5646, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.9666666666666668, |
|
"grad_norm": 0.48412036634259087, |
|
"learning_rate": 1.6918417287318245e-08, |
|
"loss": 0.5777, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.9690476190476192, |
|
"grad_norm": 0.50340974957607, |
|
"learning_rate": 1.4588385827272178e-08, |
|
"loss": 0.5762, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 1.9714285714285715, |
|
"grad_norm": 0.5095137173840767, |
|
"learning_rate": 1.2430787810776556e-08, |
|
"loss": 0.5779, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.973809523809524, |
|
"grad_norm": 0.5201276330625599, |
|
"learning_rate": 1.0445660496390952e-08, |
|
"loss": 0.5829, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 1.9761904761904763, |
|
"grad_norm": 0.4957082874285413, |
|
"learning_rate": 8.633038164358454e-09, |
|
"loss": 0.5782, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.9785714285714286, |
|
"grad_norm": 0.49005045720787455, |
|
"learning_rate": 6.992952116013918e-09, |
|
"loss": 0.5769, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 1.980952380952381, |
|
"grad_norm": 0.5163548919624951, |
|
"learning_rate": 5.525430673244403e-09, |
|
"loss": 0.574, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.9833333333333334, |
|
"grad_norm": 0.5289724215651535, |
|
"learning_rate": 4.230499177994007e-09, |
|
"loss": 0.5773, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 1.9857142857142858, |
|
"grad_norm": 0.485812450600075, |
|
"learning_rate": 3.1081799918375454e-09, |
|
"loss": 0.5835, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.9880952380952381, |
|
"grad_norm": 0.48465992123370755, |
|
"learning_rate": 2.1584924955819763e-09, |
|
"loss": 0.5851, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.9904761904761905, |
|
"grad_norm": 0.5073868341064405, |
|
"learning_rate": 1.3814530889433298e-09, |
|
"loss": 0.5656, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.9928571428571429, |
|
"grad_norm": 0.4822167121104924, |
|
"learning_rate": 7.770751902513862e-10, |
|
"loss": 0.5766, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 1.9952380952380953, |
|
"grad_norm": 0.5135567153768665, |
|
"learning_rate": 3.4536923623096353e-10, |
|
"loss": 0.5891, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.9976190476190476, |
|
"grad_norm": 0.5058535961206515, |
|
"learning_rate": 8.634268181095806e-11, |
|
"loss": 0.5653, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.5344236072743405, |
|
"learning_rate": 0.0, |
|
"loss": 0.56, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.741725742816925, |
|
"eval_runtime": 110.7755, |
|
"eval_samples_per_second": 77.282, |
|
"eval_steps_per_second": 1.21, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 4200, |
|
"total_flos": 879394553856000.0, |
|
"train_loss": 0.6596568237599872, |
|
"train_runtime": 12326.0183, |
|
"train_samples_per_second": 21.801, |
|
"train_steps_per_second": 0.341 |
|
} |
|
  ],
  "logging_steps": 5,
  "max_steps": 4200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 879394553856000.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|