|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 753, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.347826086956522e-06, |
|
"loss": 4.2511, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.695652173913044e-06, |
|
"loss": 4.2595, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3043478260869566e-05, |
|
"loss": 3.0895, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.739130434782609e-05, |
|
"loss": 0.8882, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.173913043478261e-05, |
|
"loss": 0.4819, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.608695652173913e-05, |
|
"loss": 0.8914, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.0434782608695656e-05, |
|
"loss": 1.1554, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.478260869565218e-05, |
|
"loss": 0.3562, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.91304347826087e-05, |
|
"loss": 0.4085, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.347826086956522e-05, |
|
"loss": 0.2655, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.782608695652174e-05, |
|
"loss": 0.281, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.217391304347826e-05, |
|
"loss": 0.4708, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.652173913043478e-05, |
|
"loss": 4.1421, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.086956521739131e-05, |
|
"loss": 2.4328, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.521739130434783e-05, |
|
"loss": 1.7092, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.956521739130436e-05, |
|
"loss": 0.687, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.391304347826086e-05, |
|
"loss": 0.2745, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.82608695652174e-05, |
|
"loss": 0.3214, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.260869565217392e-05, |
|
"loss": 0.4915, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.695652173913044e-05, |
|
"loss": 0.2447, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.130434782608696e-05, |
|
"loss": 0.2353, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.565217391304348e-05, |
|
"loss": 0.2507, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2238, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.99995369868095e-05, |
|
"loss": 0.2327, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.999814795581328e-05, |
|
"loss": 0.5584, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.99958329327369e-05, |
|
"loss": 1.6468, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.999259196045582e-05, |
|
"loss": 5.5723, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.998842509899456e-05, |
|
"loss": 4.1912, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.998333242552556e-05, |
|
"loss": 0.9005, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.997731403436788e-05, |
|
"loss": 0.5078, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.997037003698525e-05, |
|
"loss": 0.2464, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.996250056198417e-05, |
|
"loss": 0.258, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.995370575511151e-05, |
|
"loss": 0.2438, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.994398577925169e-05, |
|
"loss": 0.2159, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.993334081442381e-05, |
|
"loss": 0.1959, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.992177105777822e-05, |
|
"loss": 0.2217, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.990927672359294e-05, |
|
"loss": 0.1972, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.989585804326962e-05, |
|
"loss": 0.1947, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.988151526532929e-05, |
|
"loss": 0.2028, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.986624865540778e-05, |
|
"loss": 0.1989, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.985005849625076e-05, |
|
"loss": 0.1963, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.983294508770851e-05, |
|
"loss": 0.1935, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.981490874673039e-05, |
|
"loss": 0.1949, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.979594980735896e-05, |
|
"loss": 0.1979, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.977606862072377e-05, |
|
"loss": 0.1882, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.975526555503488e-05, |
|
"loss": 0.1911, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.973354099557606e-05, |
|
"loss": 0.1914, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.97108953446976e-05, |
|
"loss": 0.1945, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.968732902180891e-05, |
|
"loss": 0.1876, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.966284246337072e-05, |
|
"loss": 0.1893, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.963743612288701e-05, |
|
"loss": 0.1946, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.961111047089662e-05, |
|
"loss": 0.1909, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.95838659949645e-05, |
|
"loss": 0.1959, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.955570319967273e-05, |
|
"loss": 0.1922, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.952662260661115e-05, |
|
"loss": 0.195, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.949662475436765e-05, |
|
"loss": 0.1941, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.946571019851832e-05, |
|
"loss": 0.1941, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.943387951161703e-05, |
|
"loss": 0.189, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.940113328318488e-05, |
|
"loss": 0.193, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.936747211969932e-05, |
|
"loss": 0.1918, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.933289664458284e-05, |
|
"loss": 0.1939, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.929740749819152e-05, |
|
"loss": 0.1912, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.926100533780303e-05, |
|
"loss": 0.195, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.92236908376046e-05, |
|
"loss": 0.1909, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.91854646886805e-05, |
|
"loss": 0.1926, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.91463275989991e-05, |
|
"loss": 0.1911, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.910628029340003e-05, |
|
"loss": 0.1951, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.906532351358047e-05, |
|
"loss": 0.1877, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.902345801808162e-05, |
|
"loss": 0.1894, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.898068458227451e-05, |
|
"loss": 0.19, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.893700399834577e-05, |
|
"loss": 0.1927, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.889241707528285e-05, |
|
"loss": 0.1919, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.88469246388591e-05, |
|
"loss": 0.1906, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.880052753161842e-05, |
|
"loss": 0.1928, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.875322661285975e-05, |
|
"loss": 0.1887, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.870502275862104e-05, |
|
"loss": 0.1919, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.86559168616631e-05, |
|
"loss": 0.1892, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.860590983145307e-05, |
|
"loss": 0.1876, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.855500259414753e-05, |
|
"loss": 0.1946, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.850319609257535e-05, |
|
"loss": 0.1889, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.845049128622032e-05, |
|
"loss": 0.1909, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.839688915120322e-05, |
|
"loss": 0.1914, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.834239068026387e-05, |
|
"loss": 0.1923, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.828699688274275e-05, |
|
"loss": 0.1902, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.823070878456217e-05, |
|
"loss": 0.1919, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.817352742820744e-05, |
|
"loss": 0.1925, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.811545387270743e-05, |
|
"loss": 0.1871, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.805648919361504e-05, |
|
"loss": 0.1922, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.799663448298724e-05, |
|
"loss": 0.1953, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.793589084936483e-05, |
|
"loss": 0.192, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.787425941775197e-05, |
|
"loss": 0.19, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.781174132959529e-05, |
|
"loss": 0.1866, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.774833774276278e-05, |
|
"loss": 0.1906, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.768404983152229e-05, |
|
"loss": 0.1907, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.761887878651987e-05, |
|
"loss": 0.1915, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 0.1903, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.74858921395716e-05, |
|
"loss": 0.1917, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.741807900060858e-05, |
|
"loss": 0.1908, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.734938765380377e-05, |
|
"loss": 0.1921, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.727981937135713e-05, |
|
"loss": 0.1879, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 0.18334217369556427, |
|
"eval_runtime": 12.1378, |
|
"eval_samples_per_second": 164.775, |
|
"eval_steps_per_second": 0.659, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.720937544170999e-05, |
|
"loss": 0.1908, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.713805716952105e-05, |
|
"loss": 0.1892, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.706586587564237e-05, |
|
"loss": 0.1902, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.699280289709478e-05, |
|
"loss": 0.1905, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.691886958704321e-05, |
|
"loss": 0.1894, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.684406731477158e-05, |
|
"loss": 0.1943, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.676839746565743e-05, |
|
"loss": 0.1883, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.669186144114625e-05, |
|
"loss": 0.1935, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.661446065872568e-05, |
|
"loss": 0.1914, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.653619655189897e-05, |
|
"loss": 0.1877, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.645707057015871e-05, |
|
"loss": 0.1898, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.637708417895983e-05, |
|
"loss": 0.1898, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.62962388596925e-05, |
|
"loss": 0.1922, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.621453610965467e-05, |
|
"loss": 0.1906, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.613197744202438e-05, |
|
"loss": 0.1881, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.604856438583174e-05, |
|
"loss": 0.1894, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.596429848593053e-05, |
|
"loss": 0.1872, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.587918130296969e-05, |
|
"loss": 0.192, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.579321441336436e-05, |
|
"loss": 0.1876, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.57063994092667e-05, |
|
"loss": 0.1914, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.561873789853637e-05, |
|
"loss": 0.1897, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.553023150471082e-05, |
|
"loss": 0.1907, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.544088186697515e-05, |
|
"loss": 0.1905, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.53506906401318e-05, |
|
"loss": 0.1918, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.525965949456988e-05, |
|
"loss": 0.1908, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.516779011623422e-05, |
|
"loss": 0.1898, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.50750842065942e-05, |
|
"loss": 0.1871, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.498154348261216e-05, |
|
"loss": 0.1906, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.48871696767117e-05, |
|
"loss": 0.1905, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.479196453674544e-05, |
|
"loss": 0.1906, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.469592982596285e-05, |
|
"loss": 0.1906, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.459906732297743e-05, |
|
"loss": 0.1929, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.450137882173384e-05, |
|
"loss": 0.1918, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.440286613147467e-05, |
|
"loss": 0.1895, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.430353107670691e-05, |
|
"loss": 0.189, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.420337549716818e-05, |
|
"loss": 0.1942, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.410240124779268e-05, |
|
"loss": 0.192, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.400061019867679e-05, |
|
"loss": 0.1908, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.389800423504442e-05, |
|
"loss": 0.1908, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.379458525721215e-05, |
|
"loss": 0.1892, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.369035518055404e-05, |
|
"loss": 0.1887, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.35853159354661e-05, |
|
"loss": 0.1901, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.347946946733055e-05, |
|
"loss": 0.1908, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.337281773647986e-05, |
|
"loss": 0.1958, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.326536271816031e-05, |
|
"loss": 0.1905, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.315710640249558e-05, |
|
"loss": 0.1932, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.304805079444971e-05, |
|
"loss": 0.1894, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.293819791379016e-05, |
|
"loss": 0.1921, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.282754979505018e-05, |
|
"loss": 0.1927, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.271610848749135e-05, |
|
"loss": 0.1971, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.260387605506547e-05, |
|
"loss": 0.1894, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.249085457637641e-05, |
|
"loss": 0.1908, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.237704614464156e-05, |
|
"loss": 0.189, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.226245286765316e-05, |
|
"loss": 0.1888, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.214707686773914e-05, |
|
"loss": 0.1883, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.203092028172391e-05, |
|
"loss": 0.1918, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.191398526088873e-05, |
|
"loss": 0.1936, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.179627397093185e-05, |
|
"loss": 0.1901, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.16777885919285e-05, |
|
"loss": 0.1891, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.15585313182904e-05, |
|
"loss": 0.1919, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.143850435872518e-05, |
|
"loss": 0.1909, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.131770993619548e-05, |
|
"loss": 0.1936, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.119615028787771e-05, |
|
"loss": 0.1867, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.107382766512071e-05, |
|
"loss": 0.1925, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.095074433340401e-05, |
|
"loss": 0.1902, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.08269025722958e-05, |
|
"loss": 0.1921, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.07023046754109e-05, |
|
"loss": 0.1869, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.057695295036805e-05, |
|
"loss": 0.1901, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 0.1903, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.032399731604723e-05, |
|
"loss": 0.1933, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.019639809164106e-05, |
|
"loss": 0.1929, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.006805440873383e-05, |
|
"loss": 0.1948, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.993896864431826e-05, |
|
"loss": 0.1917, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.980914318913078e-05, |
|
"loss": 0.1936, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.967858044760737e-05, |
|
"loss": 0.1902, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.954728283783886e-05, |
|
"loss": 0.1893, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.941525279152627e-05, |
|
"loss": 0.1925, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.928249275393572e-05, |
|
"loss": 0.1905, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.914900518385314e-05, |
|
"loss": 0.1912, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.901479255353879e-05, |
|
"loss": 0.1889, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.887985734868135e-05, |
|
"loss": 0.189, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.874420206835203e-05, |
|
"loss": 0.191, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.860782922495822e-05, |
|
"loss": 0.1889, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.847074134419691e-05, |
|
"loss": 0.1869, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.833294096500797e-05, |
|
"loss": 0.1857, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.819443063952713e-05, |
|
"loss": 0.1865, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.805521293303874e-05, |
|
"loss": 0.1917, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.791529042392813e-05, |
|
"loss": 0.1904, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.777466570363401e-05, |
|
"loss": 0.1861, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.763334137660042e-05, |
|
"loss": 0.1886, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.749132006022844e-05, |
|
"loss": 0.1878, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.734860438482777e-05, |
|
"loss": 0.1903, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.720519699356804e-05, |
|
"loss": 0.1881, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.70611005424298e-05, |
|
"loss": 0.1894, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.691631770015533e-05, |
|
"loss": 0.1933, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.677085114819928e-05, |
|
"loss": 0.1882, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.662470358067895e-05, |
|
"loss": 0.1853, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.647787770432439e-05, |
|
"loss": 0.1874, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 8.633037623842828e-05, |
|
"loss": 0.1896, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.618220191479563e-05, |
|
"loss": 0.1891, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 0.18544606864452362, |
|
"eval_runtime": 11.5227, |
|
"eval_samples_per_second": 173.57, |
|
"eval_steps_per_second": 0.694, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.603335747769306e-05, |
|
"loss": 0.1901, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.58838456837981e-05, |
|
"loss": 0.1894, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.573366930214806e-05, |
|
"loss": 0.1856, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.558283111408873e-05, |
|
"loss": 0.1866, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.5431333913223e-05, |
|
"loss": 0.1885, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.52791805053589e-05, |
|
"loss": 0.1906, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 8.512637370845785e-05, |
|
"loss": 0.1869, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.497291635258235e-05, |
|
"loss": 0.1894, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.481881127984361e-05, |
|
"loss": 0.19, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.466406134434887e-05, |
|
"loss": 0.1851, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.45086694121486e-05, |
|
"loss": 0.1846, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.435263836118335e-05, |
|
"loss": 0.1905, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.419597108123054e-05, |
|
"loss": 0.1872, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 8.403867047385081e-05, |
|
"loss": 0.1867, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.388073945233445e-05, |
|
"loss": 0.1859, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.372218094164728e-05, |
|
"loss": 0.189, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.356299787837658e-05, |
|
"loss": 0.1855, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.340319321067668e-05, |
|
"loss": 0.1844, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.324276989821434e-05, |
|
"loss": 0.1915, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.308173091211391e-05, |
|
"loss": 0.1894, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.292007923490245e-05, |
|
"loss": 0.1874, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 8.275781786045427e-05, |
|
"loss": 0.1887, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.259494979393563e-05, |
|
"loss": 0.1849, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.243147805174908e-05, |
|
"loss": 0.1858, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.226740566147753e-05, |
|
"loss": 0.1859, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.210273566182818e-05, |
|
"loss": 0.1871, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.193747110257637e-05, |
|
"loss": 0.1849, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.177161504450888e-05, |
|
"loss": 0.1886, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.160517055936744e-05, |
|
"loss": 0.1858, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.143814072979173e-05, |
|
"loss": 0.186, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.127052864926232e-05, |
|
"loss": 0.1888, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.110233742204339e-05, |
|
"loss": 0.1852, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.093357016312517e-05, |
|
"loss": 0.1865, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.07642299981664e-05, |
|
"loss": 0.1864, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.059432006343623e-05, |
|
"loss": 0.1816, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.042384350575632e-05, |
|
"loss": 0.1834, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.025280348244246e-05, |
|
"loss": 0.1859, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.008120316124612e-05, |
|
"loss": 0.1831, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.990904572029583e-05, |
|
"loss": 0.1879, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.973633434803817e-05, |
|
"loss": 0.1839, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.956307224317891e-05, |
|
"loss": 0.1888, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 0.1832, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.921490868141843e-05, |
|
"loss": 0.1849, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.904001367269004e-05, |
|
"loss": 0.1815, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.886458082758637e-05, |
|
"loss": 0.1827, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.868861339521624e-05, |
|
"loss": 0.1835, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.851211463458936e-05, |
|
"loss": 0.1868, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.833508781455588e-05, |
|
"loss": 0.1837, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.815753621374594e-05, |
|
"loss": 0.182, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.797946312050883e-05, |
|
"loss": 0.1827, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.780087183285223e-05, |
|
"loss": 0.1911, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.7621765658381e-05, |
|
"loss": 0.1867, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.744214791423596e-05, |
|
"loss": 0.184, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.726202192703256e-05, |
|
"loss": 0.1837, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.708139103279908e-05, |
|
"loss": 0.1864, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.690025857691498e-05, |
|
"loss": 0.1843, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.671862791404896e-05, |
|
"loss": 0.1843, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.653650240809667e-05, |
|
"loss": 0.1816, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.635388543211861e-05, |
|
"loss": 0.1887, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.617078036827752e-05, |
|
"loss": 0.1826, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.59871906077758e-05, |
|
"loss": 0.1876, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.580311955079264e-05, |
|
"loss": 0.1852, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.56185706064212e-05, |
|
"loss": 0.1837, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.543354719260523e-05, |
|
"loss": 0.1813, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.524805273607603e-05, |
|
"loss": 0.1899, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.506209067228878e-05, |
|
"loss": 0.1878, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.487566444535903e-05, |
|
"loss": 0.1852, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.468877750799886e-05, |
|
"loss": 0.1879, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.450143332145296e-05, |
|
"loss": 0.1868, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.43136353554345e-05, |
|
"loss": 0.1851, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.412538708806093e-05, |
|
"loss": 0.1835, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.393669200578943e-05, |
|
"loss": 0.1855, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.374755360335253e-05, |
|
"loss": 0.1827, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.355797538369321e-05, |
|
"loss": 0.1889, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.336796085790013e-05, |
|
"loss": 0.1799, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.317751354514255e-05, |
|
"loss": 0.1863, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.298663697260522e-05, |
|
"loss": 0.1855, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.279533467542294e-05, |
|
"loss": 0.1851, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.260361019661522e-05, |
|
"loss": 0.1851, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.241146708702053e-05, |
|
"loss": 0.1856, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.221890890523068e-05, |
|
"loss": 0.187, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.202593921752475e-05, |
|
"loss": 0.1826, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.18325615978032e-05, |
|
"loss": 0.1866, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.163877962752157e-05, |
|
"loss": 0.1859, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.144459689562418e-05, |
|
"loss": 0.1843, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.12500169984777e-05, |
|
"loss": 0.1833, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.105504353980448e-05, |
|
"loss": 0.1841, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.085968013061584e-05, |
|
"loss": 0.185, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.066393038914521e-05, |
|
"loss": 0.1838, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 7.046779794078108e-05, |
|
"loss": 0.1852, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 7.027128641799986e-05, |
|
"loss": 0.1832, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 7.007439946029865e-05, |
|
"loss": 0.1899, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.98771407141278e-05, |
|
"loss": 0.1841, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.967951383282334e-05, |
|
"loss": 0.1869, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.94815224765394e-05, |
|
"loss": 0.1847, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.928317031218035e-05, |
|
"loss": 0.1848, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.908446101333295e-05, |
|
"loss": 0.1849, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.888539826019825e-05, |
|
"loss": 0.1829, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.868598573952345e-05, |
|
"loss": 0.1849, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.848622714453366e-05, |
|
"loss": 0.1844, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 0.17592795193195343, |
|
"eval_runtime": 11.447, |
|
"eval_samples_per_second": 174.719, |
|
"eval_steps_per_second": 0.699, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.828612617486347e-05, |
|
"loss": 0.1841, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.808568653648838e-05, |
|
"loss": 0.1843, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.78849119416563e-05, |
|
"loss": 0.1872, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.76838061088186e-05, |
|
"loss": 0.1856, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.748237276256143e-05, |
|
"loss": 0.1849, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.728061563353667e-05, |
|
"loss": 0.1817, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.707853845839278e-05, |
|
"loss": 0.1852, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.687614497970566e-05, |
|
"loss": 0.1837, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.667343894590935e-05, |
|
"loss": 0.1832, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.647042411122652e-05, |
|
"loss": 0.1843, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.626710423559901e-05, |
|
"loss": 0.1826, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.606348308461823e-05, |
|
"loss": 0.1848, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.585956442945532e-05, |
|
"loss": 0.1848, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.565535204679134e-05, |
|
"loss": 0.1812, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 0.186, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.524606123281445e-05, |
|
"loss": 0.1813, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.504099038178338e-05, |
|
"loss": 0.1814, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.483564096367451e-05, |
|
"loss": 0.1849, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.463001678166744e-05, |
|
"loss": 0.1855, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.442412164403045e-05, |
|
"loss": 0.1865, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.42179593640502e-05, |
|
"loss": 0.1828, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.401153375996081e-05, |
|
"loss": 0.1834, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.380484865487347e-05, |
|
"loss": 0.1817, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.359790787670527e-05, |
|
"loss": 0.1855, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.339071525810871e-05, |
|
"loss": 0.1837, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.318327463640037e-05, |
|
"loss": 0.1845, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.297558985348998e-05, |
|
"loss": 0.1817, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.276766475580935e-05, |
|
"loss": 0.1869, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.255950319424098e-05, |
|
"loss": 0.1838, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.235110902404679e-05, |
|
"loss": 0.1851, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.21424861047968e-05, |
|
"loss": 0.184, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.193363830029751e-05, |
|
"loss": 0.1847, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.172456947852049e-05, |
|
"loss": 0.1855, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.151528351153061e-05, |
|
"loss": 0.1854, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.130578427541441e-05, |
|
"loss": 0.183, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.109607565020828e-05, |
|
"loss": 0.1794, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.08861615198266e-05, |
|
"loss": 0.1869, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.067604577198981e-05, |
|
"loss": 0.188, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.046573229815243e-05, |
|
"loss": 0.1831, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.025522499343097e-05, |
|
"loss": 0.1886, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 6.004452775653178e-05, |
|
"loss": 0.1861, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.9833644489678816e-05, |
|
"loss": 0.1817, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.96225790985415e-05, |
|
"loss": 0.1863, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.941133549216221e-05, |
|
"loss": 0.1807, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.919991758288401e-05, |
|
"loss": 0.1846, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.898832928627811e-05, |
|
"loss": 0.1828, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.877657452107142e-05, |
|
"loss": 0.1823, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.856465720907388e-05, |
|
"loss": 0.1843, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.835258127510596e-05, |
|
"loss": 0.183, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.8140350646925845e-05, |
|
"loss": 0.1842, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.7927969255156736e-05, |
|
"loss": 0.1845, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.771544103321407e-05, |
|
"loss": 0.1794, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.7502769917232635e-05, |
|
"loss": 0.1782, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.7289959845993736e-05, |
|
"loss": 0.187, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.7077014760852164e-05, |
|
"loss": 0.1829, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.686393860566324e-05, |
|
"loss": 0.1849, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.66507353267098e-05, |
|
"loss": 0.1848, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.643740887262905e-05, |
|
"loss": 0.1825, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.6223963194339466e-05, |
|
"loss": 0.1856, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.601040224496764e-05, |
|
"loss": 0.183, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.579672997977503e-05, |
|
"loss": 0.1833, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.5582950356084726e-05, |
|
"loss": 0.1821, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.536906733320816e-05, |
|
"loss": 0.1823, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.515508487237174e-05, |
|
"loss": 0.1845, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.494100693664358e-05, |
|
"loss": 0.1825, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.4726837490859964e-05, |
|
"loss": 0.181, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.4512580501552056e-05, |
|
"loss": 0.1839, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.429823993687233e-05, |
|
"loss": 0.1834, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.4083819766521135e-05, |
|
"loss": 0.1843, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.386932396167316e-05, |
|
"loss": 0.1827, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.365475649490388e-05, |
|
"loss": 0.1844, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.3440121340116e-05, |
|
"loss": 0.1854, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.3225422472465824e-05, |
|
"loss": 0.1843, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.3010663868289655e-05, |
|
"loss": 0.1815, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.279584950503017e-05, |
|
"loss": 0.1811, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.2580983361162696e-05, |
|
"loss": 0.1856, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.23660694161216e-05, |
|
"loss": 0.1832, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.215111165022652e-05, |
|
"loss": 0.1826, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.193611404460873e-05, |
|
"loss": 0.1826, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.172108058113729e-05, |
|
"loss": 0.1845, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.1506015242345415e-05, |
|
"loss": 0.1815, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.129092201135666e-05, |
|
"loss": 0.1848, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.1075804871811115e-05, |
|
"loss": 0.1845, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.086066780779174e-05, |
|
"loss": 0.184, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.064551480375046e-05, |
|
"loss": 0.1851, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.0430349844434424e-05, |
|
"loss": 0.187, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.021517691481221e-05, |
|
"loss": 0.1817, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1835, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.978482308518779e-05, |
|
"loss": 0.1827, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.956965015556559e-05, |
|
"loss": 0.1839, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.9354485196249554e-05, |
|
"loss": 0.1828, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.9139332192208276e-05, |
|
"loss": 0.1843, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.892419512818889e-05, |
|
"loss": 0.187, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.870907798864337e-05, |
|
"loss": 0.1799, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.8493984757654596e-05, |
|
"loss": 0.1842, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.827891941886273e-05, |
|
"loss": 0.183, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.806388595539129e-05, |
|
"loss": 0.1813, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.784888834977347e-05, |
|
"loss": 0.1822, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.763393058387841e-05, |
|
"loss": 0.1836, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.741901663883731e-05, |
|
"loss": 0.1853, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 0.1743704080581665, |
|
"eval_runtime": 12.7542, |
|
"eval_samples_per_second": 156.811, |
|
"eval_steps_per_second": 0.627, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.720415049496984e-05, |
|
"loss": 0.1851, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.698933613171035e-05, |
|
"loss": 0.1835, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.6774577527534195e-05, |
|
"loss": 0.185, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.655987865988401e-05, |
|
"loss": 0.1858, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.634524350509613e-05, |
|
"loss": 0.1825, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.613067603832685e-05, |
|
"loss": 0.1885, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5916180233478883e-05, |
|
"loss": 0.1833, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5701760063127686e-05, |
|
"loss": 0.1862, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.548741949844795e-05, |
|
"loss": 0.1837, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.527316250914004e-05, |
|
"loss": 0.1873, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.505899306335643e-05, |
|
"loss": 0.1787, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.484491512762827e-05, |
|
"loss": 0.1883, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.463093266679185e-05, |
|
"loss": 0.1806, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.4417049643915286e-05, |
|
"loss": 0.183, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.420327002022498e-05, |
|
"loss": 0.1831, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.398959775503238e-05, |
|
"loss": 0.183, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.377603680566054e-05, |
|
"loss": 0.1782, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.356259112737096e-05, |
|
"loss": 0.1871, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.334926467329021e-05, |
|
"loss": 0.1825, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.313606139433676e-05, |
|
"loss": 0.1838, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.292298523914785e-05, |
|
"loss": 0.1816, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.271004015400627e-05, |
|
"loss": 0.1824, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.249723008276737e-05, |
|
"loss": 0.182, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.228455896678595e-05, |
|
"loss": 0.1817, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.207203074484328e-05, |
|
"loss": 0.1808, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.185964935307417e-05, |
|
"loss": 0.1776, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.164741872489405e-05, |
|
"loss": 0.1831, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.143534279092612e-05, |
|
"loss": 0.18, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.1223425478928594e-05, |
|
"loss": 0.1816, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.1011670713721905e-05, |
|
"loss": 0.1811, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0800082417115995e-05, |
|
"loss": 0.1805, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.05886645078378e-05, |
|
"loss": 0.1814, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0377420901458506e-05, |
|
"loss": 0.1798, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0166355510321195e-05, |
|
"loss": 0.1826, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.995547224346824e-05, |
|
"loss": 0.1818, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.9744775006569057e-05, |
|
"loss": 0.1812, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.953426770184757e-05, |
|
"loss": 0.1824, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.9323954228010196e-05, |
|
"loss": 0.181, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.911383848017341e-05, |
|
"loss": 0.1829, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.890392434979172e-05, |
|
"loss": 0.1849, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.8694215724585594e-05, |
|
"loss": 0.1821, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.848471648846939e-05, |
|
"loss": 0.1822, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.827543052147952e-05, |
|
"loss": 0.1866, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.8066361699702495e-05, |
|
"loss": 0.1826, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.785751389520323e-05, |
|
"loss": 0.1825, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.764889097595322e-05, |
|
"loss": 0.1813, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.744049680575905e-05, |
|
"loss": 0.1817, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.7232335244190656e-05, |
|
"loss": 0.1799, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.702441014651001e-05, |
|
"loss": 0.182, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.6816725363599644e-05, |
|
"loss": 0.18, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.6609284741891295e-05, |
|
"loss": 0.1793, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.640209212329473e-05, |
|
"loss": 0.1801, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.619515134512656e-05, |
|
"loss": 0.1798, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.5988466240039206e-05, |
|
"loss": 0.179, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.578204063594982e-05, |
|
"loss": 0.1827, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.5575878355969566e-05, |
|
"loss": 0.1843, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.536998321833258e-05, |
|
"loss": 0.1821, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.516435903632548e-05, |
|
"loss": 0.182, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.495900961821662e-05, |
|
"loss": 0.1792, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.475393876718555e-05, |
|
"loss": 0.1836, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 0.1849, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.4344647953208675e-05, |
|
"loss": 0.1823, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.41404355705447e-05, |
|
"loss": 0.1785, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.393651691538178e-05, |
|
"loss": 0.181, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.3732895764401004e-05, |
|
"loss": 0.1817, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.35295758887735e-05, |
|
"loss": 0.1807, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.332656105409066e-05, |
|
"loss": 0.1805, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.312385502029434e-05, |
|
"loss": 0.1839, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.2921461541607225e-05, |
|
"loss": 0.1811, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.271938436646334e-05, |
|
"loss": 0.1829, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.251762723743858e-05, |
|
"loss": 0.1818, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.231619389118144e-05, |
|
"loss": 0.1798, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.2115088058343725e-05, |
|
"loss": 0.1813, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.191431346351164e-05, |
|
"loss": 0.18, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.171387382513654e-05, |
|
"loss": 0.1822, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.151377285546635e-05, |
|
"loss": 0.1823, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.1314014260476553e-05, |
|
"loss": 0.1826, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.111460173980175e-05, |
|
"loss": 0.1785, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.091553898666705e-05, |
|
"loss": 0.1844, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.0716829687819643e-05, |
|
"loss": 0.1809, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.051847752346061e-05, |
|
"loss": 0.182, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.0320486167176664e-05, |
|
"loss": 0.1792, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.0122859285872214e-05, |
|
"loss": 0.183, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.9925600539701347e-05, |
|
"loss": 0.1814, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.972871358200015e-05, |
|
"loss": 0.1817, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.9532202059218933e-05, |
|
"loss": 0.1777, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.9336069610854788e-05, |
|
"loss": 0.1837, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.914031986938417e-05, |
|
"loss": 0.1805, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.8944956460195515e-05, |
|
"loss": 0.18, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.8749983001522302e-05, |
|
"loss": 0.18, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.8555403104375827e-05, |
|
"loss": 0.1798, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.8361220372478446e-05, |
|
"loss": 0.1829, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.8167438402196805e-05, |
|
"loss": 0.1796, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.7974060782475258e-05, |
|
"loss": 0.1786, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.778109109476934e-05, |
|
"loss": 0.1827, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.7588532912979483e-05, |
|
"loss": 0.1789, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.739638980338479e-05, |
|
"loss": 0.1817, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.720466532457707e-05, |
|
"loss": 0.1815, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.70133630273948e-05, |
|
"loss": 0.1788, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.6822486454857453e-05, |
|
"loss": 0.1802, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 0.1759687066078186, |
|
"eval_runtime": 12.5156, |
|
"eval_samples_per_second": 159.801, |
|
"eval_steps_per_second": 0.639, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.6632039142099896e-05, |
|
"loss": 0.1804, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.6442024616306804e-05, |
|
"loss": 0.1814, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.62524463966475e-05, |
|
"loss": 0.1806, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.6063307994210584e-05, |
|
"loss": 0.1818, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.5874612911939094e-05, |
|
"loss": 0.1796, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.5686364644565486e-05, |
|
"loss": 0.1797, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.5498566678547042e-05, |
|
"loss": 0.1803, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.531122249200114e-05, |
|
"loss": 0.1814, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.5124335554640967e-05, |
|
"loss": 0.1796, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.493790932771122e-05, |
|
"loss": 0.181, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.4751947263923982e-05, |
|
"loss": 0.1802, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.4566452807394785e-05, |
|
"loss": 0.1806, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.438142939357882e-05, |
|
"loss": 0.1833, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.4196880449207366e-05, |
|
"loss": 0.1803, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.4012809392224227e-05, |
|
"loss": 0.1813, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.38292196317225e-05, |
|
"loss": 0.1845, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.3646114567881393e-05, |
|
"loss": 0.1793, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.3463497591903322e-05, |
|
"loss": 0.1806, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.328137208595107e-05, |
|
"loss": 0.1822, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.309974142308502e-05, |
|
"loss": 0.179, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.291860896720094e-05, |
|
"loss": 0.181, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.273797807296744e-05, |
|
"loss": 0.1832, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.2557852085764053e-05, |
|
"loss": 0.1801, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.237823434161902e-05, |
|
"loss": 0.1807, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.2199128167147785e-05, |
|
"loss": 0.1812, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.2020536879491167e-05, |
|
"loss": 0.1777, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.184246378625407e-05, |
|
"loss": 0.1801, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.1664912185444124e-05, |
|
"loss": 0.1759, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.1487885365410644e-05, |
|
"loss": 0.184, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.1311386604783763e-05, |
|
"loss": 0.1844, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.113541917241364e-05, |
|
"loss": 0.1787, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.0959986327309968e-05, |
|
"loss": 0.1798, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.0785091318581577e-05, |
|
"loss": 0.178, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 0.18, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.0436927756821094e-05, |
|
"loss": 0.1792, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.0263665651961834e-05, |
|
"loss": 0.1813, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.0090954279704183e-05, |
|
"loss": 0.1829, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.9918796838753863e-05, |
|
"loss": 0.1773, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.974719651755756e-05, |
|
"loss": 0.1797, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.957615649424369e-05, |
|
"loss": 0.1785, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.9405679936563786e-05, |
|
"loss": 0.1825, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.9235770001833607e-05, |
|
"loss": 0.1785, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.9066429836874844e-05, |
|
"loss": 0.176, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.889766257795663e-05, |
|
"loss": 0.1788, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.8729471350737693e-05, |
|
"loss": 0.1807, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.856185927020827e-05, |
|
"loss": 0.1799, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.8394829440632565e-05, |
|
"loss": 0.182, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.8228384955491135e-05, |
|
"loss": 0.1825, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.806252889742364e-05, |
|
"loss": 0.1755, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.789726433817182e-05, |
|
"loss": 0.1777, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7732594338522495e-05, |
|
"loss": 0.1823, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.756852194825094e-05, |
|
"loss": 0.1793, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7405050206064373e-05, |
|
"loss": 0.1794, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7242182139545744e-05, |
|
"loss": 0.1745, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7079920765097563e-05, |
|
"loss": 0.1771, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.691826908788608e-05, |
|
"loss": 0.1755, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.675723010178568e-05, |
|
"loss": 0.1806, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.6596806789323315e-05, |
|
"loss": 0.1818, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.6437002121623436e-05, |
|
"loss": 0.1838, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.6277819058352727e-05, |
|
"loss": 0.1775, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.611926054766556e-05, |
|
"loss": 0.1781, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.596132952614918e-05, |
|
"loss": 0.1791, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.5804028918769485e-05, |
|
"loss": 0.1794, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.5647361638816655e-05, |
|
"loss": 0.1817, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.549133058785141e-05, |
|
"loss": 0.1797, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.5335938655651122e-05, |
|
"loss": 0.1815, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.5181188720156392e-05, |
|
"loss": 0.1824, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.5027083647417655e-05, |
|
"loss": 0.1785, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4873626291542148e-05, |
|
"loss": 0.1811, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4720819494641109e-05, |
|
"loss": 0.1794, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4568666086777021e-05, |
|
"loss": 0.183, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4417168885911276e-05, |
|
"loss": 0.1773, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4266330697851954e-05, |
|
"loss": 0.176, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4116154316201907e-05, |
|
"loss": 0.1754, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3966642522306956e-05, |
|
"loss": 0.1771, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.381779808520438e-05, |
|
"loss": 0.1816, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.3669623761571726e-05, |
|
"loss": 0.1799, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.3522122295675616e-05, |
|
"loss": 0.1761, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.337529641932107e-05, |
|
"loss": 0.1803, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.3229148851800721e-05, |
|
"loss": 0.1811, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.308368229984468e-05, |
|
"loss": 0.1794, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2938899457570208e-05, |
|
"loss": 0.1805, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2794803006431982e-05, |
|
"loss": 0.1784, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.265139561517224e-05, |
|
"loss": 0.1807, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2508679939771583e-05, |
|
"loss": 0.1771, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2366658623399584e-05, |
|
"loss": 0.1801, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2225334296365987e-05, |
|
"loss": 0.1794, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2084709576071884e-05, |
|
"loss": 0.1778, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1944787066961265e-05, |
|
"loss": 0.1818, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1805569360472868e-05, |
|
"loss": 0.181, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.166705903499205e-05, |
|
"loss": 0.1812, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1529258655803116e-05, |
|
"loss": 0.1804, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1392170775041788e-05, |
|
"loss": 0.1799, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1255797931647971e-05, |
|
"loss": 0.1758, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1120142651318665e-05, |
|
"loss": 0.1787, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0985207446461221e-05, |
|
"loss": 0.1805, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0850994816146858e-05, |
|
"loss": 0.1797, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0717507246064273e-05, |
|
"loss": 0.1815, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0584747208473739e-05, |
|
"loss": 0.1799, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0452717162161141e-05, |
|
"loss": 0.1798, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 0.1740500032901764, |
|
"eval_runtime": 12.3559, |
|
"eval_samples_per_second": 161.866, |
|
"eval_steps_per_second": 0.647, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0321419552392636e-05, |
|
"loss": 0.1745, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0190856810869215e-05, |
|
"loss": 0.1753, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0061031355681766e-05, |
|
"loss": 0.1838, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.931945591266174e-06, |
|
"loss": 0.1854, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.803601908358944e-06, |
|
"loss": 0.1819, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.676002683952768e-06, |
|
"loss": 0.1781, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 0.1789, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.423047049631956e-06, |
|
"loss": 0.1802, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.297695324589106e-06, |
|
"loss": 0.1771, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.173097427704203e-06, |
|
"loss": 0.1806, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.049255666596012e-06, |
|
"loss": 0.1805, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 8.926172334879296e-06, |
|
"loss": 0.178, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 8.803849712122292e-06, |
|
"loss": 0.1794, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.682290063804526e-06, |
|
"loss": 0.1736, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.561495641274825e-06, |
|
"loss": 0.1759, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.441468681709602e-06, |
|
"loss": 0.1752, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.322211408071512e-06, |
|
"loss": 0.181, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.203726029068148e-06, |
|
"loss": 0.1796, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.086014739111297e-06, |
|
"loss": 0.1765, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.969079718276096e-06, |
|
"loss": 0.1818, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.852923132260864e-06, |
|
"loss": 0.1747, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.73754713234684e-06, |
|
"loss": 0.1779, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.6229538553584556e-06, |
|
"loss": 0.1817, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.509145423623609e-06, |
|
"loss": 0.1778, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.3961239449345355e-06, |
|
"loss": 0.1847, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.2838915125086504e-06, |
|
"loss": 0.1789, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.172450204949821e-06, |
|
"loss": 0.1795, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.061802086209857e-06, |
|
"loss": 0.1791, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.951949205550284e-06, |
|
"loss": 0.1826, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.842893597504435e-06, |
|
"loss": 0.177, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.734637281839701e-06, |
|
"loss": 0.1802, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.627182263520165e-06, |
|
"loss": 0.1785, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.52053053266945e-06, |
|
"loss": 0.1793, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.41468406453391e-06, |
|
"loss": 0.1798, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.30964481944597e-06, |
|
"loss": 0.1783, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.205414742787852e-06, |
|
"loss": 0.1783, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.1019957649555985e-06, |
|
"loss": 0.1762, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.999389801323219e-06, |
|
"loss": 0.1808, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.897598752207328e-06, |
|
"loss": 0.1756, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.796624502831821e-06, |
|
"loss": 0.1791, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.696468923293108e-06, |
|
"loss": 0.1774, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.59713386852534e-06, |
|
"loss": 0.1781, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5.498621178266167e-06, |
|
"loss": 0.1763, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.40093267702258e-06, |
|
"loss": 0.1749, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.304070174037146e-06, |
|
"loss": 0.1769, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.208035463254557e-06, |
|
"loss": 0.1763, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.112830323288314e-06, |
|
"loss": 0.1813, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 5.018456517387837e-06, |
|
"loss": 0.1764, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.924915793405799e-06, |
|
"loss": 0.1795, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.832209883765782e-06, |
|
"loss": 0.1787, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.74034050543013e-06, |
|
"loss": 0.1728, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.649309359868209e-06, |
|
"loss": 0.1754, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.559118133024853e-06, |
|
"loss": 0.1747, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.4697684952891895e-06, |
|
"loss": 0.1775, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.38126210146364e-06, |
|
"loss": 0.1758, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.29360059073331e-06, |
|
"loss": 0.182, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.2067855866356475e-06, |
|
"loss": 0.1763, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.12081869703031e-06, |
|
"loss": 0.1756, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.035701514069484e-06, |
|
"loss": 0.1768, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.951435614168275e-06, |
|
"loss": 0.18, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.868022557975626e-06, |
|
"loss": 0.1781, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.7854638903453367e-06, |
|
"loss": 0.178, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.70376114030751e-06, |
|
"loss": 0.1777, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.6229158210401737e-06, |
|
"loss": 0.1773, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5429294298412853e-06, |
|
"loss": 0.1762, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.463803448101033e-06, |
|
"loss": 0.179, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.3855393412743352e-06, |
|
"loss": 0.1788, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.308138558853746e-06, |
|
"loss": 0.1796, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.231602534342587e-06, |
|
"loss": 0.1791, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.15593268522843e-06, |
|
"loss": 0.1765, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.081130412956795e-06, |
|
"loss": 0.1771, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.0071971029052348e-06, |
|
"loss": 0.1765, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.934134124357646e-06, |
|
"loss": 0.1746, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.86194283047897e-06, |
|
"loss": 0.177, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.7906245582900338e-06, |
|
"loss": 0.1771, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.720180628642871e-06, |
|
"loss": 0.1756, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.6506123461962408e-06, |
|
"loss": 0.1767, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.5819209993914184e-06, |
|
"loss": 0.1774, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.5141078604284105e-06, |
|
"loss": 0.1796, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.4471741852423237e-06, |
|
"loss": 0.1821, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.381121213480131e-06, |
|
"loss": 0.1815, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.3159501684777207e-06, |
|
"loss": 0.1796, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.2516622572372414e-06, |
|
"loss": 0.1771, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.188258670404719e-06, |
|
"loss": 0.1796, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.125740582248037e-06, |
|
"loss": 0.1805, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.0641091506351796e-06, |
|
"loss": 0.1793, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.003365517012773e-06, |
|
"loss": 0.1804, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.943510806384968e-06, |
|
"loss": 0.1777, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.884546127292569e-06, |
|
"loss": 0.176, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.8264725717925591e-06, |
|
"loss": 0.1745, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.7692912154378294e-06, |
|
"loss": 0.1754, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.7130031172572614e-06, |
|
"loss": 0.1777, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6576093197361253e-06, |
|
"loss": 0.1799, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.603110848796785e-06, |
|
"loss": 0.1751, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.549508713779696e-06, |
|
"loss": 0.1756, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.4968039074246486e-06, |
|
"loss": 0.1772, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.444997405852483e-06, |
|
"loss": 0.1752, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3940901685469298e-06, |
|
"loss": 0.181, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3440831383369046e-06, |
|
"loss": 0.1781, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.294977241378975e-06, |
|
"loss": 0.1766, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 0.1736355423927307, |
|
"eval_runtime": 12.0759, |
|
"eval_samples_per_second": 165.62, |
|
"eval_steps_per_second": 0.662, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.2467733871402655e-06, |
|
"loss": 0.1771, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.199472468381585e-06, |
|
"loss": 0.1788, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.153075361140915e-06, |
|
"loss": 0.1777, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.1075829247171598e-06, |
|
"loss": 0.1747, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.0629960016542373e-06, |
|
"loss": 0.1772, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.0193154177254971e-06, |
|
"loss": 0.1758, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.765419819183997e-07, |
|
"loss": 0.1785, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.346764864195335e-07, |
|
"loss": 0.1782, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 8.937197065999714e-07, |
|
"loss": 0.1784, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 8.536724010008878e-07, |
|
"loss": 0.178, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 8.145353113195142e-07, |
|
"loss": 0.1757, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 7.763091623953889e-07, |
|
"loss": 0.1776, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 7.389946621969679e-07, |
|
"loss": 0.176, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 7.0259250180848e-07, |
|
"loss": 0.1766, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.67103355417148e-07, |
|
"loss": 0.1761, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.325278803006818e-07, |
|
"loss": 0.1734, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.988667168151219e-07, |
|
"loss": 0.1789, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.661204883829763e-07, |
|
"loss": 0.1748, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.342898014816855e-07, |
|
"loss": 0.1772, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5.033752456323482e-07, |
|
"loss": 0.1772, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.73377393388863e-07, |
|
"loss": 0.1761, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.4429680032726517e-07, |
|
"loss": 0.1796, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.1613400503550114e-07, |
|
"loss": 0.1789, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.888895291033867e-07, |
|
"loss": 0.1789, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.6256387711299247e-07, |
|
"loss": 0.179, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.371575366292845e-07, |
|
"loss": 0.1768, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.126709781910986e-07, |
|
"loss": 0.1772, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.8910465530240793e-07, |
|
"loss": 0.1805, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.6645900442394677e-07, |
|
"loss": 0.1769, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.4473444496512233e-07, |
|
"loss": 0.1796, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.2393137927623763e-07, |
|
"loss": 0.1773, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.0405019264104762e-07, |
|
"loss": 0.1774, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.850912532696092e-07, |
|
"loss": 0.1786, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6705491229149218e-07, |
|
"loss": 0.1756, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.4994150374924575e-07, |
|
"loss": 0.1771, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.337513445922256e-07, |
|
"loss": 0.1763, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.184847346707152e-07, |
|
"loss": 0.1767, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.041419567303914e-07, |
|
"loss": 0.173, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.072327640706756e-08, |
|
"loss": 0.1774, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.822894222178633e-08, |
|
"loss": 0.1801, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.665918557620665e-08, |
|
"loss": 0.1772, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.6014220748318303e-08, |
|
"loss": 0.1734, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.6294244888500645e-08, |
|
"loss": 0.1778, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.749943801582556e-08, |
|
"loss": 0.1755, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.9629963014760065e-08, |
|
"loss": 0.1782, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.2685965632135432e-08, |
|
"loss": 0.1765, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.666757447443823e-08, |
|
"loss": 0.1809, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.1574901005456662e-08, |
|
"loss": 0.1772, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.4080395441877834e-09, |
|
"loss": 0.175, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.167067263105562e-09, |
|
"loss": 0.176, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.8520441867231341e-09, |
|
"loss": 0.1788, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.630131904936885e-10, |
|
"loss": 0.1729, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.1755, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 753, |
|
"total_flos": 0.0, |
|
"train_loss": 0.10856808526107514, |
|
"train_runtime": 9051.9774, |
|
"train_samples_per_second": 85.227, |
|
"train_steps_per_second": 0.083 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 753, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|