{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 753, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.347826086956522e-06, "loss": 4.2511, "step": 1 }, { "epoch": 0.0, "learning_rate": 8.695652173913044e-06, "loss": 4.2595, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.3043478260869566e-05, "loss": 3.0895, "step": 3 }, { "epoch": 0.01, "learning_rate": 1.739130434782609e-05, "loss": 0.8882, "step": 4 }, { "epoch": 0.01, "learning_rate": 2.173913043478261e-05, "loss": 0.4819, "step": 5 }, { "epoch": 0.01, "learning_rate": 2.608695652173913e-05, "loss": 0.8914, "step": 6 }, { "epoch": 0.01, "learning_rate": 3.0434782608695656e-05, "loss": 1.1554, "step": 7 }, { "epoch": 0.01, "learning_rate": 3.478260869565218e-05, "loss": 0.3562, "step": 8 }, { "epoch": 0.01, "learning_rate": 3.91304347826087e-05, "loss": 0.4085, "step": 9 }, { "epoch": 0.01, "learning_rate": 4.347826086956522e-05, "loss": 0.2655, "step": 10 }, { "epoch": 0.01, "learning_rate": 4.782608695652174e-05, "loss": 0.281, "step": 11 }, { "epoch": 0.02, "learning_rate": 5.217391304347826e-05, "loss": 0.4708, "step": 12 }, { "epoch": 0.02, "learning_rate": 5.652173913043478e-05, "loss": 4.1421, "step": 13 }, { "epoch": 0.02, "learning_rate": 6.086956521739131e-05, "loss": 2.4328, "step": 14 }, { "epoch": 0.02, "learning_rate": 6.521739130434783e-05, "loss": 1.7092, "step": 15 }, { "epoch": 0.02, "learning_rate": 6.956521739130436e-05, "loss": 0.687, "step": 16 }, { "epoch": 0.02, "learning_rate": 7.391304347826086e-05, "loss": 0.2745, "step": 17 }, { "epoch": 0.02, "learning_rate": 7.82608695652174e-05, "loss": 0.3214, "step": 18 }, { "epoch": 0.03, "learning_rate": 8.260869565217392e-05, "loss": 0.4915, "step": 19 }, { "epoch": 0.03, "learning_rate": 8.695652173913044e-05, "loss": 0.2447, "step": 20 }, { "epoch": 0.03, "learning_rate": 9.130434782608696e-05, "loss": 0.2353, "step": 21 }, { "epoch": 0.03, "learning_rate": 9.565217391304348e-05, "loss": 0.2507, "step": 22 }, { "epoch": 0.03, "learning_rate": 0.0001, "loss": 0.2238, "step": 23 }, { "epoch": 0.03, "learning_rate": 9.99995369868095e-05, "loss": 0.2327, "step": 24 }, { "epoch": 0.03, "learning_rate": 9.999814795581328e-05, "loss": 0.5584, "step": 25 }, { "epoch": 0.03, "learning_rate": 9.99958329327369e-05, "loss": 1.6468, "step": 26 }, { "epoch": 0.04, "learning_rate": 9.999259196045582e-05, "loss": 5.5723, "step": 27 }, { "epoch": 0.04, "learning_rate": 9.998842509899456e-05, "loss": 4.1912, "step": 28 }, { "epoch": 0.04, "learning_rate": 9.998333242552556e-05, "loss": 0.9005, "step": 29 }, { "epoch": 0.04, "learning_rate": 9.997731403436788e-05, "loss": 0.5078, "step": 30 }, { "epoch": 0.04, "learning_rate": 9.997037003698525e-05, "loss": 0.2464, "step": 31 }, { "epoch": 0.04, "learning_rate": 9.996250056198417e-05, "loss": 0.258, "step": 32 }, { "epoch": 0.04, "learning_rate": 9.995370575511151e-05, "loss": 0.2438, "step": 33 }, { "epoch": 0.05, "learning_rate": 9.994398577925169e-05, "loss": 0.2159, "step": 34 }, { "epoch": 0.05, "learning_rate": 9.993334081442381e-05, "loss": 0.1959, "step": 35 }, { "epoch": 0.05, "learning_rate": 9.992177105777822e-05, "loss": 0.2217, "step": 36 }, { "epoch": 0.05, "learning_rate": 9.990927672359294e-05, "loss": 0.1972, "step": 37 }, { "epoch": 0.05, "learning_rate": 9.989585804326962e-05, "loss": 0.1947, "step": 38 }, { "epoch": 0.05, "learning_rate": 9.988151526532929e-05, "loss": 0.2028, "step": 39 }, { "epoch": 0.05, "learning_rate": 9.986624865540778e-05, "loss": 0.1989, "step": 40 }, { "epoch": 0.05, "learning_rate": 9.985005849625076e-05, "loss": 0.1963, "step": 41 }, { "epoch": 0.06, "learning_rate": 9.983294508770851e-05, "loss": 0.1935, "step": 42 }, { "epoch": 0.06, "learning_rate": 9.981490874673039e-05, "loss": 0.1949, "step": 43 }, { "epoch": 0.06, "learning_rate": 9.979594980735896e-05, "loss": 0.1979, "step": 44 }, { "epoch": 0.06, "learning_rate": 9.977606862072377e-05, "loss": 0.1882, "step": 45 }, { "epoch": 0.06, "learning_rate": 9.975526555503488e-05, "loss": 0.1911, "step": 46 }, { "epoch": 0.06, "learning_rate": 9.973354099557606e-05, "loss": 0.1914, "step": 47 }, { "epoch": 0.06, "learning_rate": 9.97108953446976e-05, "loss": 0.1945, "step": 48 }, { "epoch": 0.07, "learning_rate": 9.968732902180891e-05, "loss": 0.1876, "step": 49 }, { "epoch": 0.07, "learning_rate": 9.966284246337072e-05, "loss": 0.1893, "step": 50 }, { "epoch": 0.07, "learning_rate": 9.963743612288701e-05, "loss": 0.1946, "step": 51 }, { "epoch": 0.07, "learning_rate": 9.961111047089662e-05, "loss": 0.1909, "step": 52 }, { "epoch": 0.07, "learning_rate": 9.95838659949645e-05, "loss": 0.1959, "step": 53 }, { "epoch": 0.07, "learning_rate": 9.955570319967273e-05, "loss": 0.1922, "step": 54 }, { "epoch": 0.07, "learning_rate": 9.952662260661115e-05, "loss": 0.195, "step": 55 }, { "epoch": 0.07, "learning_rate": 9.949662475436765e-05, "loss": 0.1941, "step": 56 }, { "epoch": 0.08, "learning_rate": 9.946571019851832e-05, "loss": 0.1941, "step": 57 }, { "epoch": 0.08, "learning_rate": 9.943387951161703e-05, "loss": 0.189, "step": 58 }, { "epoch": 0.08, "learning_rate": 9.940113328318488e-05, "loss": 0.193, "step": 59 }, { "epoch": 0.08, "learning_rate": 9.936747211969932e-05, "loss": 0.1918, "step": 60 }, { "epoch": 0.08, "learning_rate": 9.933289664458284e-05, "loss": 0.1939, "step": 61 }, { "epoch": 0.08, "learning_rate": 9.929740749819152e-05, "loss": 0.1912, "step": 62 }, { "epoch": 0.08, "learning_rate": 9.926100533780303e-05, "loss": 0.195, "step": 63 }, { "epoch": 0.08, "learning_rate": 9.92236908376046e-05, "loss": 0.1909, "step": 64 }, { "epoch": 0.09, "learning_rate": 9.91854646886805e-05, "loss": 0.1926, "step": 65 }, { "epoch": 0.09, "learning_rate": 9.91463275989991e-05, "loss": 0.1911, "step": 66 }, { "epoch": 0.09, "learning_rate": 9.910628029340003e-05, "loss": 0.1951, "step": 67 }, { "epoch": 0.09, "learning_rate": 9.906532351358047e-05, "loss": 0.1877, "step": 68 }, { "epoch": 0.09, "learning_rate": 9.902345801808162e-05, "loss": 0.1894, "step": 69 }, { "epoch": 0.09, "learning_rate": 9.898068458227451e-05, "loss": 0.19, "step": 70 }, { "epoch": 0.09, "learning_rate": 9.893700399834577e-05, "loss": 0.1927, "step": 71 }, { "epoch": 0.1, "learning_rate": 9.889241707528285e-05, "loss": 0.1919, "step": 72 }, { "epoch": 0.1, "learning_rate": 9.88469246388591e-05, "loss": 0.1906, "step": 73 }, { "epoch": 0.1, "learning_rate": 9.880052753161842e-05, "loss": 0.1928, "step": 74 }, { "epoch": 0.1, "learning_rate": 9.875322661285975e-05, "loss": 0.1887, "step": 75 }, { "epoch": 0.1, "learning_rate": 9.870502275862104e-05, "loss": 0.1919, "step": 76 }, { "epoch": 0.1, "learning_rate": 9.86559168616631e-05, "loss": 0.1892, "step": 77 }, { "epoch": 0.1, "learning_rate": 9.860590983145307e-05, "loss": 0.1876, "step": 78 }, { "epoch": 0.1, "learning_rate": 9.855500259414753e-05, "loss": 0.1946, "step": 79 }, { "epoch": 0.11, "learning_rate": 9.850319609257535e-05, "loss": 0.1889, "step": 80 }, { "epoch": 0.11, "learning_rate": 9.845049128622032e-05, "loss": 0.1909, "step": 81 }, { "epoch": 0.11, "learning_rate": 9.839688915120322e-05, "loss": 0.1914, "step": 82 }, { "epoch": 0.11, "learning_rate": 9.834239068026387e-05, "loss": 0.1923, "step": 83 }, { "epoch": 0.11, "learning_rate": 9.828699688274275e-05, "loss": 0.1902, "step": 84 }, { "epoch": 0.11, "learning_rate": 9.823070878456217e-05, "loss": 0.1919, "step": 85 }, { "epoch": 0.11, "learning_rate": 9.817352742820744e-05, "loss": 0.1925, "step": 86 }, { "epoch": 0.12, "learning_rate": 9.811545387270743e-05, "loss": 0.1871, "step": 87 }, { "epoch": 0.12, "learning_rate": 9.805648919361504e-05, "loss": 0.1922, "step": 88 }, { "epoch": 0.12, "learning_rate": 9.799663448298724e-05, "loss": 0.1953, "step": 89 }, { "epoch": 0.12, "learning_rate": 9.793589084936483e-05, "loss": 0.192, "step": 90 }, { "epoch": 0.12, "learning_rate": 9.787425941775197e-05, "loss": 0.19, "step": 91 }, { "epoch": 0.12, "learning_rate": 9.781174132959529e-05, "loss": 0.1866, "step": 92 }, { "epoch": 0.12, "learning_rate": 9.774833774276278e-05, "loss": 0.1906, "step": 93 }, { "epoch": 0.12, "learning_rate": 9.768404983152229e-05, "loss": 0.1907, "step": 94 }, { "epoch": 0.13, "learning_rate": 9.761887878651987e-05, "loss": 0.1915, "step": 95 }, { "epoch": 0.13, "learning_rate": 9.755282581475769e-05, "loss": 0.1903, "step": 96 }, { "epoch": 0.13, "learning_rate": 9.74858921395716e-05, "loss": 0.1917, "step": 97 }, { "epoch": 0.13, "learning_rate": 9.741807900060858e-05, "loss": 0.1908, "step": 98 }, { "epoch": 0.13, "learning_rate": 9.734938765380377e-05, "loss": 0.1921, "step": 99 }, { "epoch": 0.13, "learning_rate": 9.727981937135713e-05, "loss": 0.1879, "step": 100 }, { "epoch": 0.13, "eval_loss": 0.18334217369556427, "eval_runtime": 12.1378, "eval_samples_per_second": 164.775, "eval_steps_per_second": 0.659, "step": 100 }, { "epoch": 0.13, "learning_rate": 9.720937544170999e-05, "loss": 0.1908, "step": 101 }, { "epoch": 0.14, "learning_rate": 9.713805716952105e-05, "loss": 0.1892, "step": 102 }, { "epoch": 0.14, "learning_rate": 9.706586587564237e-05, "loss": 0.1902, "step": 103 }, { "epoch": 0.14, "learning_rate": 9.699280289709478e-05, "loss": 0.1905, "step": 104 }, { "epoch": 0.14, "learning_rate": 9.691886958704321e-05, "loss": 0.1894, "step": 105 }, { "epoch": 0.14, "learning_rate": 9.684406731477158e-05, "loss": 0.1943, "step": 106 }, { "epoch": 0.14, "learning_rate": 9.676839746565743e-05, "loss": 0.1883, "step": 107 }, { "epoch": 0.14, "learning_rate": 9.669186144114625e-05, "loss": 0.1935, "step": 108 }, { "epoch": 0.14, "learning_rate": 9.661446065872568e-05, "loss": 0.1914, "step": 109 }, { "epoch": 0.15, "learning_rate": 9.653619655189897e-05, "loss": 0.1877, "step": 110 }, { "epoch": 0.15, "learning_rate": 9.645707057015871e-05, "loss": 0.1898, "step": 111 }, { "epoch": 0.15, "learning_rate": 9.637708417895983e-05, "loss": 0.1898, "step": 112 }, { "epoch": 0.15, "learning_rate": 9.62962388596925e-05, "loss": 0.1922, "step": 113 }, { "epoch": 0.15, "learning_rate": 9.621453610965467e-05, "loss": 0.1906, "step": 114 }, { "epoch": 0.15, "learning_rate": 9.613197744202438e-05, "loss": 0.1881, "step": 115 }, { "epoch": 0.15, "learning_rate": 9.604856438583174e-05, "loss": 0.1894, "step": 116 }, { "epoch": 0.16, "learning_rate": 9.596429848593053e-05, "loss": 0.1872, "step": 117 }, { "epoch": 0.16, "learning_rate": 9.587918130296969e-05, "loss": 0.192, "step": 118 }, { "epoch": 0.16, "learning_rate": 9.579321441336436e-05, "loss": 0.1876, "step": 119 }, { "epoch": 0.16, "learning_rate": 9.57063994092667e-05, "loss": 0.1914, "step": 120 }, { "epoch": 0.16, "learning_rate": 9.561873789853637e-05, "loss": 0.1897, "step": 121 }, { "epoch": 0.16, "learning_rate": 9.553023150471082e-05, "loss": 0.1907, "step": 122 }, { "epoch": 0.16, "learning_rate": 9.544088186697515e-05, "loss": 0.1905, "step": 123 }, { "epoch": 0.16, "learning_rate": 9.53506906401318e-05, "loss": 0.1918, "step": 124 }, { "epoch": 0.17, "learning_rate": 9.525965949456988e-05, "loss": 0.1908, "step": 125 }, { "epoch": 0.17, "learning_rate": 9.516779011623422e-05, "loss": 0.1898, "step": 126 }, { "epoch": 0.17, "learning_rate": 9.50750842065942e-05, "loss": 0.1871, "step": 127 }, { "epoch": 0.17, "learning_rate": 9.498154348261216e-05, "loss": 0.1906, "step": 128 }, { "epoch": 0.17, "learning_rate": 9.48871696767117e-05, "loss": 0.1905, "step": 129 }, { "epoch": 0.17, "learning_rate": 9.479196453674544e-05, "loss": 0.1906, "step": 130 }, { "epoch": 0.17, "learning_rate": 9.469592982596285e-05, "loss": 0.1906, "step": 131 }, { "epoch": 0.18, "learning_rate": 9.459906732297743e-05, "loss": 0.1929, "step": 132 }, { "epoch": 0.18, "learning_rate": 9.450137882173384e-05, "loss": 0.1918, "step": 133 }, { "epoch": 0.18, "learning_rate": 9.440286613147467e-05, "loss": 0.1895, "step": 134 }, { "epoch": 0.18, "learning_rate": 9.430353107670691e-05, "loss": 0.189, "step": 135 }, { "epoch": 0.18, "learning_rate": 9.420337549716818e-05, "loss": 0.1942, "step": 136 }, { "epoch": 0.18, "learning_rate": 9.410240124779268e-05, "loss": 0.192, "step": 137 }, { "epoch": 0.18, "learning_rate": 9.400061019867679e-05, "loss": 0.1908, "step": 138 }, { "epoch": 0.18, "learning_rate": 9.389800423504442e-05, "loss": 0.1908, "step": 139 }, { "epoch": 0.19, "learning_rate": 9.379458525721215e-05, "loss": 0.1892, "step": 140 }, { "epoch": 0.19, "learning_rate": 9.369035518055404e-05, "loss": 0.1887, "step": 141 }, { "epoch": 0.19, "learning_rate": 9.35853159354661e-05, "loss": 0.1901, "step": 142 }, { "epoch": 0.19, "learning_rate": 9.347946946733055e-05, "loss": 0.1908, "step": 143 }, { "epoch": 0.19, "learning_rate": 9.337281773647986e-05, "loss": 0.1958, "step": 144 }, { "epoch": 0.19, "learning_rate": 9.326536271816031e-05, "loss": 0.1905, "step": 145 }, { "epoch": 0.19, "learning_rate": 9.315710640249558e-05, "loss": 0.1932, "step": 146 }, { "epoch": 0.2, "learning_rate": 9.304805079444971e-05, "loss": 0.1894, "step": 147 }, { "epoch": 0.2, "learning_rate": 9.293819791379016e-05, "loss": 0.1921, "step": 148 }, { "epoch": 0.2, "learning_rate": 9.282754979505018e-05, "loss": 0.1927, "step": 149 }, { "epoch": 0.2, "learning_rate": 9.271610848749135e-05, "loss": 0.1971, "step": 150 }, { "epoch": 0.2, "learning_rate": 9.260387605506547e-05, "loss": 0.1894, "step": 151 }, { "epoch": 0.2, "learning_rate": 9.249085457637641e-05, "loss": 0.1908, "step": 152 }, { "epoch": 0.2, "learning_rate": 9.237704614464156e-05, "loss": 0.189, "step": 153 }, { "epoch": 0.2, "learning_rate": 9.226245286765316e-05, "loss": 0.1888, "step": 154 }, { "epoch": 0.21, "learning_rate": 9.214707686773914e-05, "loss": 0.1883, "step": 155 }, { "epoch": 0.21, "learning_rate": 9.203092028172391e-05, "loss": 0.1918, "step": 156 }, { "epoch": 0.21, "learning_rate": 9.191398526088873e-05, "loss": 0.1936, "step": 157 }, { "epoch": 0.21, "learning_rate": 9.179627397093185e-05, "loss": 0.1901, "step": 158 }, { "epoch": 0.21, "learning_rate": 9.16777885919285e-05, "loss": 0.1891, "step": 159 }, { "epoch": 0.21, "learning_rate": 9.15585313182904e-05, "loss": 0.1919, "step": 160 }, { "epoch": 0.21, "learning_rate": 9.143850435872518e-05, "loss": 0.1909, "step": 161 }, { "epoch": 0.22, "learning_rate": 9.131770993619548e-05, "loss": 0.1936, "step": 162 }, { "epoch": 0.22, "learning_rate": 9.119615028787771e-05, "loss": 0.1867, "step": 163 }, { "epoch": 0.22, "learning_rate": 9.107382766512071e-05, "loss": 0.1925, "step": 164 }, { "epoch": 0.22, "learning_rate": 9.095074433340401e-05, "loss": 0.1902, "step": 165 }, { "epoch": 0.22, "learning_rate": 9.08269025722958e-05, "loss": 0.1921, "step": 166 }, { "epoch": 0.22, "learning_rate": 9.07023046754109e-05, "loss": 0.1869, "step": 167 }, { "epoch": 0.22, "learning_rate": 9.057695295036805e-05, "loss": 0.1901, "step": 168 }, { "epoch": 0.22, "learning_rate": 9.045084971874738e-05, "loss": 0.1903, "step": 169 }, { "epoch": 0.23, "learning_rate": 9.032399731604723e-05, "loss": 0.1933, "step": 170 }, { "epoch": 0.23, "learning_rate": 9.019639809164106e-05, "loss": 0.1929, "step": 171 }, { "epoch": 0.23, "learning_rate": 9.006805440873383e-05, "loss": 0.1948, "step": 172 }, { "epoch": 0.23, "learning_rate": 8.993896864431826e-05, "loss": 0.1917, "step": 173 }, { "epoch": 0.23, "learning_rate": 8.980914318913078e-05, "loss": 0.1936, "step": 174 }, { "epoch": 0.23, "learning_rate": 8.967858044760737e-05, "loss": 0.1902, "step": 175 }, { "epoch": 0.23, "learning_rate": 8.954728283783886e-05, "loss": 0.1893, "step": 176 }, { "epoch": 0.24, "learning_rate": 8.941525279152627e-05, "loss": 0.1925, "step": 177 }, { "epoch": 0.24, "learning_rate": 8.928249275393572e-05, "loss": 0.1905, "step": 178 }, { "epoch": 0.24, "learning_rate": 8.914900518385314e-05, "loss": 0.1912, "step": 179 }, { "epoch": 0.24, "learning_rate": 8.901479255353879e-05, "loss": 0.1889, "step": 180 }, { "epoch": 0.24, "learning_rate": 8.887985734868135e-05, "loss": 0.189, "step": 181 }, { "epoch": 0.24, "learning_rate": 8.874420206835203e-05, "loss": 0.191, "step": 182 }, { "epoch": 0.24, "learning_rate": 8.860782922495822e-05, "loss": 0.1889, "step": 183 }, { "epoch": 0.24, "learning_rate": 8.847074134419691e-05, "loss": 0.1869, "step": 184 }, { "epoch": 0.25, "learning_rate": 8.833294096500797e-05, "loss": 0.1857, "step": 185 }, { "epoch": 0.25, "learning_rate": 8.819443063952713e-05, "loss": 0.1865, "step": 186 }, { "epoch": 0.25, "learning_rate": 8.805521293303874e-05, "loss": 0.1917, "step": 187 }, { "epoch": 0.25, "learning_rate": 8.791529042392813e-05, "loss": 0.1904, "step": 188 }, { "epoch": 0.25, "learning_rate": 8.777466570363401e-05, "loss": 0.1861, "step": 189 }, { "epoch": 0.25, "learning_rate": 8.763334137660042e-05, "loss": 0.1886, "step": 190 }, { "epoch": 0.25, "learning_rate": 8.749132006022844e-05, "loss": 0.1878, "step": 191 }, { "epoch": 0.25, "learning_rate": 8.734860438482777e-05, "loss": 0.1903, "step": 192 }, { "epoch": 0.26, "learning_rate": 8.720519699356804e-05, "loss": 0.1881, "step": 193 }, { "epoch": 0.26, "learning_rate": 8.70611005424298e-05, "loss": 0.1894, "step": 194 }, { "epoch": 0.26, "learning_rate": 8.691631770015533e-05, "loss": 0.1933, "step": 195 }, { "epoch": 0.26, "learning_rate": 8.677085114819928e-05, "loss": 0.1882, "step": 196 }, { "epoch": 0.26, "learning_rate": 8.662470358067895e-05, "loss": 0.1853, "step": 197 }, { "epoch": 0.26, "learning_rate": 8.647787770432439e-05, "loss": 0.1874, "step": 198 }, { "epoch": 0.26, "learning_rate": 8.633037623842828e-05, "loss": 0.1896, "step": 199 }, { "epoch": 0.27, "learning_rate": 8.618220191479563e-05, "loss": 0.1891, "step": 200 }, { "epoch": 0.27, "eval_loss": 0.18544606864452362, "eval_runtime": 11.5227, "eval_samples_per_second": 173.57, "eval_steps_per_second": 0.694, "step": 200 }, { "epoch": 0.27, "learning_rate": 8.603335747769306e-05, "loss": 0.1901, "step": 201 }, { "epoch": 0.27, "learning_rate": 8.58838456837981e-05, "loss": 0.1894, "step": 202 }, { "epoch": 0.27, "learning_rate": 8.573366930214806e-05, "loss": 0.1856, "step": 203 }, { "epoch": 0.27, "learning_rate": 8.558283111408873e-05, "loss": 0.1866, "step": 204 }, { "epoch": 0.27, "learning_rate": 8.5431333913223e-05, "loss": 0.1885, "step": 205 }, { "epoch": 0.27, "learning_rate": 8.52791805053589e-05, "loss": 0.1906, "step": 206 }, { "epoch": 0.27, "learning_rate": 8.512637370845785e-05, "loss": 0.1869, "step": 207 }, { "epoch": 0.28, "learning_rate": 8.497291635258235e-05, "loss": 0.1894, "step": 208 }, { "epoch": 0.28, "learning_rate": 8.481881127984361e-05, "loss": 0.19, "step": 209 }, { "epoch": 0.28, "learning_rate": 8.466406134434887e-05, "loss": 0.1851, "step": 210 }, { "epoch": 0.28, "learning_rate": 8.45086694121486e-05, "loss": 0.1846, "step": 211 }, { "epoch": 0.28, "learning_rate": 8.435263836118335e-05, "loss": 0.1905, "step": 212 }, { "epoch": 0.28, "learning_rate": 8.419597108123054e-05, "loss": 0.1872, "step": 213 }, { "epoch": 0.28, "learning_rate": 8.403867047385081e-05, "loss": 0.1867, "step": 214 }, { "epoch": 0.29, "learning_rate": 8.388073945233445e-05, "loss": 0.1859, "step": 215 }, { "epoch": 0.29, "learning_rate": 8.372218094164728e-05, "loss": 0.189, "step": 216 }, { "epoch": 0.29, "learning_rate": 8.356299787837658e-05, "loss": 0.1855, "step": 217 }, { "epoch": 0.29, "learning_rate": 8.340319321067668e-05, "loss": 0.1844, "step": 218 }, { "epoch": 0.29, "learning_rate": 8.324276989821434e-05, "loss": 0.1915, "step": 219 }, { "epoch": 0.29, "learning_rate": 8.308173091211391e-05, "loss": 0.1894, "step": 220 }, { "epoch": 0.29, "learning_rate": 8.292007923490245e-05, "loss": 0.1874, "step": 221 }, { "epoch": 0.29, "learning_rate": 8.275781786045427e-05, "loss": 0.1887, "step": 222 }, { "epoch": 0.3, "learning_rate": 8.259494979393563e-05, "loss": 0.1849, "step": 223 }, { "epoch": 0.3, "learning_rate": 8.243147805174908e-05, "loss": 0.1858, "step": 224 }, { "epoch": 0.3, "learning_rate": 8.226740566147753e-05, "loss": 0.1859, "step": 225 }, { "epoch": 0.3, "learning_rate": 8.210273566182818e-05, "loss": 0.1871, "step": 226 }, { "epoch": 0.3, "learning_rate": 8.193747110257637e-05, "loss": 0.1849, "step": 227 }, { "epoch": 0.3, "learning_rate": 8.177161504450888e-05, "loss": 0.1886, "step": 228 }, { "epoch": 0.3, "learning_rate": 8.160517055936744e-05, "loss": 0.1858, "step": 229 }, { "epoch": 0.31, "learning_rate": 8.143814072979173e-05, "loss": 0.186, "step": 230 }, { "epoch": 0.31, "learning_rate": 8.127052864926232e-05, "loss": 0.1888, "step": 231 }, { "epoch": 0.31, "learning_rate": 8.110233742204339e-05, "loss": 0.1852, "step": 232 }, { "epoch": 0.31, "learning_rate": 8.093357016312517e-05, "loss": 0.1865, "step": 233 }, { "epoch": 0.31, "learning_rate": 8.07642299981664e-05, "loss": 0.1864, "step": 234 }, { "epoch": 0.31, "learning_rate": 8.059432006343623e-05, "loss": 0.1816, "step": 235 }, { "epoch": 0.31, "learning_rate": 8.042384350575632e-05, "loss": 0.1834, "step": 236 }, { "epoch": 0.31, "learning_rate": 8.025280348244246e-05, "loss": 0.1859, "step": 237 }, { "epoch": 0.32, "learning_rate": 8.008120316124612e-05, "loss": 0.1831, "step": 238 }, { "epoch": 0.32, "learning_rate": 7.990904572029583e-05, "loss": 0.1879, "step": 239 }, { "epoch": 0.32, "learning_rate": 7.973633434803817e-05, "loss": 0.1839, "step": 240 }, { "epoch": 0.32, "learning_rate": 7.956307224317891e-05, "loss": 0.1888, "step": 241 }, { "epoch": 0.32, "learning_rate": 7.938926261462366e-05, "loss": 0.1832, "step": 242 }, { "epoch": 0.32, "learning_rate": 7.921490868141843e-05, "loss": 0.1849, "step": 243 }, { "epoch": 0.32, "learning_rate": 7.904001367269004e-05, "loss": 0.1815, "step": 244 }, { "epoch": 0.33, "learning_rate": 7.886458082758637e-05, "loss": 0.1827, "step": 245 }, { "epoch": 0.33, "learning_rate": 7.868861339521624e-05, "loss": 0.1835, "step": 246 }, { "epoch": 0.33, "learning_rate": 7.851211463458936e-05, "loss": 0.1868, "step": 247 }, { "epoch": 0.33, "learning_rate": 7.833508781455588e-05, "loss": 0.1837, "step": 248 }, { "epoch": 0.33, "learning_rate": 7.815753621374594e-05, "loss": 0.182, "step": 249 }, { "epoch": 0.33, "learning_rate": 7.797946312050883e-05, "loss": 0.1827, "step": 250 }, { "epoch": 0.33, "learning_rate": 7.780087183285223e-05, "loss": 0.1911, "step": 251 }, { "epoch": 0.33, "learning_rate": 7.7621765658381e-05, "loss": 0.1867, "step": 252 }, { "epoch": 0.34, "learning_rate": 7.744214791423596e-05, "loss": 0.184, "step": 253 }, { "epoch": 0.34, "learning_rate": 7.726202192703256e-05, "loss": 0.1837, "step": 254 }, { "epoch": 0.34, "learning_rate": 7.708139103279908e-05, "loss": 0.1864, "step": 255 }, { "epoch": 0.34, "learning_rate": 7.690025857691498e-05, "loss": 0.1843, "step": 256 }, { "epoch": 0.34, "learning_rate": 7.671862791404896e-05, "loss": 0.1843, "step": 257 }, { "epoch": 0.34, "learning_rate": 7.653650240809667e-05, "loss": 0.1816, "step": 258 }, { "epoch": 0.34, "learning_rate": 7.635388543211861e-05, "loss": 0.1887, "step": 259 }, { "epoch": 0.35, "learning_rate": 7.617078036827752e-05, "loss": 0.1826, "step": 260 }, { "epoch": 0.35, "learning_rate": 7.59871906077758e-05, "loss": 0.1876, "step": 261 }, { "epoch": 0.35, "learning_rate": 7.580311955079264e-05, "loss": 0.1852, "step": 262 }, { "epoch": 0.35, "learning_rate": 7.56185706064212e-05, "loss": 0.1837, "step": 263 }, { "epoch": 0.35, "learning_rate": 7.543354719260523e-05, "loss": 0.1813, "step": 264 }, { "epoch": 0.35, "learning_rate": 7.524805273607603e-05, "loss": 0.1899, "step": 265 }, { "epoch": 0.35, "learning_rate": 7.506209067228878e-05, "loss": 0.1878, "step": 266 }, { "epoch": 0.35, "learning_rate": 7.487566444535903e-05, "loss": 0.1852, "step": 267 }, { "epoch": 0.36, "learning_rate": 7.468877750799886e-05, "loss": 0.1879, "step": 268 }, { "epoch": 0.36, "learning_rate": 7.450143332145296e-05, "loss": 0.1868, "step": 269 }, { "epoch": 0.36, "learning_rate": 7.43136353554345e-05, "loss": 0.1851, "step": 270 }, { "epoch": 0.36, "learning_rate": 7.412538708806093e-05, "loss": 0.1835, "step": 271 }, { "epoch": 0.36, "learning_rate": 7.393669200578943e-05, "loss": 0.1855, "step": 272 }, { "epoch": 0.36, "learning_rate": 7.374755360335253e-05, "loss": 0.1827, "step": 273 }, { "epoch": 0.36, "learning_rate": 7.355797538369321e-05, "loss": 0.1889, "step": 274 }, { "epoch": 0.37, "learning_rate": 7.336796085790013e-05, "loss": 0.1799, "step": 275 }, { "epoch": 0.37, "learning_rate": 7.317751354514255e-05, "loss": 0.1863, "step": 276 }, { "epoch": 0.37, "learning_rate": 7.298663697260522e-05, "loss": 0.1855, "step": 277 }, { "epoch": 0.37, "learning_rate": 7.279533467542294e-05, "loss": 0.1851, "step": 278 }, { "epoch": 0.37, "learning_rate": 7.260361019661522e-05, "loss": 0.1851, "step": 279 }, { "epoch": 0.37, "learning_rate": 7.241146708702053e-05, "loss": 0.1856, "step": 280 }, { "epoch": 0.37, "learning_rate": 7.221890890523068e-05, "loss": 0.187, "step": 281 }, { "epoch": 0.37, "learning_rate": 7.202593921752475e-05, "loss": 0.1826, "step": 282 }, { "epoch": 0.38, "learning_rate": 7.18325615978032e-05, "loss": 0.1866, "step": 283 }, { "epoch": 0.38, "learning_rate": 7.163877962752157e-05, "loss": 0.1859, "step": 284 }, { "epoch": 0.38, "learning_rate": 7.144459689562418e-05, "loss": 0.1843, "step": 285 }, { "epoch": 0.38, "learning_rate": 7.12500169984777e-05, "loss": 0.1833, "step": 286 }, { "epoch": 0.38, "learning_rate": 7.105504353980448e-05, "loss": 0.1841, "step": 287 }, { "epoch": 0.38, "learning_rate": 7.085968013061584e-05, "loss": 0.185, "step": 288 }, { "epoch": 0.38, "learning_rate": 7.066393038914521e-05, "loss": 0.1838, "step": 289 }, { "epoch": 0.39, "learning_rate": 7.046779794078108e-05, "loss": 0.1852, "step": 290 }, { "epoch": 0.39, "learning_rate": 7.027128641799986e-05, "loss": 0.1832, "step": 291 }, { "epoch": 0.39, "learning_rate": 7.007439946029865e-05, "loss": 0.1899, "step": 292 }, { "epoch": 0.39, "learning_rate": 6.98771407141278e-05, "loss": 0.1841, "step": 293 }, { "epoch": 0.39, "learning_rate": 6.967951383282334e-05, "loss": 0.1869, "step": 294 }, { "epoch": 0.39, "learning_rate": 6.94815224765394e-05, "loss": 0.1847, "step": 295 }, { "epoch": 0.39, "learning_rate": 6.928317031218035e-05, "loss": 0.1848, "step": 296 }, { "epoch": 0.39, "learning_rate": 6.908446101333295e-05, "loss": 0.1849, "step": 297 }, { "epoch": 0.4, "learning_rate": 6.888539826019825e-05, "loss": 0.1829, "step": 298 }, { "epoch": 0.4, "learning_rate": 6.868598573952345e-05, "loss": 0.1849, "step": 299 }, { "epoch": 0.4, "learning_rate": 6.848622714453366e-05, "loss": 0.1844, "step": 300 }, { "epoch": 0.4, "eval_loss": 0.17592795193195343, "eval_runtime": 11.447, "eval_samples_per_second": 174.719, "eval_steps_per_second": 0.699, "step": 300 }, { "epoch": 0.4, "learning_rate": 6.828612617486347e-05, "loss": 0.1841, "step": 301 }, { "epoch": 0.4, "learning_rate": 6.808568653648838e-05, "loss": 0.1843, "step": 302 }, { "epoch": 0.4, "learning_rate": 6.78849119416563e-05, "loss": 0.1872, "step": 303 }, { "epoch": 0.4, "learning_rate": 6.76838061088186e-05, "loss": 0.1856, "step": 304 }, { "epoch": 0.41, "learning_rate": 6.748237276256143e-05, "loss": 0.1849, "step": 305 }, { "epoch": 0.41, "learning_rate": 6.728061563353667e-05, "loss": 0.1817, "step": 306 }, { "epoch": 0.41, "learning_rate": 6.707853845839278e-05, "loss": 0.1852, "step": 307 }, { "epoch": 0.41, "learning_rate": 6.687614497970566e-05, "loss": 0.1837, "step": 308 }, { "epoch": 0.41, "learning_rate": 6.667343894590935e-05, "loss": 0.1832, "step": 309 }, { "epoch": 0.41, "learning_rate": 6.647042411122652e-05, "loss": 0.1843, "step": 310 }, { "epoch": 0.41, "learning_rate": 6.626710423559901e-05, "loss": 0.1826, "step": 311 }, { "epoch": 0.41, "learning_rate": 6.606348308461823e-05, "loss": 0.1848, "step": 312 }, { "epoch": 0.42, "learning_rate": 6.585956442945532e-05, "loss": 0.1848, "step": 313 }, { "epoch": 0.42, "learning_rate": 6.565535204679134e-05, "loss": 0.1812, "step": 314 }, { "epoch": 0.42, "learning_rate": 6.545084971874738e-05, "loss": 0.186, "step": 315 }, { "epoch": 0.42, "learning_rate": 6.524606123281445e-05, "loss": 0.1813, "step": 316 }, { "epoch": 0.42, "learning_rate": 6.504099038178338e-05, "loss": 0.1814, "step": 317 }, { "epoch": 0.42, "learning_rate": 6.483564096367451e-05, "loss": 0.1849, "step": 318 }, { "epoch": 0.42, "learning_rate": 6.463001678166744e-05, "loss": 0.1855, "step": 319 }, { "epoch": 0.42, "learning_rate": 6.442412164403045e-05, "loss": 0.1865, "step": 320 }, { "epoch": 0.43, "learning_rate": 6.42179593640502e-05, "loss": 0.1828, "step": 321 }, { "epoch": 0.43, "learning_rate": 6.401153375996081e-05, "loss": 0.1834, "step": 322 }, { "epoch": 0.43, "learning_rate": 6.380484865487347e-05, "loss": 0.1817, "step": 323 }, { "epoch": 0.43, "learning_rate": 6.359790787670527e-05, "loss": 0.1855, "step": 324 }, { "epoch": 0.43, "learning_rate": 6.339071525810871e-05, "loss": 0.1837, "step": 325 }, { "epoch": 0.43, "learning_rate": 6.318327463640037e-05, "loss": 0.1845, "step": 326 }, { "epoch": 0.43, "learning_rate": 6.297558985348998e-05, "loss": 0.1817, "step": 327 }, { "epoch": 0.44, "learning_rate": 6.276766475580935e-05, "loss": 0.1869, "step": 328 }, { "epoch": 0.44, "learning_rate": 6.255950319424098e-05, "loss": 0.1838, "step": 329 }, { "epoch": 0.44, "learning_rate": 6.235110902404679e-05, "loss": 0.1851, "step": 330 }, { "epoch": 0.44, "learning_rate": 6.21424861047968e-05, "loss": 0.184, "step": 331 }, { "epoch": 0.44, "learning_rate": 6.193363830029751e-05, "loss": 0.1847, "step": 332 }, { "epoch": 0.44, "learning_rate": 6.172456947852049e-05, "loss": 0.1855, "step": 333 }, { "epoch": 0.44, "learning_rate": 6.151528351153061e-05, "loss": 0.1854, "step": 334 }, { "epoch": 0.44, "learning_rate": 6.130578427541441e-05, "loss": 0.183, "step": 335 }, { "epoch": 0.45, "learning_rate": 6.109607565020828e-05, "loss": 0.1794, "step": 336 }, { "epoch": 0.45, "learning_rate": 6.08861615198266e-05, "loss": 0.1869, "step": 337 }, { "epoch": 0.45, "learning_rate": 6.067604577198981e-05, "loss": 0.188, "step": 338 }, { "epoch": 0.45, "learning_rate": 6.046573229815243e-05, "loss": 0.1831, "step": 339 }, { "epoch": 0.45, "learning_rate": 6.025522499343097e-05, "loss": 0.1886, "step": 340 }, { "epoch": 0.45, "learning_rate": 6.004452775653178e-05, "loss": 0.1861, "step": 341 }, { "epoch": 0.45, "learning_rate": 5.9833644489678816e-05, "loss": 0.1817, "step": 342 }, { "epoch": 0.46, "learning_rate": 5.96225790985415e-05, "loss": 0.1863, "step": 343 }, { "epoch": 0.46, "learning_rate": 5.941133549216221e-05, "loss": 0.1807, "step": 344 }, { "epoch": 0.46, "learning_rate": 5.919991758288401e-05, "loss": 0.1846, "step": 345 }, { "epoch": 0.46, "learning_rate": 5.898832928627811e-05, "loss": 0.1828, "step": 346 }, { "epoch": 0.46, "learning_rate": 5.877657452107142e-05, "loss": 0.1823, "step": 347 }, { "epoch": 0.46, "learning_rate": 5.856465720907388e-05, "loss": 0.1843, "step": 348 }, { "epoch": 0.46, "learning_rate": 5.835258127510596e-05, "loss": 0.183, "step": 349 }, { "epoch": 0.46, "learning_rate": 5.8140350646925845e-05, "loss": 0.1842, "step": 350 }, { "epoch": 0.47, "learning_rate": 5.7927969255156736e-05, "loss": 0.1845, "step": 351 }, { "epoch": 0.47, "learning_rate": 5.771544103321407e-05, "loss": 0.1794, "step": 352 }, { "epoch": 0.47, "learning_rate": 5.7502769917232635e-05, "loss": 0.1782, "step": 353 }, { "epoch": 0.47, "learning_rate": 5.7289959845993736e-05, "loss": 0.187, "step": 354 }, { "epoch": 0.47, "learning_rate": 5.7077014760852164e-05, "loss": 0.1829, "step": 355 }, { "epoch": 0.47, "learning_rate": 5.686393860566324e-05, "loss": 0.1849, "step": 356 }, { "epoch": 0.47, "learning_rate": 5.66507353267098e-05, "loss": 0.1848, "step": 357 }, { "epoch": 0.48, "learning_rate": 5.643740887262905e-05, "loss": 0.1825, "step": 358 }, { "epoch": 0.48, "learning_rate": 5.6223963194339466e-05, "loss": 0.1856, "step": 359 }, { "epoch": 0.48, "learning_rate": 5.601040224496764e-05, "loss": 0.183, "step": 360 }, { "epoch": 0.48, "learning_rate": 5.579672997977503e-05, "loss": 0.1833, "step": 361 }, { "epoch": 0.48, "learning_rate": 5.5582950356084726e-05, "loss": 0.1821, "step": 362 }, { "epoch": 0.48, "learning_rate": 5.536906733320816e-05, "loss": 0.1823, "step": 363 }, { "epoch": 0.48, "learning_rate": 5.515508487237174e-05, "loss": 0.1845, "step": 364 }, { "epoch": 0.48, "learning_rate": 5.494100693664358e-05, "loss": 0.1825, "step": 365 }, { "epoch": 0.49, "learning_rate": 5.4726837490859964e-05, "loss": 0.181, "step": 366 }, { "epoch": 0.49, "learning_rate": 5.4512580501552056e-05, "loss": 0.1839, "step": 367 }, { "epoch": 0.49, "learning_rate": 5.429823993687233e-05, "loss": 0.1834, "step": 368 }, { "epoch": 0.49, "learning_rate": 5.4083819766521135e-05, "loss": 0.1843, "step": 369 }, { "epoch": 0.49, "learning_rate": 5.386932396167316e-05, "loss": 0.1827, "step": 370 }, { "epoch": 0.49, "learning_rate": 5.365475649490388e-05, "loss": 0.1844, "step": 371 }, { "epoch": 0.49, "learning_rate": 5.3440121340116e-05, "loss": 0.1854, "step": 372 }, { "epoch": 0.5, "learning_rate": 5.3225422472465824e-05, "loss": 0.1843, "step": 373 }, { "epoch": 0.5, "learning_rate": 5.3010663868289655e-05, "loss": 0.1815, "step": 374 }, { "epoch": 0.5, "learning_rate": 5.279584950503017e-05, "loss": 0.1811, "step": 375 }, { "epoch": 0.5, "learning_rate": 5.2580983361162696e-05, "loss": 0.1856, "step": 376 }, { "epoch": 0.5, "learning_rate": 5.23660694161216e-05, "loss": 0.1832, "step": 377 }, { "epoch": 0.5, "learning_rate": 5.215111165022652e-05, "loss": 0.1826, "step": 378 }, { "epoch": 0.5, "learning_rate": 5.193611404460873e-05, "loss": 0.1826, "step": 379 }, { "epoch": 0.5, "learning_rate": 5.172108058113729e-05, "loss": 0.1845, "step": 380 }, { "epoch": 0.51, "learning_rate": 5.1506015242345415e-05, "loss": 0.1815, "step": 381 }, { "epoch": 0.51, "learning_rate": 5.129092201135666e-05, "loss": 0.1848, "step": 382 }, { "epoch": 0.51, "learning_rate": 5.1075804871811115e-05, "loss": 0.1845, "step": 383 }, { "epoch": 0.51, "learning_rate": 5.086066780779174e-05, "loss": 0.184, "step": 384 }, { "epoch": 0.51, "learning_rate": 5.064551480375046e-05, "loss": 0.1851, "step": 385 }, { "epoch": 0.51, "learning_rate": 5.0430349844434424e-05, "loss": 0.187, "step": 386 }, { "epoch": 0.51, "learning_rate": 5.021517691481221e-05, "loss": 0.1817, "step": 387 }, { "epoch": 0.52, "learning_rate": 5e-05, "loss": 0.1835, "step": 388 }, { "epoch": 0.52, "learning_rate": 4.978482308518779e-05, "loss": 0.1827, "step": 389 }, { "epoch": 0.52, "learning_rate": 4.956965015556559e-05, "loss": 0.1839, "step": 390 }, { "epoch": 0.52, "learning_rate": 4.9354485196249554e-05, "loss": 0.1828, "step": 391 }, { "epoch": 0.52, "learning_rate": 4.9139332192208276e-05, "loss": 0.1843, "step": 392 }, { "epoch": 0.52, "learning_rate": 4.892419512818889e-05, "loss": 0.187, "step": 393 }, { "epoch": 0.52, "learning_rate": 4.870907798864337e-05, "loss": 0.1799, "step": 394 }, { "epoch": 0.52, "learning_rate": 4.8493984757654596e-05, "loss": 0.1842, "step": 395 }, { "epoch": 0.53, "learning_rate": 4.827891941886273e-05, "loss": 0.183, "step": 396 }, { "epoch": 0.53, "learning_rate": 4.806388595539129e-05, "loss": 0.1813, "step": 397 }, { "epoch": 0.53, "learning_rate": 4.784888834977347e-05, "loss": 0.1822, "step": 398 }, { "epoch": 0.53, "learning_rate": 4.763393058387841e-05, "loss": 0.1836, "step": 399 }, { "epoch": 0.53, "learning_rate": 4.741901663883731e-05, "loss": 0.1853, "step": 400 }, { "epoch": 0.53, "eval_loss": 0.1743704080581665, "eval_runtime": 12.7542, "eval_samples_per_second": 156.811, "eval_steps_per_second": 0.627, "step": 400 }, { "epoch": 0.53, "learning_rate": 4.720415049496984e-05, "loss": 0.1851, "step": 401 }, { "epoch": 0.53, "learning_rate": 4.698933613171035e-05, "loss": 0.1835, "step": 402 }, { "epoch": 0.54, "learning_rate": 4.6774577527534195e-05, "loss": 0.185, "step": 403 }, { "epoch": 0.54, "learning_rate": 4.655987865988401e-05, "loss": 0.1858, "step": 404 }, { "epoch": 0.54, "learning_rate": 4.634524350509613e-05, "loss": 0.1825, "step": 405 }, { "epoch": 0.54, "learning_rate": 4.613067603832685e-05, "loss": 0.1885, "step": 406 }, { "epoch": 0.54, "learning_rate": 4.5916180233478883e-05, "loss": 0.1833, "step": 407 }, { "epoch": 0.54, "learning_rate": 4.5701760063127686e-05, "loss": 0.1862, "step": 408 }, { "epoch": 0.54, "learning_rate": 4.548741949844795e-05, "loss": 0.1837, "step": 409 }, { "epoch": 0.54, "learning_rate": 4.527316250914004e-05, "loss": 0.1873, "step": 410 }, { "epoch": 0.55, "learning_rate": 4.505899306335643e-05, "loss": 0.1787, "step": 411 }, { "epoch": 0.55, "learning_rate": 4.484491512762827e-05, "loss": 0.1883, "step": 412 }, { "epoch": 0.55, "learning_rate": 4.463093266679185e-05, "loss": 0.1806, "step": 413 }, { "epoch": 0.55, "learning_rate": 4.4417049643915286e-05, "loss": 0.183, "step": 414 }, { "epoch": 0.55, "learning_rate": 4.420327002022498e-05, "loss": 0.1831, "step": 415 }, { "epoch": 0.55, "learning_rate": 4.398959775503238e-05, "loss": 0.183, "step": 416 }, { "epoch": 0.55, "learning_rate": 4.377603680566054e-05, "loss": 0.1782, "step": 417 }, { "epoch": 0.56, "learning_rate": 4.356259112737096e-05, "loss": 0.1871, "step": 418 }, { "epoch": 0.56, "learning_rate": 4.334926467329021e-05, "loss": 0.1825, "step": 419 }, { "epoch": 0.56, "learning_rate": 4.313606139433676e-05, "loss": 0.1838, "step": 420 }, { "epoch": 0.56, "learning_rate": 4.292298523914785e-05, "loss": 0.1816, "step": 421 }, { "epoch": 0.56, "learning_rate": 4.271004015400627e-05, "loss": 0.1824, "step": 422 }, { "epoch": 0.56, "learning_rate": 4.249723008276737e-05, "loss": 0.182, "step": 423 }, { "epoch": 0.56, "learning_rate": 4.228455896678595e-05, "loss": 0.1817, "step": 424 }, { "epoch": 0.56, "learning_rate": 4.207203074484328e-05, "loss": 0.1808, "step": 425 }, { "epoch": 0.57, "learning_rate": 4.185964935307417e-05, "loss": 0.1776, "step": 426 }, { "epoch": 0.57, "learning_rate": 4.164741872489405e-05, "loss": 0.1831, "step": 427 }, { "epoch": 0.57, "learning_rate": 4.143534279092612e-05, "loss": 0.18, "step": 428 }, { "epoch": 0.57, "learning_rate": 4.1223425478928594e-05, "loss": 0.1816, "step": 429 }, { "epoch": 0.57, "learning_rate": 4.1011670713721905e-05, "loss": 0.1811, "step": 430 }, { "epoch": 0.57, "learning_rate": 4.0800082417115995e-05, "loss": 0.1805, "step": 431 }, { "epoch": 0.57, "learning_rate": 4.05886645078378e-05, "loss": 0.1814, "step": 432 }, { "epoch": 0.58, "learning_rate": 4.0377420901458506e-05, "loss": 0.1798, "step": 433 }, { "epoch": 0.58, "learning_rate": 4.0166355510321195e-05, "loss": 0.1826, "step": 434 }, { "epoch": 0.58, "learning_rate": 3.995547224346824e-05, "loss": 0.1818, "step": 435 }, { "epoch": 0.58, "learning_rate": 3.9744775006569057e-05, "loss": 0.1812, "step": 436 }, { "epoch": 0.58, "learning_rate": 3.953426770184757e-05, "loss": 0.1824, "step": 437 }, { "epoch": 0.58, "learning_rate": 3.9323954228010196e-05, "loss": 0.181, "step": 438 }, { "epoch": 0.58, "learning_rate": 3.911383848017341e-05, "loss": 0.1829, "step": 439 }, { "epoch": 0.58, "learning_rate": 3.890392434979172e-05, "loss": 0.1849, "step": 440 }, { "epoch": 0.59, "learning_rate": 3.8694215724585594e-05, "loss": 0.1821, "step": 441 }, { "epoch": 0.59, "learning_rate": 3.848471648846939e-05, "loss": 0.1822, "step": 442 }, { "epoch": 0.59, "learning_rate": 3.827543052147952e-05, "loss": 0.1866, "step": 443 }, { "epoch": 0.59, "learning_rate": 3.8066361699702495e-05, "loss": 0.1826, "step": 444 }, { "epoch": 0.59, "learning_rate": 3.785751389520323e-05, "loss": 0.1825, "step": 445 }, { "epoch": 0.59, "learning_rate": 3.764889097595322e-05, "loss": 0.1813, "step": 446 }, { "epoch": 0.59, "learning_rate": 3.744049680575905e-05, "loss": 0.1817, "step": 447 }, { "epoch": 0.59, "learning_rate": 3.7232335244190656e-05, "loss": 0.1799, "step": 448 }, { "epoch": 0.6, "learning_rate": 3.702441014651001e-05, "loss": 0.182, "step": 449 }, { "epoch": 0.6, "learning_rate": 3.6816725363599644e-05, "loss": 0.18, "step": 450 }, { "epoch": 0.6, "learning_rate": 3.6609284741891295e-05, "loss": 0.1793, "step": 451 }, { "epoch": 0.6, "learning_rate": 3.640209212329473e-05, "loss": 0.1801, "step": 452 }, { "epoch": 0.6, "learning_rate": 3.619515134512656e-05, "loss": 0.1798, "step": 453 }, { "epoch": 0.6, "learning_rate": 3.5988466240039206e-05, "loss": 0.179, "step": 454 }, { "epoch": 0.6, "learning_rate": 3.578204063594982e-05, "loss": 0.1827, "step": 455 }, { "epoch": 0.61, "learning_rate": 3.5575878355969566e-05, "loss": 0.1843, "step": 456 }, { "epoch": 0.61, "learning_rate": 3.536998321833258e-05, "loss": 0.1821, "step": 457 }, { "epoch": 0.61, "learning_rate": 3.516435903632548e-05, "loss": 0.182, "step": 458 }, { "epoch": 0.61, "learning_rate": 3.495900961821662e-05, "loss": 0.1792, "step": 459 }, { "epoch": 0.61, "learning_rate": 3.475393876718555e-05, "loss": 0.1836, "step": 460 }, { "epoch": 0.61, "learning_rate": 3.4549150281252636e-05, "loss": 0.1849, "step": 461 }, { "epoch": 0.61, "learning_rate": 3.4344647953208675e-05, "loss": 0.1823, "step": 462 }, { "epoch": 0.61, "learning_rate": 3.41404355705447e-05, "loss": 0.1785, "step": 463 }, { "epoch": 0.62, "learning_rate": 3.393651691538178e-05, "loss": 0.181, "step": 464 }, { "epoch": 0.62, "learning_rate": 3.3732895764401004e-05, "loss": 0.1817, "step": 465 }, { "epoch": 0.62, "learning_rate": 3.35295758887735e-05, "loss": 0.1807, "step": 466 }, { "epoch": 0.62, "learning_rate": 3.332656105409066e-05, "loss": 0.1805, "step": 467 }, { "epoch": 0.62, "learning_rate": 3.312385502029434e-05, "loss": 0.1839, "step": 468 }, { "epoch": 0.62, "learning_rate": 3.2921461541607225e-05, "loss": 0.1811, "step": 469 }, { "epoch": 0.62, "learning_rate": 3.271938436646334e-05, "loss": 0.1829, "step": 470 }, { "epoch": 0.63, "learning_rate": 3.251762723743858e-05, "loss": 0.1818, "step": 471 }, { "epoch": 0.63, "learning_rate": 3.231619389118144e-05, "loss": 0.1798, "step": 472 }, { "epoch": 0.63, "learning_rate": 3.2115088058343725e-05, "loss": 0.1813, "step": 473 }, { "epoch": 0.63, "learning_rate": 3.191431346351164e-05, "loss": 0.18, "step": 474 }, { "epoch": 0.63, "learning_rate": 3.171387382513654e-05, "loss": 0.1822, "step": 475 }, { "epoch": 0.63, "learning_rate": 3.151377285546635e-05, "loss": 0.1823, "step": 476 }, { "epoch": 0.63, "learning_rate": 3.1314014260476553e-05, "loss": 0.1826, "step": 477 }, { "epoch": 0.63, "learning_rate": 3.111460173980175e-05, "loss": 0.1785, "step": 478 }, { "epoch": 0.64, "learning_rate": 3.091553898666705e-05, "loss": 0.1844, "step": 479 }, { "epoch": 0.64, "learning_rate": 3.0716829687819643e-05, "loss": 0.1809, "step": 480 }, { "epoch": 0.64, "learning_rate": 3.051847752346061e-05, "loss": 0.182, "step": 481 }, { "epoch": 0.64, "learning_rate": 3.0320486167176664e-05, "loss": 0.1792, "step": 482 }, { "epoch": 0.64, "learning_rate": 3.0122859285872214e-05, "loss": 0.183, "step": 483 }, { "epoch": 0.64, "learning_rate": 2.9925600539701347e-05, "loss": 0.1814, "step": 484 }, { "epoch": 0.64, "learning_rate": 2.972871358200015e-05, "loss": 0.1817, "step": 485 }, { "epoch": 0.65, "learning_rate": 2.9532202059218933e-05, "loss": 0.1777, "step": 486 }, { "epoch": 0.65, "learning_rate": 2.9336069610854788e-05, "loss": 0.1837, "step": 487 }, { "epoch": 0.65, "learning_rate": 2.914031986938417e-05, "loss": 0.1805, "step": 488 }, { "epoch": 0.65, "learning_rate": 2.8944956460195515e-05, "loss": 0.18, "step": 489 }, { "epoch": 0.65, "learning_rate": 2.8749983001522302e-05, "loss": 0.18, "step": 490 }, { "epoch": 0.65, "learning_rate": 2.8555403104375827e-05, "loss": 0.1798, "step": 491 }, { "epoch": 0.65, "learning_rate": 2.8361220372478446e-05, "loss": 0.1829, "step": 492 }, { "epoch": 0.65, "learning_rate": 2.8167438402196805e-05, "loss": 0.1796, "step": 493 }, { "epoch": 0.66, "learning_rate": 2.7974060782475258e-05, "loss": 0.1786, "step": 494 }, { "epoch": 0.66, "learning_rate": 2.778109109476934e-05, "loss": 0.1827, "step": 495 }, { "epoch": 0.66, "learning_rate": 2.7588532912979483e-05, "loss": 0.1789, "step": 496 }, { "epoch": 0.66, "learning_rate": 2.739638980338479e-05, "loss": 0.1817, "step": 497 }, { "epoch": 0.66, "learning_rate": 2.720466532457707e-05, "loss": 0.1815, "step": 498 }, { "epoch": 0.66, "learning_rate": 2.70133630273948e-05, "loss": 0.1788, "step": 499 }, { "epoch": 0.66, "learning_rate": 2.6822486454857453e-05, "loss": 0.1802, "step": 500 }, { "epoch": 0.66, "eval_loss": 0.1759687066078186, "eval_runtime": 12.5156, "eval_samples_per_second": 159.801, "eval_steps_per_second": 0.639, "step": 500 }, { "epoch": 0.67, "learning_rate": 2.6632039142099896e-05, "loss": 0.1804, "step": 501 }, { "epoch": 0.67, "learning_rate": 2.6442024616306804e-05, "loss": 0.1814, "step": 502 }, { "epoch": 0.67, "learning_rate": 2.62524463966475e-05, "loss": 0.1806, "step": 503 }, { "epoch": 0.67, "learning_rate": 2.6063307994210584e-05, "loss": 0.1818, "step": 504 }, { "epoch": 0.67, "learning_rate": 2.5874612911939094e-05, "loss": 0.1796, "step": 505 }, { "epoch": 0.67, "learning_rate": 2.5686364644565486e-05, "loss": 0.1797, "step": 506 }, { "epoch": 0.67, "learning_rate": 2.5498566678547042e-05, "loss": 0.1803, "step": 507 }, { "epoch": 0.67, "learning_rate": 2.531122249200114e-05, "loss": 0.1814, "step": 508 }, { "epoch": 0.68, "learning_rate": 2.5124335554640967e-05, "loss": 0.1796, "step": 509 }, { "epoch": 0.68, "learning_rate": 2.493790932771122e-05, "loss": 0.181, "step": 510 }, { "epoch": 0.68, "learning_rate": 2.4751947263923982e-05, "loss": 0.1802, "step": 511 }, { "epoch": 0.68, "learning_rate": 2.4566452807394785e-05, "loss": 0.1806, "step": 512 }, { "epoch": 0.68, "learning_rate": 2.438142939357882e-05, "loss": 0.1833, "step": 513 }, { "epoch": 0.68, "learning_rate": 2.4196880449207366e-05, "loss": 0.1803, "step": 514 }, { "epoch": 0.68, "learning_rate": 2.4012809392224227e-05, "loss": 0.1813, "step": 515 }, { "epoch": 0.69, "learning_rate": 2.38292196317225e-05, "loss": 0.1845, "step": 516 }, { "epoch": 0.69, "learning_rate": 2.3646114567881393e-05, "loss": 0.1793, "step": 517 }, { "epoch": 0.69, "learning_rate": 2.3463497591903322e-05, "loss": 0.1806, "step": 518 }, { "epoch": 0.69, "learning_rate": 2.328137208595107e-05, "loss": 0.1822, "step": 519 }, { "epoch": 0.69, "learning_rate": 2.309974142308502e-05, "loss": 0.179, "step": 520 }, { "epoch": 0.69, "learning_rate": 2.291860896720094e-05, "loss": 0.181, "step": 521 }, { "epoch": 0.69, "learning_rate": 2.273797807296744e-05, "loss": 0.1832, "step": 522 }, { "epoch": 0.69, "learning_rate": 2.2557852085764053e-05, "loss": 0.1801, "step": 523 }, { "epoch": 0.7, "learning_rate": 2.237823434161902e-05, "loss": 0.1807, "step": 524 }, { "epoch": 0.7, "learning_rate": 2.2199128167147785e-05, "loss": 0.1812, "step": 525 }, { "epoch": 0.7, "learning_rate": 2.2020536879491167e-05, "loss": 0.1777, "step": 526 }, { "epoch": 0.7, "learning_rate": 2.184246378625407e-05, "loss": 0.1801, "step": 527 }, { "epoch": 0.7, "learning_rate": 2.1664912185444124e-05, "loss": 0.1759, "step": 528 }, { "epoch": 0.7, "learning_rate": 2.1487885365410644e-05, "loss": 0.184, "step": 529 }, { "epoch": 0.7, "learning_rate": 2.1311386604783763e-05, "loss": 0.1844, "step": 530 }, { "epoch": 0.71, "learning_rate": 2.113541917241364e-05, "loss": 0.1787, "step": 531 }, { "epoch": 0.71, "learning_rate": 2.0959986327309968e-05, "loss": 0.1798, "step": 532 }, { "epoch": 0.71, "learning_rate": 2.0785091318581577e-05, "loss": 0.178, "step": 533 }, { "epoch": 0.71, "learning_rate": 2.061073738537635e-05, "loss": 0.18, "step": 534 }, { "epoch": 0.71, "learning_rate": 2.0436927756821094e-05, "loss": 0.1792, "step": 535 }, { "epoch": 0.71, "learning_rate": 2.0263665651961834e-05, "loss": 0.1813, "step": 536 }, { "epoch": 0.71, "learning_rate": 2.0090954279704183e-05, "loss": 0.1829, "step": 537 }, { "epoch": 0.71, "learning_rate": 1.9918796838753863e-05, "loss": 0.1773, "step": 538 }, { "epoch": 0.72, "learning_rate": 1.974719651755756e-05, "loss": 0.1797, "step": 539 }, { "epoch": 0.72, "learning_rate": 1.957615649424369e-05, "loss": 0.1785, "step": 540 }, { "epoch": 0.72, "learning_rate": 1.9405679936563786e-05, "loss": 0.1825, "step": 541 }, { "epoch": 0.72, "learning_rate": 1.9235770001833607e-05, "loss": 0.1785, "step": 542 }, { "epoch": 0.72, "learning_rate": 1.9066429836874844e-05, "loss": 0.176, "step": 543 }, { "epoch": 0.72, "learning_rate": 1.889766257795663e-05, "loss": 0.1788, "step": 544 }, { "epoch": 0.72, "learning_rate": 1.8729471350737693e-05, "loss": 0.1807, "step": 545 }, { "epoch": 0.73, "learning_rate": 1.856185927020827e-05, "loss": 0.1799, "step": 546 }, { "epoch": 0.73, "learning_rate": 1.8394829440632565e-05, "loss": 0.182, "step": 547 }, { "epoch": 0.73, "learning_rate": 1.8228384955491135e-05, "loss": 0.1825, "step": 548 }, { "epoch": 0.73, "learning_rate": 1.806252889742364e-05, "loss": 0.1755, "step": 549 }, { "epoch": 0.73, "learning_rate": 1.789726433817182e-05, "loss": 0.1777, "step": 550 }, { "epoch": 0.73, "learning_rate": 1.7732594338522495e-05, "loss": 0.1823, "step": 551 }, { "epoch": 0.73, "learning_rate": 1.756852194825094e-05, "loss": 0.1793, "step": 552 }, { "epoch": 0.73, "learning_rate": 1.7405050206064373e-05, "loss": 0.1794, "step": 553 }, { "epoch": 0.74, "learning_rate": 1.7242182139545744e-05, "loss": 0.1745, "step": 554 }, { "epoch": 0.74, "learning_rate": 1.7079920765097563e-05, "loss": 0.1771, "step": 555 }, { "epoch": 0.74, "learning_rate": 1.691826908788608e-05, "loss": 0.1755, "step": 556 }, { "epoch": 0.74, "learning_rate": 1.675723010178568e-05, "loss": 0.1806, "step": 557 }, { "epoch": 0.74, "learning_rate": 1.6596806789323315e-05, "loss": 0.1818, "step": 558 }, { "epoch": 0.74, "learning_rate": 1.6437002121623436e-05, "loss": 0.1838, "step": 559 }, { "epoch": 0.74, "learning_rate": 1.6277819058352727e-05, "loss": 0.1775, "step": 560 }, { "epoch": 0.75, "learning_rate": 1.611926054766556e-05, "loss": 0.1781, "step": 561 }, { "epoch": 0.75, "learning_rate": 1.596132952614918e-05, "loss": 0.1791, "step": 562 }, { "epoch": 0.75, "learning_rate": 1.5804028918769485e-05, "loss": 0.1794, "step": 563 }, { "epoch": 0.75, "learning_rate": 1.5647361638816655e-05, "loss": 0.1817, "step": 564 }, { "epoch": 0.75, "learning_rate": 1.549133058785141e-05, "loss": 0.1797, "step": 565 }, { "epoch": 0.75, "learning_rate": 1.5335938655651122e-05, "loss": 0.1815, "step": 566 }, { "epoch": 0.75, "learning_rate": 1.5181188720156392e-05, "loss": 0.1824, "step": 567 }, { "epoch": 0.75, "learning_rate": 1.5027083647417655e-05, "loss": 0.1785, "step": 568 }, { "epoch": 0.76, "learning_rate": 1.4873626291542148e-05, "loss": 0.1811, "step": 569 }, { "epoch": 0.76, "learning_rate": 1.4720819494641109e-05, "loss": 0.1794, "step": 570 }, { "epoch": 0.76, "learning_rate": 1.4568666086777021e-05, "loss": 0.183, "step": 571 }, { "epoch": 0.76, "learning_rate": 1.4417168885911276e-05, "loss": 0.1773, "step": 572 }, { "epoch": 0.76, "learning_rate": 1.4266330697851954e-05, "loss": 0.176, "step": 573 }, { "epoch": 0.76, "learning_rate": 1.4116154316201907e-05, "loss": 0.1754, "step": 574 }, { "epoch": 0.76, "learning_rate": 1.3966642522306956e-05, "loss": 0.1771, "step": 575 }, { "epoch": 0.76, "learning_rate": 1.381779808520438e-05, "loss": 0.1816, "step": 576 }, { "epoch": 0.77, "learning_rate": 1.3669623761571726e-05, "loss": 0.1799, "step": 577 }, { "epoch": 0.77, "learning_rate": 1.3522122295675616e-05, "loss": 0.1761, "step": 578 }, { "epoch": 0.77, "learning_rate": 1.337529641932107e-05, "loss": 0.1803, "step": 579 }, { "epoch": 0.77, "learning_rate": 1.3229148851800721e-05, "loss": 0.1811, "step": 580 }, { "epoch": 0.77, "learning_rate": 1.308368229984468e-05, "loss": 0.1794, "step": 581 }, { "epoch": 0.77, "learning_rate": 1.2938899457570208e-05, "loss": 0.1805, "step": 582 }, { "epoch": 0.77, "learning_rate": 1.2794803006431982e-05, "loss": 0.1784, "step": 583 }, { "epoch": 0.78, "learning_rate": 1.265139561517224e-05, "loss": 0.1807, "step": 584 }, { "epoch": 0.78, "learning_rate": 1.2508679939771583e-05, "loss": 0.1771, "step": 585 }, { "epoch": 0.78, "learning_rate": 1.2366658623399584e-05, "loss": 0.1801, "step": 586 }, { "epoch": 0.78, "learning_rate": 1.2225334296365987e-05, "loss": 0.1794, "step": 587 }, { "epoch": 0.78, "learning_rate": 1.2084709576071884e-05, "loss": 0.1778, "step": 588 }, { "epoch": 0.78, "learning_rate": 1.1944787066961265e-05, "loss": 0.1818, "step": 589 }, { "epoch": 0.78, "learning_rate": 1.1805569360472868e-05, "loss": 0.181, "step": 590 }, { "epoch": 0.78, "learning_rate": 1.166705903499205e-05, "loss": 0.1812, "step": 591 }, { "epoch": 0.79, "learning_rate": 1.1529258655803116e-05, "loss": 0.1804, "step": 592 }, { "epoch": 0.79, "learning_rate": 1.1392170775041788e-05, "loss": 0.1799, "step": 593 }, { "epoch": 0.79, "learning_rate": 1.1255797931647971e-05, "loss": 0.1758, "step": 594 }, { "epoch": 0.79, "learning_rate": 1.1120142651318665e-05, "loss": 0.1787, "step": 595 }, { "epoch": 0.79, "learning_rate": 1.0985207446461221e-05, "loss": 0.1805, "step": 596 }, { "epoch": 0.79, "learning_rate": 1.0850994816146858e-05, "loss": 0.1797, "step": 597 }, { "epoch": 0.79, "learning_rate": 1.0717507246064273e-05, "loss": 0.1815, "step": 598 }, { "epoch": 0.8, "learning_rate": 1.0584747208473739e-05, "loss": 0.1799, "step": 599 }, { "epoch": 0.8, "learning_rate": 1.0452717162161141e-05, "loss": 0.1798, "step": 600 }, { "epoch": 0.8, "eval_loss": 0.1740500032901764, "eval_runtime": 12.3559, "eval_samples_per_second": 161.866, "eval_steps_per_second": 0.647, "step": 600 }, { "epoch": 0.8, "learning_rate": 1.0321419552392636e-05, "loss": 0.1745, "step": 601 }, { "epoch": 0.8, "learning_rate": 1.0190856810869215e-05, "loss": 0.1753, "step": 602 }, { "epoch": 0.8, "learning_rate": 1.0061031355681766e-05, "loss": 0.1838, "step": 603 }, { "epoch": 0.8, "learning_rate": 9.931945591266174e-06, "loss": 0.1854, "step": 604 }, { "epoch": 0.8, "learning_rate": 9.803601908358944e-06, "loss": 0.1819, "step": 605 }, { "epoch": 0.8, "learning_rate": 9.676002683952768e-06, "loss": 0.1781, "step": 606 }, { "epoch": 0.81, "learning_rate": 9.549150281252633e-06, "loss": 0.1789, "step": 607 }, { "epoch": 0.81, "learning_rate": 9.423047049631956e-06, "loss": 0.1802, "step": 608 }, { "epoch": 0.81, "learning_rate": 9.297695324589106e-06, "loss": 0.1771, "step": 609 }, { "epoch": 0.81, "learning_rate": 9.173097427704203e-06, "loss": 0.1806, "step": 610 }, { "epoch": 0.81, "learning_rate": 9.049255666596012e-06, "loss": 0.1805, "step": 611 }, { "epoch": 0.81, "learning_rate": 8.926172334879296e-06, "loss": 0.178, "step": 612 }, { "epoch": 0.81, "learning_rate": 8.803849712122292e-06, "loss": 0.1794, "step": 613 }, { "epoch": 0.82, "learning_rate": 8.682290063804526e-06, "loss": 0.1736, "step": 614 }, { "epoch": 0.82, "learning_rate": 8.561495641274825e-06, "loss": 0.1759, "step": 615 }, { "epoch": 0.82, "learning_rate": 8.441468681709602e-06, "loss": 0.1752, "step": 616 }, { "epoch": 0.82, "learning_rate": 8.322211408071512e-06, "loss": 0.181, "step": 617 }, { "epoch": 0.82, "learning_rate": 8.203726029068148e-06, "loss": 0.1796, "step": 618 }, { "epoch": 0.82, "learning_rate": 8.086014739111297e-06, "loss": 0.1765, "step": 619 }, { "epoch": 0.82, "learning_rate": 7.969079718276096e-06, "loss": 0.1818, "step": 620 }, { "epoch": 0.82, "learning_rate": 7.852923132260864e-06, "loss": 0.1747, "step": 621 }, { "epoch": 0.83, "learning_rate": 7.73754713234684e-06, "loss": 0.1779, "step": 622 }, { "epoch": 0.83, "learning_rate": 7.6229538553584556e-06, "loss": 0.1817, "step": 623 }, { "epoch": 0.83, "learning_rate": 7.509145423623609e-06, "loss": 0.1778, "step": 624 }, { "epoch": 0.83, "learning_rate": 7.3961239449345355e-06, "loss": 0.1847, "step": 625 }, { "epoch": 0.83, "learning_rate": 7.2838915125086504e-06, "loss": 0.1789, "step": 626 }, { "epoch": 0.83, "learning_rate": 7.172450204949821e-06, "loss": 0.1795, "step": 627 }, { "epoch": 0.83, "learning_rate": 7.061802086209857e-06, "loss": 0.1791, "step": 628 }, { "epoch": 0.84, "learning_rate": 6.951949205550284e-06, "loss": 0.1826, "step": 629 }, { "epoch": 0.84, "learning_rate": 6.842893597504435e-06, "loss": 0.177, "step": 630 }, { "epoch": 0.84, "learning_rate": 6.734637281839701e-06, "loss": 0.1802, "step": 631 }, { "epoch": 0.84, "learning_rate": 6.627182263520165e-06, "loss": 0.1785, "step": 632 }, { "epoch": 0.84, "learning_rate": 6.52053053266945e-06, "loss": 0.1793, "step": 633 }, { "epoch": 0.84, "learning_rate": 6.41468406453391e-06, "loss": 0.1798, "step": 634 }, { "epoch": 0.84, "learning_rate": 6.30964481944597e-06, "loss": 0.1783, "step": 635 }, { "epoch": 0.84, "learning_rate": 6.205414742787852e-06, "loss": 0.1783, "step": 636 }, { "epoch": 0.85, "learning_rate": 6.1019957649555985e-06, "loss": 0.1762, "step": 637 }, { "epoch": 0.85, "learning_rate": 5.999389801323219e-06, "loss": 0.1808, "step": 638 }, { "epoch": 0.85, "learning_rate": 5.897598752207328e-06, "loss": 0.1756, "step": 639 }, { "epoch": 0.85, "learning_rate": 5.796624502831821e-06, "loss": 0.1791, "step": 640 }, { "epoch": 0.85, "learning_rate": 5.696468923293108e-06, "loss": 0.1774, "step": 641 }, { "epoch": 0.85, "learning_rate": 5.59713386852534e-06, "loss": 0.1781, "step": 642 }, { "epoch": 0.85, "learning_rate": 5.498621178266167e-06, "loss": 0.1763, "step": 643 }, { "epoch": 0.86, "learning_rate": 5.40093267702258e-06, "loss": 0.1749, "step": 644 }, { "epoch": 0.86, "learning_rate": 5.304070174037146e-06, "loss": 0.1769, "step": 645 }, { "epoch": 0.86, "learning_rate": 5.208035463254557e-06, "loss": 0.1763, "step": 646 }, { "epoch": 0.86, "learning_rate": 5.112830323288314e-06, "loss": 0.1813, "step": 647 }, { "epoch": 0.86, "learning_rate": 5.018456517387837e-06, "loss": 0.1764, "step": 648 }, { "epoch": 0.86, "learning_rate": 4.924915793405799e-06, "loss": 0.1795, "step": 649 }, { "epoch": 0.86, "learning_rate": 4.832209883765782e-06, "loss": 0.1787, "step": 650 }, { "epoch": 0.86, "learning_rate": 4.74034050543013e-06, "loss": 0.1728, "step": 651 }, { "epoch": 0.87, "learning_rate": 4.649309359868209e-06, "loss": 0.1754, "step": 652 }, { "epoch": 0.87, "learning_rate": 4.559118133024853e-06, "loss": 0.1747, "step": 653 }, { "epoch": 0.87, "learning_rate": 4.4697684952891895e-06, "loss": 0.1775, "step": 654 }, { "epoch": 0.87, "learning_rate": 4.38126210146364e-06, "loss": 0.1758, "step": 655 }, { "epoch": 0.87, "learning_rate": 4.29360059073331e-06, "loss": 0.182, "step": 656 }, { "epoch": 0.87, "learning_rate": 4.2067855866356475e-06, "loss": 0.1763, "step": 657 }, { "epoch": 0.87, "learning_rate": 4.12081869703031e-06, "loss": 0.1756, "step": 658 }, { "epoch": 0.88, "learning_rate": 4.035701514069484e-06, "loss": 0.1768, "step": 659 }, { "epoch": 0.88, "learning_rate": 3.951435614168275e-06, "loss": 0.18, "step": 660 }, { "epoch": 0.88, "learning_rate": 3.868022557975626e-06, "loss": 0.1781, "step": 661 }, { "epoch": 0.88, "learning_rate": 3.7854638903453367e-06, "loss": 0.178, "step": 662 }, { "epoch": 0.88, "learning_rate": 3.70376114030751e-06, "loss": 0.1777, "step": 663 }, { "epoch": 0.88, "learning_rate": 3.6229158210401737e-06, "loss": 0.1773, "step": 664 }, { "epoch": 0.88, "learning_rate": 3.5429294298412853e-06, "loss": 0.1762, "step": 665 }, { "epoch": 0.88, "learning_rate": 3.463803448101033e-06, "loss": 0.179, "step": 666 }, { "epoch": 0.89, "learning_rate": 3.3855393412743352e-06, "loss": 0.1788, "step": 667 }, { "epoch": 0.89, "learning_rate": 3.308138558853746e-06, "loss": 0.1796, "step": 668 }, { "epoch": 0.89, "learning_rate": 3.231602534342587e-06, "loss": 0.1791, "step": 669 }, { "epoch": 0.89, "learning_rate": 3.15593268522843e-06, "loss": 0.1765, "step": 670 }, { "epoch": 0.89, "learning_rate": 3.081130412956795e-06, "loss": 0.1771, "step": 671 }, { "epoch": 0.89, "learning_rate": 3.0071971029052348e-06, "loss": 0.1765, "step": 672 }, { "epoch": 0.89, "learning_rate": 2.934134124357646e-06, "loss": 0.1746, "step": 673 }, { "epoch": 0.9, "learning_rate": 2.86194283047897e-06, "loss": 0.177, "step": 674 }, { "epoch": 0.9, "learning_rate": 2.7906245582900338e-06, "loss": 0.1771, "step": 675 }, { "epoch": 0.9, "learning_rate": 2.720180628642871e-06, "loss": 0.1756, "step": 676 }, { "epoch": 0.9, "learning_rate": 2.6506123461962408e-06, "loss": 0.1767, "step": 677 }, { "epoch": 0.9, "learning_rate": 2.5819209993914184e-06, "loss": 0.1774, "step": 678 }, { "epoch": 0.9, "learning_rate": 2.5141078604284105e-06, "loss": 0.1796, "step": 679 }, { "epoch": 0.9, "learning_rate": 2.4471741852423237e-06, "loss": 0.1821, "step": 680 }, { "epoch": 0.9, "learning_rate": 2.381121213480131e-06, "loss": 0.1815, "step": 681 }, { "epoch": 0.91, "learning_rate": 2.3159501684777207e-06, "loss": 0.1796, "step": 682 }, { "epoch": 0.91, "learning_rate": 2.2516622572372414e-06, "loss": 0.1771, "step": 683 }, { "epoch": 0.91, "learning_rate": 2.188258670404719e-06, "loss": 0.1796, "step": 684 }, { "epoch": 0.91, "learning_rate": 2.125740582248037e-06, "loss": 0.1805, "step": 685 }, { "epoch": 0.91, "learning_rate": 2.0641091506351796e-06, "loss": 0.1793, "step": 686 }, { "epoch": 0.91, "learning_rate": 2.003365517012773e-06, "loss": 0.1804, "step": 687 }, { "epoch": 0.91, "learning_rate": 1.943510806384968e-06, "loss": 0.1777, "step": 688 }, { "epoch": 0.92, "learning_rate": 1.884546127292569e-06, "loss": 0.176, "step": 689 }, { "epoch": 0.92, "learning_rate": 1.8264725717925591e-06, "loss": 0.1745, "step": 690 }, { "epoch": 0.92, "learning_rate": 1.7692912154378294e-06, "loss": 0.1754, "step": 691 }, { "epoch": 0.92, "learning_rate": 1.7130031172572614e-06, "loss": 0.1777, "step": 692 }, { "epoch": 0.92, "learning_rate": 1.6576093197361253e-06, "loss": 0.1799, "step": 693 }, { "epoch": 0.92, "learning_rate": 1.603110848796785e-06, "loss": 0.1751, "step": 694 }, { "epoch": 0.92, "learning_rate": 1.549508713779696e-06, "loss": 0.1756, "step": 695 }, { "epoch": 0.92, "learning_rate": 1.4968039074246486e-06, "loss": 0.1772, "step": 696 }, { "epoch": 0.93, "learning_rate": 1.444997405852483e-06, "loss": 0.1752, "step": 697 }, { "epoch": 0.93, "learning_rate": 1.3940901685469298e-06, "loss": 0.181, "step": 698 }, { "epoch": 0.93, "learning_rate": 1.3440831383369046e-06, "loss": 0.1781, "step": 699 }, { "epoch": 0.93, "learning_rate": 1.294977241378975e-06, "loss": 0.1766, "step": 700 }, { "epoch": 0.93, "eval_loss": 0.1736355423927307, "eval_runtime": 12.0759, "eval_samples_per_second": 165.62, "eval_steps_per_second": 0.662, "step": 700 }, { "epoch": 0.93, "learning_rate": 1.2467733871402655e-06, "loss": 0.1771, "step": 701 }, { "epoch": 0.93, "learning_rate": 1.199472468381585e-06, "loss": 0.1788, "step": 702 }, { "epoch": 0.93, "learning_rate": 1.153075361140915e-06, "loss": 0.1777, "step": 703 }, { "epoch": 0.93, "learning_rate": 1.1075829247171598e-06, "loss": 0.1747, "step": 704 }, { "epoch": 0.94, "learning_rate": 1.0629960016542373e-06, "loss": 0.1772, "step": 705 }, { "epoch": 0.94, "learning_rate": 1.0193154177254971e-06, "loss": 0.1758, "step": 706 }, { "epoch": 0.94, "learning_rate": 9.765419819183997e-07, "loss": 0.1785, "step": 707 }, { "epoch": 0.94, "learning_rate": 9.346764864195335e-07, "loss": 0.1782, "step": 708 }, { "epoch": 0.94, "learning_rate": 8.937197065999714e-07, "loss": 0.1784, "step": 709 }, { "epoch": 0.94, "learning_rate": 8.536724010008878e-07, "loss": 0.178, "step": 710 }, { "epoch": 0.94, "learning_rate": 8.145353113195142e-07, "loss": 0.1757, "step": 711 }, { "epoch": 0.95, "learning_rate": 7.763091623953889e-07, "loss": 0.1776, "step": 712 }, { "epoch": 0.95, "learning_rate": 7.389946621969679e-07, "loss": 0.176, "step": 713 }, { "epoch": 0.95, "learning_rate": 7.0259250180848e-07, "loss": 0.1766, "step": 714 }, { "epoch": 0.95, "learning_rate": 6.67103355417148e-07, "loss": 0.1761, "step": 715 }, { "epoch": 0.95, "learning_rate": 6.325278803006818e-07, "loss": 0.1734, "step": 716 }, { "epoch": 0.95, "learning_rate": 5.988667168151219e-07, "loss": 0.1789, "step": 717 }, { "epoch": 0.95, "learning_rate": 5.661204883829763e-07, "loss": 0.1748, "step": 718 }, { "epoch": 0.95, "learning_rate": 5.342898014816855e-07, "loss": 0.1772, "step": 719 }, { "epoch": 0.96, "learning_rate": 5.033752456323482e-07, "loss": 0.1772, "step": 720 }, { "epoch": 0.96, "learning_rate": 4.73377393388863e-07, "loss": 0.1761, "step": 721 }, { "epoch": 0.96, "learning_rate": 4.4429680032726517e-07, "loss": 0.1796, "step": 722 }, { "epoch": 0.96, "learning_rate": 4.1613400503550114e-07, "loss": 0.1789, "step": 723 }, { "epoch": 0.96, "learning_rate": 3.888895291033867e-07, "loss": 0.1789, "step": 724 }, { "epoch": 0.96, "learning_rate": 3.6256387711299247e-07, "loss": 0.179, "step": 725 }, { "epoch": 0.96, "learning_rate": 3.371575366292845e-07, "loss": 0.1768, "step": 726 }, { "epoch": 0.97, "learning_rate": 3.126709781910986e-07, "loss": 0.1772, "step": 727 }, { "epoch": 0.97, "learning_rate": 2.8910465530240793e-07, "loss": 0.1805, "step": 728 }, { "epoch": 0.97, "learning_rate": 2.6645900442394677e-07, "loss": 0.1769, "step": 729 }, { "epoch": 0.97, "learning_rate": 2.4473444496512233e-07, "loss": 0.1796, "step": 730 }, { "epoch": 0.97, "learning_rate": 2.2393137927623763e-07, "loss": 0.1773, "step": 731 }, { "epoch": 0.97, "learning_rate": 2.0405019264104762e-07, "loss": 0.1774, "step": 732 }, { "epoch": 0.97, "learning_rate": 1.850912532696092e-07, "loss": 0.1786, "step": 733 }, { "epoch": 0.97, "learning_rate": 1.6705491229149218e-07, "loss": 0.1756, "step": 734 }, { "epoch": 0.98, "learning_rate": 1.4994150374924575e-07, "loss": 0.1771, "step": 735 }, { "epoch": 0.98, "learning_rate": 1.337513445922256e-07, "loss": 0.1763, "step": 736 }, { "epoch": 0.98, "learning_rate": 1.184847346707152e-07, "loss": 0.1767, "step": 737 }, { "epoch": 0.98, "learning_rate": 1.041419567303914e-07, "loss": 0.173, "step": 738 }, { "epoch": 0.98, "learning_rate": 9.072327640706756e-08, "loss": 0.1774, "step": 739 }, { "epoch": 0.98, "learning_rate": 7.822894222178633e-08, "loss": 0.1801, "step": 740 }, { "epoch": 0.98, "learning_rate": 6.665918557620665e-08, "loss": 0.1772, "step": 741 }, { "epoch": 0.99, "learning_rate": 5.6014220748318303e-08, "loss": 0.1734, "step": 742 }, { "epoch": 0.99, "learning_rate": 4.6294244888500645e-08, "loss": 0.1778, "step": 743 }, { "epoch": 0.99, "learning_rate": 3.749943801582556e-08, "loss": 0.1755, "step": 744 }, { "epoch": 0.99, "learning_rate": 2.9629963014760065e-08, "loss": 0.1782, "step": 745 }, { "epoch": 0.99, "learning_rate": 2.2685965632135432e-08, "loss": 0.1765, "step": 746 }, { "epoch": 0.99, "learning_rate": 1.666757447443823e-08, "loss": 0.1809, "step": 747 }, { "epoch": 0.99, "learning_rate": 1.1574901005456662e-08, "loss": 0.1772, "step": 748 }, { "epoch": 0.99, "learning_rate": 7.4080395441877834e-09, "loss": 0.175, "step": 749 }, { "epoch": 1.0, "learning_rate": 4.167067263105562e-09, "loss": 0.176, "step": 750 }, { "epoch": 1.0, "learning_rate": 1.8520441867231341e-09, "loss": 0.1788, "step": 751 }, { "epoch": 1.0, "learning_rate": 4.630131904936885e-10, "loss": 0.1729, "step": 752 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.1755, "step": 753 }, { "epoch": 1.0, "step": 753, "total_flos": 0.0, "train_loss": 0.10856808526107514, "train_runtime": 9051.9774, "train_samples_per_second": 85.227, "train_steps_per_second": 0.083 } ], "logging_steps": 1.0, "max_steps": 753, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }