{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9667673716012084, "global_step": 325, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.0001714285714285714, "loss": 2.3426, "step": 2 }, { "epoch": 0.02, "learning_rate": 0.0003428571428571428, "loss": 2.3022, "step": 4 }, { "epoch": 0.04, "learning_rate": 0.0005142857142857142, "loss": 2.2919, "step": 6 }, { "epoch": 0.05, "learning_rate": 0.0005999858099954059, "loss": 2.254, "step": 8 }, { "epoch": 0.06, "learning_rate": 0.0005998722980127753, "loss": 2.3186, "step": 10 }, { "epoch": 0.07, "learning_rate": 0.0005996453169994465, "loss": 2.3878, "step": 12 }, { "epoch": 0.08, "learning_rate": 0.0005993049528430316, "loss": 2.3118, "step": 14 }, { "epoch": 0.1, "learning_rate": 0.0005988513343343228, "loss": 2.3463, "step": 16 }, { "epoch": 0.11, "learning_rate": 0.00059828463311856, "loss": 2.3265, "step": 18 }, { "epoch": 0.12, "learning_rate": 0.0005976050636304812, "loss": 2.3153, "step": 20 }, { "epoch": 0.13, "learning_rate": 0.0005968128830131823, "loss": 2.2885, "step": 22 }, { "epoch": 0.15, "learning_rate": 0.0005959083910208167, "loss": 2.3109, "step": 24 }, { "epoch": 0.16, "learning_rate": 0.0005948919299051705, "loss": 2.3845, "step": 26 }, { "epoch": 0.17, "learning_rate": 0.000593763884286158, "loss": 2.193, "step": 28 }, { "epoch": 0.18, "learning_rate": 0.0005925246810062845, "loss": 2.3119, "step": 30 }, { "epoch": 0.19, "learning_rate": 0.0005911747889691333, "loss": 2.2861, "step": 32 }, { "epoch": 0.21, "learning_rate": 0.000589714718961936, "loss": 2.3374, "step": 34 }, { "epoch": 0.22, "learning_rate": 0.0005881450234622957, "loss": 2.2921, "step": 36 }, { "epoch": 0.23, "learning_rate": 0.0005864662964291346, "loss": 2.3176, "step": 38 }, { "epoch": 0.24, "learning_rate": 0.0005846791730779443, "loss": 2.3021, "step": 40 }, { "epoch": 0.25, "learning_rate": 0.0005827843296404265, "loss": 2.401, "step": 42 }, { "epoch": 0.27, "learning_rate": 0.0005807824831086132, "loss": 2.3131, "step": 44 }, { "epoch": 0.28, "learning_rate": 0.000578674390963562, "loss": 2.3135, "step": 46 }, { "epoch": 0.29, "learning_rate": 0.0005764608508887334, "loss": 2.3548, "step": 48 }, { "epoch": 0.3, "learning_rate": 0.0005741427004681535, "loss": 2.2768, "step": 50 }, { "epoch": 0.31, "learning_rate": 0.0005717208168694799, "loss": 2.3406, "step": 52 }, { "epoch": 0.33, "learning_rate": 0.0005691961165120892, "loss": 2.2956, "step": 54 }, { "epoch": 0.34, "learning_rate": 0.0005665695547203121, "loss": 2.3519, "step": 56 }, { "epoch": 0.35, "learning_rate": 0.0005638421253619467, "loss": 2.2872, "step": 58 }, { "epoch": 0.36, "learning_rate": 0.0005610148604721882, "loss": 2.3266, "step": 60 }, { "epoch": 0.37, "learning_rate": 0.0005580888298631155, "loss": 2.28, "step": 62 }, { "epoch": 0.39, "learning_rate": 0.0005550651407188842, "loss": 2.3256, "step": 64 }, { "epoch": 0.4, "learning_rate": 0.0005519449371767772, "loss": 2.261, "step": 66 }, { "epoch": 0.41, "learning_rate": 0.000548729399894274, "loss": 2.3369, "step": 68 }, { "epoch": 0.42, "learning_rate": 0.0005454197456022998, "loss": 2.2836, "step": 70 }, { "epoch": 0.44, "learning_rate": 0.0005420172266448255, "loss": 2.3457, "step": 72 }, { "epoch": 0.45, "learning_rate": 0.0005385231305049919, "loss": 2.3171, "step": 74 }, { "epoch": 0.46, "learning_rate": 0.0005349387793179368, "loss": 2.3302, "step": 76 }, { "epoch": 0.47, "learning_rate": 0.0005312655293705118, "loss": 2.2234, "step": 78 }, { "epoch": 0.48, "learning_rate": 0.0005275047705880744, "loss": 2.3212, "step": 80 }, { "epoch": 0.5, "learning_rate": 0.000523657926008553, "loss": 2.3651, "step": 82 }, { "epoch": 0.51, "learning_rate": 0.0005197264512439815, "loss": 2.3028, "step": 84 }, { "epoch": 0.52, "learning_rate": 0.0005157118339297094, "loss": 2.3303, "step": 86 }, { "epoch": 0.53, "learning_rate": 0.0005116155931614926, "loss": 2.3035, "step": 88 }, { "epoch": 0.54, "learning_rate": 0.0005074392789206815, "loss": 2.2973, "step": 90 }, { "epoch": 0.56, "learning_rate": 0.0005031844714877223, "loss": 2.333, "step": 92 }, { "epoch": 0.57, "learning_rate": 0.0004988527808441915, "loss": 2.318, "step": 94 }, { "epoch": 0.58, "learning_rate": 0.0004944458460635947, "loss": 2.3368, "step": 96 }, { "epoch": 0.59, "learning_rate": 0.000489965334691154, "loss": 2.2936, "step": 98 }, { "epoch": 0.6, "learning_rate": 0.00048541294211282504, "loss": 2.3529, "step": 100 }, { "epoch": 0.62, "learning_rate": 0.0004807903909137769, "loss": 2.3239, "step": 102 }, { "epoch": 0.63, "learning_rate": 0.0004760994302265817, "loss": 2.3217, "step": 104 }, { "epoch": 0.64, "learning_rate": 0.00047134183506935797, "loss": 2.3422, "step": 106 }, { "epoch": 0.65, "learning_rate": 0.0004665194056741195, "loss": 2.2571, "step": 108 }, { "epoch": 0.66, "learning_rate": 0.00046163396680558354, "loss": 2.3643, "step": 110 }, { "epoch": 0.68, "learning_rate": 0.000456687367070696, "loss": 2.4112, "step": 112 }, { "epoch": 0.69, "learning_rate": 0.00045168147821913443, "loss": 2.4067, "step": 114 }, { "epoch": 0.7, "learning_rate": 0.000446618194435055, "loss": 2.2525, "step": 116 }, { "epoch": 0.71, "learning_rate": 0.0004414994316203498, "loss": 2.3874, "step": 118 }, { "epoch": 0.73, "learning_rate": 0.00043632712666968643, "loss": 2.3131, "step": 120 }, { "epoch": 0.74, "learning_rate": 0.000431103236737604, "loss": 2.3785, "step": 122 }, { "epoch": 0.75, "learning_rate": 0.0004258297384979432, "loss": 2.2967, "step": 124 }, { "epoch": 0.76, "learning_rate": 0.0004205086273958908, "loss": 2.2922, "step": 126 }, { "epoch": 0.77, "learning_rate": 0.00041514191689292, "loss": 2.2529, "step": 128 }, { "epoch": 0.79, "learning_rate": 0.00040973163770491517, "loss": 2.3434, "step": 130 }, { "epoch": 0.8, "learning_rate": 0.00040427983703376615, "loss": 2.2736, "step": 132 }, { "epoch": 0.81, "learning_rate": 0.000398788577792726, "loss": 2.3327, "step": 134 }, { "epoch": 0.82, "learning_rate": 0.0003932599378258218, "loss": 2.3188, "step": 136 }, { "epoch": 0.83, "learning_rate": 0.0003876960091216179, "loss": 2.3499, "step": 138 }, { "epoch": 0.85, "learning_rate": 0.0003820988970216249, "loss": 2.3068, "step": 140 }, { "epoch": 0.86, "learning_rate": 0.0003764707194236576, "loss": 2.3397, "step": 142 }, { "epoch": 0.87, "learning_rate": 0.00037081360598044123, "loss": 2.3231, "step": 144 }, { "epoch": 0.88, "learning_rate": 0.0003651296972937693, "loss": 2.3411, "step": 146 }, { "epoch": 0.89, "learning_rate": 0.0003594211441045188, "loss": 2.2935, "step": 148 }, { "epoch": 0.91, "learning_rate": 0.0003536901064788292, "loss": 2.3536, "step": 150 }, { "epoch": 0.92, "learning_rate": 0.0003479387529907521, "loss": 2.3074, "step": 152 }, { "epoch": 0.93, "learning_rate": 0.00034216925990168234, "loss": 2.3337, "step": 154 }, { "epoch": 0.94, "learning_rate": 0.00033638381033687924, "loss": 2.2681, "step": 156 }, { "epoch": 0.95, "learning_rate": 0.0003305845934593921, "loss": 2.412, "step": 158 }, { "epoch": 0.97, "learning_rate": 0.0003247738036416997, "loss": 2.2969, "step": 160 }, { "epoch": 0.98, "learning_rate": 0.0003189536396353791, "loss": 2.2902, "step": 162 }, { "epoch": 0.99, "learning_rate": 0.0003131263037391177, "loss": 2.3273, "step": 164 }, { "epoch": 1.01, "learning_rate": 0.0003072940009653828, "loss": 2.8654, "step": 166 }, { "epoch": 1.02, "learning_rate": 0.0003014589382060642, "loss": 2.194, "step": 168 }, { "epoch": 1.03, "learning_rate": 0.0002956233233974061, "loss": 2.2423, "step": 170 }, { "epoch": 1.04, "learning_rate": 0.0002897893646845429, "loss": 2.3222, "step": 172 }, { "epoch": 1.05, "learning_rate": 0.00028395926958595737, "loss": 2.1401, "step": 174 }, { "epoch": 1.07, "learning_rate": 0.0002781352441581744, "loss": 2.2044, "step": 176 }, { "epoch": 1.08, "learning_rate": 0.0002723194921610094, "loss": 2.1597, "step": 178 }, { "epoch": 1.09, "learning_rate": 0.00026651421422368434, "loss": 2.2658, "step": 180 }, { "epoch": 1.1, "learning_rate": 0.0002607216070121293, "loss": 2.1676, "step": 182 }, { "epoch": 1.11, "learning_rate": 0.00025494386239778414, "loss": 2.2419, "step": 184 }, { "epoch": 1.13, "learning_rate": 0.0002491831666282127, "loss": 2.1281, "step": 186 }, { "epoch": 1.14, "learning_rate": 0.0002434416994998462, "loss": 2.2088, "step": 188 }, { "epoch": 1.15, "learning_rate": 0.00023772163353316837, "loss": 2.2267, "step": 190 }, { "epoch": 1.16, "learning_rate": 0.00023202513315065184, "loss": 2.2146, "step": 192 }, { "epoch": 1.18, "learning_rate": 0.00022635435385776024, "loss": 2.2714, "step": 194 }, { "epoch": 1.19, "learning_rate": 0.0002207114414273241, "loss": 2.2393, "step": 196 }, { "epoch": 1.2, "learning_rate": 0.00021509853108759883, "loss": 2.1523, "step": 198 }, { "epoch": 1.21, "learning_rate": 0.00020951774671431413, "loss": 2.1741, "step": 200 }, { "epoch": 1.22, "learning_rate": 0.00020397120002701684, "loss": 2.1682, "step": 202 }, { "epoch": 1.24, "learning_rate": 0.000198460989790016, "loss": 2.3001, "step": 204 }, { "epoch": 1.25, "learning_rate": 0.00019298920101822875, "loss": 2.29, "step": 206 }, { "epoch": 1.26, "learning_rate": 0.0001875579041882286, "loss": 2.2567, "step": 208 }, { "epoch": 1.27, "learning_rate": 0.0001821691544547958, "loss": 2.2445, "step": 210 }, { "epoch": 1.28, "learning_rate": 0.000176824990873265, "loss": 2.1725, "step": 212 }, { "epoch": 1.3, "learning_rate": 0.00017152743562796427, "loss": 2.2335, "step": 214 }, { "epoch": 1.31, "learning_rate": 0.00016627849326703853, "loss": 2.2806, "step": 216 }, { "epoch": 1.32, "learning_rate": 0.00016108014994394682, "loss": 2.241, "step": 218 }, { "epoch": 1.33, "learning_rate": 0.00015593437266591854, "loss": 2.235, "step": 220 }, { "epoch": 1.34, "learning_rate": 0.00015084310854965552, "loss": 2.1917, "step": 222 }, { "epoch": 1.36, "learning_rate": 0.00014580828408456074, "loss": 2.2387, "step": 224 }, { "epoch": 1.37, "learning_rate": 0.00014083180440377106, "loss": 2.2408, "step": 226 }, { "epoch": 1.38, "learning_rate": 0.00013591555256327198, "loss": 2.2359, "step": 228 }, { "epoch": 1.39, "learning_rate": 0.0001310613888293659, "loss": 2.1735, "step": 230 }, { "epoch": 1.4, "learning_rate": 0.00012627114997476352, "loss": 2.2335, "step": 232 }, { "epoch": 1.42, "learning_rate": 0.00012154664858356591, "loss": 2.2986, "step": 234 }, { "epoch": 1.43, "learning_rate": 0.00011688967236539785, "loss": 2.2063, "step": 236 }, { "epoch": 1.44, "learning_rate": 0.00011230198347895463, "loss": 2.2019, "step": 238 }, { "epoch": 1.45, "learning_rate": 0.0001077853178652171, "loss": 2.2321, "step": 240 }, { "epoch": 1.47, "learning_rate": 0.00010334138459058639, "loss": 2.2161, "step": 242 }, { "epoch": 1.48, "learning_rate": 9.897186520018855e-05, "loss": 2.2123, "step": 244 }, { "epoch": 1.49, "learning_rate": 9.46784130815926e-05, "loss": 2.2248, "step": 246 }, { "epoch": 1.5, "learning_rate": 9.046265283918349e-05, "loss": 2.2312, "step": 248 }, { "epoch": 1.51, "learning_rate": 8.632617967942566e-05, "loss": 2.2357, "step": 250 }, { "epoch": 1.53, "learning_rate": 8.227055880725182e-05, "loss": 2.1949, "step": 252 }, { "epoch": 1.54, "learning_rate": 7.82973248338023e-05, "loss": 2.1943, "step": 254 }, { "epoch": 1.55, "learning_rate": 7.440798119574219e-05, "loss": 2.1259, "step": 256 }, { "epoch": 1.56, "learning_rate": 7.060399958637441e-05, "loss": 2.1426, "step": 258 }, { "epoch": 1.57, "learning_rate": 6.688681939876344e-05, "loss": 2.2838, "step": 260 }, { "epoch": 1.59, "learning_rate": 6.325784718108195e-05, "loss": 2.1821, "step": 262 }, { "epoch": 1.6, "learning_rate": 5.971845610438546e-05, "loss": 2.2078, "step": 264 }, { "epoch": 1.61, "learning_rate": 5.626998544301631e-05, "loss": 2.2778, "step": 266 }, { "epoch": 1.62, "learning_rate": 5.2913740067834434e-05, "loss": 2.2872, "step": 268 }, { "epoch": 1.63, "learning_rate": 4.9650989952465326e-05, "loss": 2.1835, "step": 270 }, { "epoch": 1.65, "learning_rate": 4.6482969692753415e-05, "loss": 2.1698, "step": 272 }, { "epoch": 1.66, "learning_rate": 4.341087803960204e-05, "loss": 2.256, "step": 274 }, { "epoch": 1.67, "learning_rate": 4.0435877445376396e-05, "loss": 2.1821, "step": 276 }, { "epoch": 1.68, "learning_rate": 3.7559093624042036e-05, "loss": 2.182, "step": 278 }, { "epoch": 1.69, "learning_rate": 3.4781615125204575e-05, "loss": 2.1865, "step": 280 }, { "epoch": 1.71, "learning_rate": 3.210449292221213e-05, "loss": 2.256, "step": 282 }, { "epoch": 1.72, "learning_rate": 2.9528740014476193e-05, "loss": 2.2292, "step": 284 }, { "epoch": 1.73, "learning_rate": 2.7055331044161933e-05, "loss": 2.1894, "step": 286 }, { "epoch": 1.74, "learning_rate": 2.468520192739162e-05, "loss": 2.2869, "step": 288 }, { "epoch": 1.76, "learning_rate": 2.2419249500102832e-05, "loss": 2.2185, "step": 290 }, { "epoch": 1.77, "learning_rate": 2.0258331178693286e-05, "loss": 2.2471, "step": 292 }, { "epoch": 1.78, "learning_rate": 1.8203264635581994e-05, "loss": 2.1422, "step": 294 }, { "epoch": 1.79, "learning_rate": 1.625482748980961e-05, "loss": 2.1964, "step": 296 }, { "epoch": 1.8, "learning_rate": 1.441375701279388e-05, "loss": 2.2073, "step": 298 }, { "epoch": 1.82, "learning_rate": 1.2680749849352745e-05, "loss": 2.2215, "step": 300 }, { "epoch": 1.83, "learning_rate": 1.1056461754100309e-05, "loss": 2.2439, "step": 302 }, { "epoch": 1.84, "learning_rate": 9.541507343314714e-06, "loss": 2.1537, "step": 304 }, { "epoch": 1.85, "learning_rate": 8.136459862373056e-06, "loss": 2.1494, "step": 306 }, { "epoch": 1.86, "learning_rate": 6.841850968840212e-06, "loss": 2.1858, "step": 308 }, { "epoch": 1.88, "learning_rate": 5.6581705312944145e-06, "loss": 2.1937, "step": 310 }, { "epoch": 1.89, "learning_rate": 4.585866443965302e-06, "loss": 2.1877, "step": 312 }, { "epoch": 1.9, "learning_rate": 3.62534445725472e-06, "loss": 2.2081, "step": 314 }, { "epoch": 1.91, "learning_rate": 2.7769680242041624e-06, "loss": 2.2323, "step": 316 }, { "epoch": 1.92, "learning_rate": 2.0410581629676236e-06, "loss": 2.2301, "step": 318 }, { "epoch": 1.94, "learning_rate": 1.417893335340925e-06, "loss": 2.1788, "step": 320 }, { "epoch": 1.95, "learning_rate": 9.077093413944358e-07, "loss": 2.166, "step": 322 }, { "epoch": 1.96, "learning_rate": 5.106992302482127e-07, "loss": 2.1464, "step": 324 } ], "max_steps": 330, "num_train_epochs": 2, "total_flos": 1.536257738185638e+18, "trial_name": null, "trial_params": null }