{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9971671388101981, "global_step": 330, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.0005, "loss": 2.3487, "step": 2 }, { "epoch": 0.02, "learning_rate": 0.001, "loss": 2.3946, "step": 4 }, { "epoch": 0.04, "learning_rate": 0.0009999071352056674, "loss": 2.4059, "step": 6 }, { "epoch": 0.05, "learning_rate": 0.00099962857531815, "loss": 2.4061, "step": 8 }, { "epoch": 0.06, "learning_rate": 0.000999164423811074, "loss": 2.3801, "step": 10 }, { "epoch": 0.07, "learning_rate": 0.0009985148530977765, "loss": 2.4389, "step": 12 }, { "epoch": 0.08, "learning_rate": 0.0009976801044672607, "loss": 2.4007, "step": 14 }, { "epoch": 0.1, "learning_rate": 0.0009966604879945657, "loss": 2.4691, "step": 16 }, { "epoch": 0.11, "learning_rate": 0.0009954563824255878, "loss": 2.4015, "step": 18 }, { "epoch": 0.12, "learning_rate": 0.0009940682350363913, "loss": 2.4415, "step": 20 }, { "epoch": 0.13, "learning_rate": 0.000992496561467063, "loss": 2.477, "step": 22 }, { "epoch": 0.15, "learning_rate": 0.000990741945530174, "loss": 2.4429, "step": 24 }, { "epoch": 0.16, "learning_rate": 0.0009888050389939172, "loss": 2.4429, "step": 26 }, { "epoch": 0.17, "learning_rate": 0.0009866865613400006, "loss": 2.4597, "step": 28 }, { "epoch": 0.18, "learning_rate": 0.0009843872994963912, "loss": 2.4501, "step": 30 }, { "epoch": 0.19, "learning_rate": 0.0009819081075450014, "loss": 2.4307, "step": 32 }, { "epoch": 0.21, "learning_rate": 0.0009792499064044343, "loss": 2.4182, "step": 34 }, { "epoch": 0.22, "learning_rate": 0.0009764136834878986, "loss": 2.4354, "step": 36 }, { "epoch": 0.23, "learning_rate": 0.0009734004923364257, "loss": 2.4323, "step": 38 }, { "epoch": 0.24, "learning_rate": 0.0009702114522275216, "loss": 2.4592, "step": 40 }, { "epoch": 0.25, "learning_rate": 0.000966847747759402, "loss": 2.4242, "step": 42 }, { "epoch": 0.27, "learning_rate": 0.0009633106284109611, "loss": 2.4355, "step": 44 }, { "epoch": 0.28, "learning_rate": 0.0009596014080776422, "loss": 2.4379, "step": 46 }, { "epoch": 0.29, "learning_rate": 0.0009557214645833791, "loss": 2.3786, "step": 48 }, { "epoch": 0.3, "learning_rate": 0.0009516722391687902, "loss": 2.4303, "step": 50 }, { "epoch": 0.31, "learning_rate": 0.0009474552359558167, "loss": 2.3946, "step": 52 }, { "epoch": 0.33, "learning_rate": 0.000943072021389003, "loss": 2.4104, "step": 54 }, { "epoch": 0.34, "learning_rate": 0.0009385242236536259, "loss": 2.4266, "step": 56 }, { "epoch": 0.35, "learning_rate": 0.0009338135320708912, "loss": 2.5106, "step": 58 }, { "epoch": 0.36, "learning_rate": 0.0009289416964704185, "loss": 2.4225, "step": 60 }, { "epoch": 0.37, "learning_rate": 0.0009239105265402525, "loss": 2.4745, "step": 62 }, { "epoch": 0.39, "learning_rate": 0.0009187218911546363, "loss": 2.4572, "step": 64 }, { "epoch": 0.4, "learning_rate": 0.0009133777176798013, "loss": 2.4366, "step": 66 }, { "epoch": 0.41, "learning_rate": 0.0009078799912580304, "loss": 2.4021, "step": 68 }, { "epoch": 0.42, "learning_rate": 0.0009022307540702576, "loss": 2.4054, "step": 70 }, { "epoch": 0.44, "learning_rate": 0.0008964321045774807, "loss": 2.4628, "step": 72 }, { "epoch": 0.45, "learning_rate": 0.0008904861967412702, "loss": 2.5038, "step": 74 }, { "epoch": 0.46, "learning_rate": 0.0008843952392236594, "loss": 2.3801, "step": 76 }, { "epoch": 0.47, "learning_rate": 0.0008781614945667169, "loss": 2.4056, "step": 78 }, { "epoch": 0.48, "learning_rate": 0.0008717872783521047, "loss": 2.3334, "step": 80 }, { "epoch": 0.5, "learning_rate": 0.0008652749583409339, "loss": 2.3913, "step": 82 }, { "epoch": 0.51, "learning_rate": 0.0008586269535942384, "loss": 2.3784, "step": 84 }, { "epoch": 0.52, "learning_rate": 0.0008518457335743926, "loss": 2.4436, "step": 86 }, { "epoch": 0.53, "learning_rate": 0.0008449338172278058, "loss": 2.3735, "step": 88 }, { "epoch": 0.54, "learning_rate": 0.0008378937720492384, "loss": 2.374, "step": 90 }, { "epoch": 0.56, "learning_rate": 0.0008307282131280805, "loss": 2.4064, "step": 92 }, { "epoch": 0.57, "learning_rate": 0.000823439802176954, "loss": 2.4124, "step": 94 }, { "epoch": 0.58, "learning_rate": 0.0008160312465429952, "loss": 2.4181, "step": 96 }, { "epoch": 0.59, "learning_rate": 0.0008085052982021848, "loss": 2.4253, "step": 98 }, { "epoch": 0.6, "learning_rate": 0.0008008647527371022, "loss": 2.4678, "step": 100 }, { "epoch": 0.62, "learning_rate": 0.0007931124482984802, "loss": 2.4738, "step": 102 }, { "epoch": 0.63, "learning_rate": 0.0007852512645509479, "loss": 2.3738, "step": 104 }, { "epoch": 0.64, "learning_rate": 0.0007772841216033533, "loss": 2.4081, "step": 106 }, { "epoch": 0.65, "learning_rate": 0.0007692139789240611, "loss": 2.3738, "step": 108 }, { "epoch": 0.66, "learning_rate": 0.0007610438342416319, "loss": 2.3701, "step": 110 }, { "epoch": 0.68, "learning_rate": 0.0007527767224312882, "loss": 2.4355, "step": 112 }, { "epoch": 0.69, "learning_rate": 0.000744415714387582, "loss": 2.4036, "step": 114 }, { "epoch": 0.7, "learning_rate": 0.0007359639158836828, "loss": 2.3746, "step": 116 }, { "epoch": 0.71, "learning_rate": 0.0007274244664177097, "loss": 2.4855, "step": 118 }, { "epoch": 0.73, "learning_rate": 0.0007188005380465365, "loss": 2.379, "step": 120 }, { "epoch": 0.74, "learning_rate": 0.000710095334207501, "loss": 2.4178, "step": 122 }, { "epoch": 0.75, "learning_rate": 0.0007013120885284599, "loss": 2.4561, "step": 124 }, { "epoch": 0.76, "learning_rate": 0.0006924540636266272, "loss": 2.4024, "step": 126 }, { "epoch": 0.77, "learning_rate": 0.000683524549896646, "loss": 2.4172, "step": 128 }, { "epoch": 0.79, "learning_rate": 0.0006745268642883404, "loss": 2.3858, "step": 130 }, { "epoch": 0.8, "learning_rate": 0.0006654643490746042, "loss": 2.3547, "step": 132 }, { "epoch": 0.81, "learning_rate": 0.0006563403706098833, "loss": 2.4372, "step": 134 }, { "epoch": 0.82, "learning_rate": 0.0006471583180797121, "loss": 2.3785, "step": 136 }, { "epoch": 0.83, "learning_rate": 0.0006379216022417695, "loss": 2.3815, "step": 138 }, { "epoch": 0.85, "learning_rate": 0.0006286336541589224, "loss": 2.4209, "step": 140 }, { "epoch": 0.86, "learning_rate": 0.0006192979239247243, "loss": 2.3962, "step": 142 }, { "epoch": 0.87, "learning_rate": 0.0006099178793818478, "loss": 2.3626, "step": 144 }, { "epoch": 0.88, "learning_rate": 0.0006004970048339225, "loss": 2.3991, "step": 146 }, { "epoch": 0.89, "learning_rate": 0.0005910387997512573, "loss": 2.4396, "step": 148 }, { "epoch": 0.91, "learning_rate": 0.0005815467774709313, "loss": 2.3816, "step": 150 }, { "epoch": 0.92, "learning_rate": 0.0005720244638917323, "loss": 2.3866, "step": 152 }, { "epoch": 0.93, "learning_rate": 0.0005624753961644281, "loss": 2.4035, "step": 154 }, { "epoch": 0.94, "learning_rate": 0.0005529031213778615, "loss": 2.4063, "step": 156 }, { "epoch": 0.95, "learning_rate": 0.0005433111952413496, "loss": 2.3944, "step": 158 }, { "epoch": 0.97, "learning_rate": 0.0005337031807638841, "loss": 2.4192, "step": 160 }, { "epoch": 0.98, "learning_rate": 0.0005240826469306187, "loss": 2.3603, "step": 162 }, { "epoch": 0.99, "learning_rate": 0.0005144531673771364, "loss": 2.4041, "step": 164 }, { "epoch": 1.01, "learning_rate": 0.0005048183190619903, "loss": 2.8813, "step": 166 }, { "epoch": 1.02, "learning_rate": 0.0004951816809380097, "loss": 2.2786, "step": 168 }, { "epoch": 1.03, "learning_rate": 0.0004855468326228638, "loss": 2.2886, "step": 170 }, { "epoch": 1.04, "learning_rate": 0.00047591735306938137, "loss": 2.1822, "step": 172 }, { "epoch": 1.05, "learning_rate": 0.00046629681923611606, "loss": 2.2589, "step": 174 }, { "epoch": 1.07, "learning_rate": 0.0004566888047586507, "loss": 2.2625, "step": 176 }, { "epoch": 1.08, "learning_rate": 0.00044709687862213866, "loss": 2.2715, "step": 178 }, { "epoch": 1.09, "learning_rate": 0.000437524603835572, "loss": 2.1988, "step": 180 }, { "epoch": 1.1, "learning_rate": 0.000427975536108268, "loss": 2.3257, "step": 182 }, { "epoch": 1.11, "learning_rate": 0.00041845322252906863, "loss": 2.3026, "step": 184 }, { "epoch": 1.13, "learning_rate": 0.00040896120024874283, "loss": 2.2306, "step": 186 }, { "epoch": 1.14, "learning_rate": 0.0003995029951660776, "loss": 2.2269, "step": 188 }, { "epoch": 1.15, "learning_rate": 0.00039008212061815206, "loss": 2.3079, "step": 190 }, { "epoch": 1.16, "learning_rate": 0.00038070207607527587, "loss": 2.218, "step": 192 }, { "epoch": 1.18, "learning_rate": 0.00037136634584107787, "loss": 2.2667, "step": 194 }, { "epoch": 1.19, "learning_rate": 0.0003620783977582305, "loss": 2.2754, "step": 196 }, { "epoch": 1.2, "learning_rate": 0.0003528416819202881, "loss": 2.2835, "step": 198 }, { "epoch": 1.21, "learning_rate": 0.00034365962939011697, "loss": 2.2843, "step": 200 }, { "epoch": 1.22, "learning_rate": 0.00033453565092539584, "loss": 2.2387, "step": 202 }, { "epoch": 1.24, "learning_rate": 0.0003254731357116597, "loss": 2.254, "step": 204 }, { "epoch": 1.25, "learning_rate": 0.000316475450103354, "loss": 2.2686, "step": 206 }, { "epoch": 1.26, "learning_rate": 0.00030754593637337277, "loss": 2.2422, "step": 208 }, { "epoch": 1.27, "learning_rate": 0.0002986879114715403, "loss": 2.3003, "step": 210 }, { "epoch": 1.28, "learning_rate": 0.0002899046657924992, "loss": 2.2619, "step": 212 }, { "epoch": 1.3, "learning_rate": 0.00028119946195346375, "loss": 2.3022, "step": 214 }, { "epoch": 1.31, "learning_rate": 0.00027257553358229033, "loss": 2.2523, "step": 216 }, { "epoch": 1.32, "learning_rate": 0.0002640360841163174, "loss": 2.3098, "step": 218 }, { "epoch": 1.33, "learning_rate": 0.0002555842856124182, "loss": 2.235, "step": 220 }, { "epoch": 1.34, "learning_rate": 0.00024722327756871186, "loss": 2.2448, "step": 222 }, { "epoch": 1.36, "learning_rate": 0.0002389561657583681, "loss": 2.2411, "step": 224 }, { "epoch": 1.37, "learning_rate": 0.00023078602107593898, "loss": 2.2485, "step": 226 }, { "epoch": 1.38, "learning_rate": 0.0002227158783966467, "loss": 2.2261, "step": 228 }, { "epoch": 1.39, "learning_rate": 0.00021474873544905204, "loss": 2.2427, "step": 230 }, { "epoch": 1.4, "learning_rate": 0.00020688755170151997, "loss": 2.2961, "step": 232 }, { "epoch": 1.42, "learning_rate": 0.00019913524726289784, "loss": 2.2272, "step": 234 }, { "epoch": 1.43, "learning_rate": 0.00019149470179781532, "loss": 2.2368, "step": 236 }, { "epoch": 1.44, "learning_rate": 0.00018396875345700497, "loss": 2.2846, "step": 238 }, { "epoch": 1.45, "learning_rate": 0.000176560197823046, "loss": 2.1709, "step": 240 }, { "epoch": 1.47, "learning_rate": 0.0001692717868719195, "loss": 2.2659, "step": 242 }, { "epoch": 1.48, "learning_rate": 0.0001621062279507617, "loss": 2.2655, "step": 244 }, { "epoch": 1.49, "learning_rate": 0.0001550661827721941, "loss": 2.2284, "step": 246 }, { "epoch": 1.5, "learning_rate": 0.00014815426642560752, "loss": 2.2444, "step": 248 }, { "epoch": 1.51, "learning_rate": 0.0001413730464057616, "loss": 2.3102, "step": 250 }, { "epoch": 1.53, "learning_rate": 0.00013472504165906613, "loss": 2.2287, "step": 252 }, { "epoch": 1.54, "learning_rate": 0.00012821272164789544, "loss": 2.2713, "step": 254 }, { "epoch": 1.55, "learning_rate": 0.00012183850543328313, "loss": 2.2127, "step": 256 }, { "epoch": 1.56, "learning_rate": 0.00011560476077634069, "loss": 2.1682, "step": 258 }, { "epoch": 1.57, "learning_rate": 0.00010951380325872979, "loss": 2.2393, "step": 260 }, { "epoch": 1.59, "learning_rate": 0.00010356789542251938, "loss": 2.2259, "step": 262 }, { "epoch": 1.6, "learning_rate": 9.776924592974257e-05, "loss": 2.2157, "step": 264 }, { "epoch": 1.61, "learning_rate": 9.212000874196952e-05, "loss": 2.2393, "step": 266 }, { "epoch": 1.62, "learning_rate": 8.662228232019875e-05, "loss": 2.2613, "step": 268 }, { "epoch": 1.63, "learning_rate": 8.127810884536401e-05, "loss": 2.1981, "step": 270 }, { "epoch": 1.65, "learning_rate": 7.60894734597476e-05, "loss": 2.2457, "step": 272 }, { "epoch": 1.66, "learning_rate": 7.105830352958143e-05, "loss": 2.2571, "step": 274 }, { "epoch": 1.67, "learning_rate": 6.618646792910893e-05, "loss": 2.1771, "step": 276 }, { "epoch": 1.68, "learning_rate": 6.147577634637414e-05, "loss": 2.2243, "step": 278 }, { "epoch": 1.69, "learning_rate": 5.692797861099719e-05, "loss": 2.2427, "step": 280 }, { "epoch": 1.71, "learning_rate": 5.25447640441834e-05, "loss": 2.2266, "step": 282 }, { "epoch": 1.72, "learning_rate": 4.832776083120982e-05, "loss": 2.3057, "step": 284 }, { "epoch": 1.73, "learning_rate": 4.4278535416620916e-05, "loss": 2.2225, "step": 286 }, { "epoch": 1.74, "learning_rate": 4.039859192235778e-05, "loss": 2.2665, "step": 288 }, { "epoch": 1.76, "learning_rate": 3.668937158903901e-05, "loss": 2.2807, "step": 290 }, { "epoch": 1.77, "learning_rate": 3.315225224059809e-05, "loss": 2.2165, "step": 292 }, { "epoch": 1.78, "learning_rate": 2.9788547772478415e-05, "loss": 2.2651, "step": 294 }, { "epoch": 1.79, "learning_rate": 2.6599507663574384e-05, "loss": 2.2437, "step": 296 }, { "epoch": 1.8, "learning_rate": 2.3586316512101414e-05, "loss": 2.3066, "step": 298 }, { "epoch": 1.82, "learning_rate": 2.0750093595565732e-05, "loss": 2.1727, "step": 300 }, { "epoch": 1.83, "learning_rate": 1.8091892454998595e-05, "loss": 2.2409, "step": 302 }, { "epoch": 1.84, "learning_rate": 1.561270050360897e-05, "loss": 2.2908, "step": 304 }, { "epoch": 1.85, "learning_rate": 1.33134386599994e-05, "loss": 2.2925, "step": 306 }, { "epoch": 1.86, "learning_rate": 1.1194961006082971e-05, "loss": 2.2449, "step": 308 }, { "epoch": 1.88, "learning_rate": 9.258054469825972e-06, "loss": 2.235, "step": 310 }, { "epoch": 1.89, "learning_rate": 7.503438532937168e-06, "loss": 2.2216, "step": 312 }, { "epoch": 1.9, "learning_rate": 5.931764963608866e-06, "loss": 2.2884, "step": 314 }, { "epoch": 1.91, "learning_rate": 4.5436175744121845e-06, "loss": 2.2124, "step": 316 }, { "epoch": 1.92, "learning_rate": 3.3395120054343087e-06, "loss": 2.2418, "step": 318 }, { "epoch": 1.94, "learning_rate": 2.319895532739369e-06, "loss": 2.2855, "step": 320 }, { "epoch": 1.95, "learning_rate": 1.4851469022234e-06, "loss": 2.2974, "step": 322 }, { "epoch": 1.96, "learning_rate": 8.35576188926046e-07, "loss": 2.2552, "step": 324 }, { "epoch": 1.97, "learning_rate": 3.71424681850141e-07, "loss": 2.2209, "step": 326 }, { "epoch": 1.99, "learning_rate": 9.286479433257e-08, "loss": 2.1935, "step": 328 }, { "epoch": 2.0, "learning_rate": 0.0, "loss": 2.2702, "step": 330 }, { "epoch": 2.0, "step": 330, "total_flos": 4.634629374287544e+17, "train_loss": 2.336302039117524, "train_runtime": 79791.9217, "train_samples_per_second": 0.265, "train_steps_per_second": 0.004 } ], "max_steps": 330, "num_train_epochs": 2, "total_flos": 4.634629374287544e+17, "trial_name": null, "trial_params": null }