|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 49.57446808510638, |
|
"eval_steps": 500, |
|
"global_step": 675, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.999990671457219e-06, |
|
"loss": 2.0607, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.9997667899113055e-06, |
|
"loss": 2.0852, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.999067203154777e-06, |
|
"loss": 2.0685, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 2.0233097076416016, |
|
"eval_runtime": 4.0604, |
|
"eval_samples_per_second": 20.441, |
|
"eval_steps_per_second": 1.478, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.997901370250966e-06, |
|
"loss": 2.0386, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.99626950870707e-06, |
|
"loss": 2.0152, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.994171922976349e-06, |
|
"loss": 1.9972, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 1.9437636137008667, |
|
"eval_runtime": 3.9692, |
|
"eval_samples_per_second": 20.911, |
|
"eval_steps_per_second": 1.512, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 4.991609004401324e-06, |
|
"loss": 1.956, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.988581231140772e-06, |
|
"loss": 1.9288, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.985089168080509e-06, |
|
"loss": 1.9109, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 1.8719902038574219, |
|
"eval_runtime": 3.9854, |
|
"eval_samples_per_second": 20.826, |
|
"eval_steps_per_second": 1.505, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 4.981133466728004e-06, |
|
"loss": 1.8712, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 4.976714865090827e-06, |
|
"loss": 1.8583, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_loss": 1.809607744216919, |
|
"eval_runtime": 3.9774, |
|
"eval_samples_per_second": 20.868, |
|
"eval_steps_per_second": 1.509, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 4.971834187538963e-06, |
|
"loss": 1.843, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 4.966492344651006e-06, |
|
"loss": 1.8171, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 4.960690333044279e-06, |
|
"loss": 1.7903, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"eval_loss": 1.7595775127410889, |
|
"eval_runtime": 3.9935, |
|
"eval_samples_per_second": 20.784, |
|
"eval_steps_per_second": 1.502, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 4.954429235188897e-06, |
|
"loss": 1.7816, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 4.947710219205808e-06, |
|
"loss": 1.765, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 4.940534538648862e-06, |
|
"loss": 1.7454, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_loss": 1.7169337272644043, |
|
"eval_runtime": 3.9846, |
|
"eval_samples_per_second": 20.83, |
|
"eval_steps_per_second": 1.506, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 4.932903532270939e-06, |
|
"loss": 1.7242, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 4.924818623774178e-06, |
|
"loss": 1.7146, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"eval_loss": 1.6789494752883911, |
|
"eval_runtime": 3.9897, |
|
"eval_samples_per_second": 20.803, |
|
"eval_steps_per_second": 1.504, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 4.916281321544362e-06, |
|
"loss": 1.7114, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 4.907293218369499e-06, |
|
"loss": 1.6893, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 4.897855991142658e-06, |
|
"loss": 1.6737, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"eval_loss": 1.6394565105438232, |
|
"eval_runtime": 3.9863, |
|
"eval_samples_per_second": 20.821, |
|
"eval_steps_per_second": 1.505, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 4.8879714005491205e-06, |
|
"loss": 1.6691, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 4.8776412907378845e-06, |
|
"loss": 1.6507, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 4.8668675889776095e-06, |
|
"loss": 1.6289, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"eval_loss": 1.605614185333252, |
|
"eval_runtime": 4.0005, |
|
"eval_samples_per_second": 20.748, |
|
"eval_steps_per_second": 1.5, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 4.855652305297052e-06, |
|
"loss": 1.6312, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 4.843997532110051e-06, |
|
"loss": 1.5965, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 4.83190544382516e-06, |
|
"loss": 1.5934, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"eval_loss": 1.5665417909622192, |
|
"eval_runtime": 3.9832, |
|
"eval_samples_per_second": 20.838, |
|
"eval_steps_per_second": 1.506, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 4.819378296439962e-06, |
|
"loss": 1.5634, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 4.80641842712018e-06, |
|
"loss": 1.565, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"eval_loss": 1.5258013010025024, |
|
"eval_runtime": 3.9933, |
|
"eval_samples_per_second": 20.785, |
|
"eval_steps_per_second": 1.503, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 4.793028253763633e-06, |
|
"loss": 1.5491, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"learning_rate": 4.7792102745491345e-06, |
|
"loss": 1.5353, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 4.764967067470409e-06, |
|
"loss": 1.519, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 11.57, |
|
"eval_loss": 1.4776073694229126, |
|
"eval_runtime": 3.9813, |
|
"eval_samples_per_second": 20.847, |
|
"eval_steps_per_second": 1.507, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 4.750301289855128e-06, |
|
"loss": 1.4823, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 12.34, |
|
"learning_rate": 4.735215677869129e-06, |
|
"loss": 1.4795, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"learning_rate": 4.7197130460059385e-06, |
|
"loss": 1.4593, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"eval_loss": 1.4280617237091064, |
|
"eval_runtime": 3.9852, |
|
"eval_samples_per_second": 20.827, |
|
"eval_steps_per_second": 1.506, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 4.7037962865616795e-06, |
|
"loss": 1.4393, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 4.687468369095457e-06, |
|
"loss": 1.4156, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"eval_loss": 1.3676358461380005, |
|
"eval_runtime": 3.9729, |
|
"eval_samples_per_second": 20.892, |
|
"eval_steps_per_second": 1.51, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"learning_rate": 4.6707323398753346e-06, |
|
"loss": 1.3898, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 14.26, |
|
"learning_rate": 4.6535913213100005e-06, |
|
"loss": 1.3671, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 14.47, |
|
"learning_rate": 4.636048511366222e-06, |
|
"loss": 1.3512, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"eval_loss": 1.3222225904464722, |
|
"eval_runtime": 4.0174, |
|
"eval_samples_per_second": 20.66, |
|
"eval_steps_per_second": 1.494, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 4.618107182972209e-06, |
|
"loss": 1.3308, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 15.32, |
|
"learning_rate": 4.599770683406992e-06, |
|
"loss": 1.3194, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"learning_rate": 4.58104243367592e-06, |
|
"loss": 1.3146, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 15.57, |
|
"eval_loss": 1.2824859619140625, |
|
"eval_runtime": 3.9833, |
|
"eval_samples_per_second": 20.837, |
|
"eval_steps_per_second": 1.506, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"learning_rate": 4.561925927872421e-06, |
|
"loss": 1.3001, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 16.38, |
|
"learning_rate": 4.542424732526105e-06, |
|
"loss": 1.2798, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 16.55, |
|
"eval_loss": 1.2492066621780396, |
|
"eval_runtime": 3.9849, |
|
"eval_samples_per_second": 20.829, |
|
"eval_steps_per_second": 1.506, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 17.02, |
|
"learning_rate": 4.522542485937369e-06, |
|
"loss": 1.259, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 17.23, |
|
"learning_rate": 4.5022828974986044e-06, |
|
"loss": 1.2516, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"learning_rate": 4.481649747002146e-06, |
|
"loss": 1.2532, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 17.57, |
|
"eval_loss": 1.22245192527771, |
|
"eval_runtime": 3.9686, |
|
"eval_samples_per_second": 20.914, |
|
"eval_steps_per_second": 1.512, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 18.09, |
|
"learning_rate": 4.460646883935079e-06, |
|
"loss": 1.2491, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 18.3, |
|
"learning_rate": 4.43927822676105e-06, |
|
"loss": 1.2058, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 4.417547762189207e-06, |
|
"loss": 1.2277, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 18.55, |
|
"eval_loss": 1.2032711505889893, |
|
"eval_runtime": 3.9983, |
|
"eval_samples_per_second": 20.759, |
|
"eval_steps_per_second": 1.501, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 19.15, |
|
"learning_rate": 4.395459544430407e-06, |
|
"loss": 1.2244, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 19.36, |
|
"learning_rate": 4.373017694440828e-06, |
|
"loss": 1.1986, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 4.35022639915313e-06, |
|
"loss": 1.208, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"eval_loss": 1.18324875831604, |
|
"eval_runtime": 3.9861, |
|
"eval_samples_per_second": 20.822, |
|
"eval_steps_per_second": 1.505, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"learning_rate": 4.32708991069531e-06, |
|
"loss": 1.1919, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 20.43, |
|
"learning_rate": 4.30361254559739e-06, |
|
"loss": 1.1944, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 20.55, |
|
"eval_loss": 1.1732494831085205, |
|
"eval_runtime": 3.9909, |
|
"eval_samples_per_second": 20.797, |
|
"eval_steps_per_second": 1.503, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 21.06, |
|
"learning_rate": 4.279798683986084e-06, |
|
"loss": 1.18, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 21.28, |
|
"learning_rate": 4.255652768767619e-06, |
|
"loss": 1.1806, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 21.49, |
|
"learning_rate": 4.2311793047988145e-06, |
|
"loss": 1.1799, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 21.57, |
|
"eval_loss": 1.158596158027649, |
|
"eval_runtime": 3.993, |
|
"eval_samples_per_second": 20.787, |
|
"eval_steps_per_second": 1.503, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 22.13, |
|
"learning_rate": 4.206382858046636e-06, |
|
"loss": 1.1803, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 22.34, |
|
"learning_rate": 4.181268054736319e-06, |
|
"loss": 1.1532, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 22.55, |
|
"learning_rate": 4.15583958048827e-06, |
|
"loss": 1.1621, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 22.55, |
|
"eval_loss": 1.1493983268737793, |
|
"eval_runtime": 3.9997, |
|
"eval_samples_per_second": 20.752, |
|
"eval_steps_per_second": 1.5, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 23.19, |
|
"learning_rate": 4.130102179443877e-06, |
|
"loss": 1.1681, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 23.4, |
|
"learning_rate": 4.104060653380403e-06, |
|
"loss": 1.1448, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 23.57, |
|
"eval_loss": 1.1393245458602905, |
|
"eval_runtime": 3.9913, |
|
"eval_samples_per_second": 20.795, |
|
"eval_steps_per_second": 1.503, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 4.077719860815132e-06, |
|
"loss": 1.1513, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 24.26, |
|
"learning_rate": 4.051084716098921e-06, |
|
"loss": 1.1441, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 24.47, |
|
"learning_rate": 4.024160188499337e-06, |
|
"loss": 1.1564, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 24.55, |
|
"eval_loss": 1.1301108598709106, |
|
"eval_runtime": 3.9936, |
|
"eval_samples_per_second": 20.783, |
|
"eval_steps_per_second": 1.502, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 25.11, |
|
"learning_rate": 3.996951301273556e-06, |
|
"loss": 1.1396, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 25.32, |
|
"learning_rate": 3.969463130731183e-06, |
|
"loss": 1.1346, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 25.53, |
|
"learning_rate": 3.941700805287169e-06, |
|
"loss": 1.1293, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 25.57, |
|
"eval_loss": 1.1233478784561157, |
|
"eval_runtime": 3.9814, |
|
"eval_samples_per_second": 20.847, |
|
"eval_steps_per_second": 1.507, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 26.17, |
|
"learning_rate": 3.913669504505015e-06, |
|
"loss": 1.1483, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 26.38, |
|
"learning_rate": 3.8853744581304376e-06, |
|
"loss": 1.1228, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 26.55, |
|
"eval_loss": 1.1160305738449097, |
|
"eval_runtime": 3.9869, |
|
"eval_samples_per_second": 20.818, |
|
"eval_steps_per_second": 1.505, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 27.02, |
|
"learning_rate": 3.856820945115655e-06, |
|
"loss": 1.1261, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 27.23, |
|
"learning_rate": 3.828014292634508e-06, |
|
"loss": 1.1307, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 27.45, |
|
"learning_rate": 3.798959875088584e-06, |
|
"loss": 1.1266, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 27.57, |
|
"eval_loss": 1.1105799674987793, |
|
"eval_runtime": 3.9806, |
|
"eval_samples_per_second": 20.851, |
|
"eval_steps_per_second": 1.507, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 28.09, |
|
"learning_rate": 3.769663113104516e-06, |
|
"loss": 1.1051, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"learning_rate": 3.7401294725226707e-06, |
|
"loss": 1.1152, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 28.51, |
|
"learning_rate": 3.7103644633774015e-06, |
|
"loss": 1.1159, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 28.55, |
|
"eval_loss": 1.1047321557998657, |
|
"eval_runtime": 3.9941, |
|
"eval_samples_per_second": 20.781, |
|
"eval_steps_per_second": 1.502, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 29.15, |
|
"learning_rate": 3.680373638869047e-06, |
|
"loss": 1.1149, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 29.36, |
|
"learning_rate": 3.650162594327881e-06, |
|
"loss": 1.0942, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 29.57, |
|
"learning_rate": 3.6197369661702052e-06, |
|
"loss": 1.125, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 29.57, |
|
"eval_loss": 1.098873496055603, |
|
"eval_runtime": 3.9772, |
|
"eval_samples_per_second": 20.869, |
|
"eval_steps_per_second": 1.509, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 30.21, |
|
"learning_rate": 3.589102430846773e-06, |
|
"loss": 1.1067, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 30.43, |
|
"learning_rate": 3.5582647037837446e-06, |
|
"loss": 1.094, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 30.55, |
|
"eval_loss": 1.0941215753555298, |
|
"eval_runtime": 3.9808, |
|
"eval_samples_per_second": 20.85, |
|
"eval_steps_per_second": 1.507, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 31.06, |
|
"learning_rate": 3.527229538316371e-06, |
|
"loss": 1.1033, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 31.28, |
|
"learning_rate": 3.4960027246156043e-06, |
|
"loss": 1.0886, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 31.49, |
|
"learning_rate": 3.4645900886078388e-06, |
|
"loss": 1.1077, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 31.57, |
|
"eval_loss": 1.0902585983276367, |
|
"eval_runtime": 3.9794, |
|
"eval_samples_per_second": 20.857, |
|
"eval_steps_per_second": 1.508, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 32.13, |
|
"learning_rate": 3.432997490887979e-06, |
|
"loss": 1.1054, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 32.34, |
|
"learning_rate": 3.4012308256260366e-06, |
|
"loss": 1.0936, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 32.55, |
|
"learning_rate": 3.369296019467473e-06, |
|
"loss": 1.0874, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 32.55, |
|
"eval_loss": 1.0833797454833984, |
|
"eval_runtime": 3.9868, |
|
"eval_samples_per_second": 20.819, |
|
"eval_steps_per_second": 1.505, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 33.19, |
|
"learning_rate": 3.3371990304274654e-06, |
|
"loss": 1.0687, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 33.4, |
|
"learning_rate": 3.304945846779346e-06, |
|
"loss": 1.0957, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 33.57, |
|
"eval_loss": 1.0769224166870117, |
|
"eval_runtime": 3.9835, |
|
"eval_samples_per_second": 20.836, |
|
"eval_steps_per_second": 1.506, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 34.04, |
|
"learning_rate": 3.272542485937369e-06, |
|
"loss": 1.0894, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 34.26, |
|
"learning_rate": 3.239994993334059e-06, |
|
"loss": 1.0906, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 34.47, |
|
"learning_rate": 3.207309441292325e-06, |
|
"loss": 1.0755, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 34.55, |
|
"eval_loss": 1.0749725103378296, |
|
"eval_runtime": 3.9797, |
|
"eval_samples_per_second": 20.856, |
|
"eval_steps_per_second": 1.508, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 35.11, |
|
"learning_rate": 3.174491927892561e-06, |
|
"loss": 1.0862, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 35.32, |
|
"learning_rate": 3.1415485758349344e-06, |
|
"loss": 1.0767, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 35.53, |
|
"learning_rate": 3.1084855312970897e-06, |
|
"loss": 1.0705, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 35.57, |
|
"eval_loss": 1.0719436407089233, |
|
"eval_runtime": 3.9776, |
|
"eval_samples_per_second": 20.867, |
|
"eval_steps_per_second": 1.508, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 36.17, |
|
"learning_rate": 3.0753089627874668e-06, |
|
"loss": 1.0786, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 36.38, |
|
"learning_rate": 3.0420250599944525e-06, |
|
"loss": 1.0749, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 36.55, |
|
"eval_loss": 1.0687665939331055, |
|
"eval_runtime": 3.9811, |
|
"eval_samples_per_second": 20.848, |
|
"eval_steps_per_second": 1.507, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 37.02, |
|
"learning_rate": 3.0086400326315853e-06, |
|
"loss": 1.0646, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 37.23, |
|
"learning_rate": 2.9751601092790185e-06, |
|
"loss": 1.0734, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 37.45, |
|
"learning_rate": 2.941591536221469e-06, |
|
"loss": 1.0742, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 37.57, |
|
"eval_loss": 1.064079761505127, |
|
"eval_runtime": 3.9781, |
|
"eval_samples_per_second": 20.864, |
|
"eval_steps_per_second": 1.508, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 38.09, |
|
"learning_rate": 2.907940576282856e-06, |
|
"loss": 1.0547, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 38.3, |
|
"learning_rate": 2.8742135076578608e-06, |
|
"loss": 1.0686, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 38.51, |
|
"learning_rate": 2.840416622740617e-06, |
|
"loss": 1.064, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 38.55, |
|
"eval_loss": 1.0626107454299927, |
|
"eval_runtime": 3.9847, |
|
"eval_samples_per_second": 20.829, |
|
"eval_steps_per_second": 1.506, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 39.15, |
|
"learning_rate": 2.8065562269507464e-06, |
|
"loss": 1.0739, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 39.36, |
|
"learning_rate": 2.7726386375569748e-06, |
|
"loss": 1.0569, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 39.57, |
|
"learning_rate": 2.7386701824985257e-06, |
|
"loss": 1.0569, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 39.57, |
|
"eval_loss": 1.0592881441116333, |
|
"eval_runtime": 3.9656, |
|
"eval_samples_per_second": 20.93, |
|
"eval_steps_per_second": 1.513, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 40.21, |
|
"learning_rate": 2.7046571992045334e-06, |
|
"loss": 1.0466, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 40.43, |
|
"learning_rate": 2.670606033411678e-06, |
|
"loss": 1.0686, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 40.55, |
|
"eval_loss": 1.0553538799285889, |
|
"eval_runtime": 3.9786, |
|
"eval_samples_per_second": 20.862, |
|
"eval_steps_per_second": 1.508, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 41.06, |
|
"learning_rate": 2.636523037980275e-06, |
|
"loss": 1.0564, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 41.28, |
|
"learning_rate": 2.602414571709036e-06, |
|
"loss": 1.0523, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 41.49, |
|
"learning_rate": 2.5682869981487154e-06, |
|
"loss": 1.059, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 41.57, |
|
"eval_loss": 1.0516701936721802, |
|
"eval_runtime": 3.9778, |
|
"eval_samples_per_second": 20.866, |
|
"eval_steps_per_second": 1.508, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 42.13, |
|
"learning_rate": 2.5341466844148775e-06, |
|
"loss": 1.0414, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 42.34, |
|
"learning_rate": 2.5e-06, |
|
"loss": 1.0394, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 42.55, |
|
"learning_rate": 2.465853315585123e-06, |
|
"loss": 1.0588, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 42.55, |
|
"eval_loss": 1.0469306707382202, |
|
"eval_runtime": 4.0006, |
|
"eval_samples_per_second": 20.747, |
|
"eval_steps_per_second": 1.5, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 43.19, |
|
"learning_rate": 2.431713001851286e-06, |
|
"loss": 1.0442, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"learning_rate": 2.3975854282909645e-06, |
|
"loss": 1.0447, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 43.57, |
|
"eval_loss": 1.0471419095993042, |
|
"eval_runtime": 3.9796, |
|
"eval_samples_per_second": 20.856, |
|
"eval_steps_per_second": 1.508, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 44.04, |
|
"learning_rate": 2.3634769620197253e-06, |
|
"loss": 1.0573, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 44.26, |
|
"learning_rate": 2.3293939665883233e-06, |
|
"loss": 1.0478, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 44.47, |
|
"learning_rate": 2.2953428007954682e-06, |
|
"loss": 1.0356, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 44.55, |
|
"eval_loss": 1.0420819520950317, |
|
"eval_runtime": 3.9978, |
|
"eval_samples_per_second": 20.762, |
|
"eval_steps_per_second": 1.501, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 45.11, |
|
"learning_rate": 2.261329817501475e-06, |
|
"loss": 1.0465, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 45.32, |
|
"learning_rate": 2.2273613624430256e-06, |
|
"loss": 1.0389, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 45.53, |
|
"learning_rate": 2.1934437730492544e-06, |
|
"loss": 1.0431, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 45.57, |
|
"eval_loss": 1.0415891408920288, |
|
"eval_runtime": 3.9789, |
|
"eval_samples_per_second": 20.86, |
|
"eval_steps_per_second": 1.508, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 46.17, |
|
"learning_rate": 2.159583377259384e-06, |
|
"loss": 1.045, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 46.38, |
|
"learning_rate": 2.1257864923421405e-06, |
|
"loss": 1.0195, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 46.55, |
|
"eval_loss": 1.038739562034607, |
|
"eval_runtime": 3.9849, |
|
"eval_samples_per_second": 20.829, |
|
"eval_steps_per_second": 1.506, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 47.02, |
|
"learning_rate": 2.092059423717145e-06, |
|
"loss": 1.0458, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 47.23, |
|
"learning_rate": 2.0584084637785316e-06, |
|
"loss": 1.0347, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 47.45, |
|
"learning_rate": 2.0248398907209827e-06, |
|
"loss": 1.0326, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 47.57, |
|
"eval_loss": 1.0356136560440063, |
|
"eval_runtime": 3.9756, |
|
"eval_samples_per_second": 20.877, |
|
"eval_steps_per_second": 1.509, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 48.09, |
|
"learning_rate": 1.991359967368416e-06, |
|
"loss": 1.0351, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 48.3, |
|
"learning_rate": 1.957974940005548e-06, |
|
"loss": 1.0422, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 48.51, |
|
"learning_rate": 1.9246910372125345e-06, |
|
"loss": 1.0227, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 48.55, |
|
"eval_loss": 1.0340739488601685, |
|
"eval_runtime": 3.9883, |
|
"eval_samples_per_second": 20.811, |
|
"eval_steps_per_second": 1.504, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 49.15, |
|
"learning_rate": 1.8915144687029107e-06, |
|
"loss": 1.0192, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 49.36, |
|
"learning_rate": 1.8584514241650667e-06, |
|
"loss": 1.0328, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 49.57, |
|
"learning_rate": 1.8255080721074391e-06, |
|
"loss": 1.0403, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 49.57, |
|
"eval_loss": 1.0316752195358276, |
|
"eval_runtime": 3.9751, |
|
"eval_samples_per_second": 20.88, |
|
"eval_steps_per_second": 1.509, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 49.57, |
|
"step": 675, |
|
"total_flos": 6306831714484224.0, |
|
"train_loss": 1.2869272242652046, |
|
"train_runtime": 6519.8949, |
|
"train_samples_per_second": 5.721, |
|
"train_steps_per_second": 0.176 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1150, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 6306831714484224.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|