|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9960768928991763, |
|
"global_step": 954, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.997799572243123e-06, |
|
"loss": 0.4974, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.990195641770761e-06, |
|
"loss": 0.4841, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 0.4602764844894409, |
|
"eval_runtime": 62.0223, |
|
"eval_samples_per_second": 13.431, |
|
"eval_steps_per_second": 0.855, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.9771693033643e-06, |
|
"loss": 0.4748, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.958734711603195e-06, |
|
"loss": 0.4678, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 0.45144811272621155, |
|
"eval_runtime": 61.8568, |
|
"eval_samples_per_second": 13.467, |
|
"eval_steps_per_second": 0.857, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.934911897741493e-06, |
|
"loss": 0.4687, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.905726747941616e-06, |
|
"loss": 0.4687, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 0.44708773493766785, |
|
"eval_runtime": 62.0907, |
|
"eval_samples_per_second": 13.416, |
|
"eval_steps_per_second": 0.854, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.871210975146135e-06, |
|
"loss": 0.473, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.831402084618113e-06, |
|
"loss": 0.4608, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 0.44395506381988525, |
|
"eval_runtime": 61.9484, |
|
"eval_samples_per_second": 13.447, |
|
"eval_steps_per_second": 0.856, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.786343333187412e-06, |
|
"loss": 0.4542, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.736083682247287e-06, |
|
"loss": 0.4593, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 0.43974635004997253, |
|
"eval_runtime": 61.9084, |
|
"eval_samples_per_second": 13.455, |
|
"eval_steps_per_second": 0.856, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.680677744552346e-06, |
|
"loss": 0.4681, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.620185724875652e-06, |
|
"loss": 0.4488, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 0.435758113861084, |
|
"eval_runtime": 61.8043, |
|
"eval_samples_per_second": 13.478, |
|
"eval_steps_per_second": 0.858, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.55467335458948e-06, |
|
"loss": 0.4478, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.484211820240797e-06, |
|
"loss": 0.4494, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 0.434115469455719, |
|
"eval_runtime": 61.841, |
|
"eval_samples_per_second": 13.47, |
|
"eval_steps_per_second": 0.857, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.408877686199078e-06, |
|
"loss": 0.44, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.328752811460542e-06, |
|
"loss": 0.4477, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 0.4319334030151367, |
|
"eval_runtime": 61.9956, |
|
"eval_samples_per_second": 13.436, |
|
"eval_steps_per_second": 0.855, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.243924260699133e-06, |
|
"loss": 0.4465, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.154484209661002e-06, |
|
"loss": 0.445, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 0.42997926473617554, |
|
"eval_runtime": 62.0149, |
|
"eval_samples_per_second": 13.432, |
|
"eval_steps_per_second": 0.855, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.060529845005184e-06, |
|
"loss": 0.4332, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 8.962163258699397e-06, |
|
"loss": 0.4366, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 0.4274918735027313, |
|
"eval_runtime": 61.8797, |
|
"eval_samples_per_second": 13.462, |
|
"eval_steps_per_second": 0.857, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.859491337085643e-06, |
|
"loss": 0.428, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 8.752625644736204e-06, |
|
"loss": 0.442, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 0.42491650581359863, |
|
"eval_runtime": 62.1183, |
|
"eval_samples_per_second": 13.41, |
|
"eval_steps_per_second": 0.853, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.641682303226197e-06, |
|
"loss": 0.4442, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.526781864954453e-06, |
|
"loss": 0.4424, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 0.4228505492210388, |
|
"eval_runtime": 62.0338, |
|
"eval_samples_per_second": 13.428, |
|
"eval_steps_per_second": 0.854, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 8.40804918214979e-06, |
|
"loss": 0.4301, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.28561327120505e-06, |
|
"loss": 0.4427, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 0.41998758912086487, |
|
"eval_runtime": 61.9344, |
|
"eval_samples_per_second": 13.45, |
|
"eval_steps_per_second": 0.856, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.159607172486301e-06, |
|
"loss": 0.4316, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 8.030167805769537e-06, |
|
"loss": 0.4372, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 0.4175536036491394, |
|
"eval_runtime": 61.971, |
|
"eval_samples_per_second": 13.442, |
|
"eval_steps_per_second": 0.855, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 7.897435821461964e-06, |
|
"loss": 0.4398, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 7.761555447769548e-06, |
|
"loss": 0.4335, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 0.4155929982662201, |
|
"eval_runtime": 61.9696, |
|
"eval_samples_per_second": 13.442, |
|
"eval_steps_per_second": 0.855, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 7.622674333976863e-06, |
|
"loss": 0.4371, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 7.4809433900095705e-06, |
|
"loss": 0.4088, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.41465404629707336, |
|
"eval_runtime": 61.9822, |
|
"eval_samples_per_second": 13.439, |
|
"eval_steps_per_second": 0.855, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 7.336516622453833e-06, |
|
"loss": 0.3166, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 7.1895509672108674e-06, |
|
"loss": 0.3145, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 0.42051395773887634, |
|
"eval_runtime": 62.5118, |
|
"eval_samples_per_second": 13.325, |
|
"eval_steps_per_second": 0.848, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 7.040206118968466e-06, |
|
"loss": 0.3136, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 6.88864435767478e-06, |
|
"loss": 0.3151, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_loss": 0.4205115735530853, |
|
"eval_runtime": 62.5792, |
|
"eval_samples_per_second": 13.311, |
|
"eval_steps_per_second": 0.847, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 6.735030372202942e-06, |
|
"loss": 0.3137, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 6.579531081398105e-06, |
|
"loss": 0.3019, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 0.4216003119945526, |
|
"eval_runtime": 62.3646, |
|
"eval_samples_per_second": 13.357, |
|
"eval_steps_per_second": 0.85, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 6.4223154527013755e-06, |
|
"loss": 0.3044, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 6.263554318547713e-06, |
|
"loss": 0.3044, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 0.4185173809528351, |
|
"eval_runtime": 61.9058, |
|
"eval_samples_per_second": 13.456, |
|
"eval_steps_per_second": 0.856, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 6.1034201907373045e-06, |
|
"loss": 0.305, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5.942087072982131e-06, |
|
"loss": 0.3034, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 0.41815003752708435, |
|
"eval_runtime": 62.0238, |
|
"eval_samples_per_second": 13.43, |
|
"eval_steps_per_second": 0.855, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.779730271831384e-06, |
|
"loss": 0.3115, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.616526206181215e-06, |
|
"loss": 0.3026, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 0.41711267828941345, |
|
"eval_runtime": 62.5134, |
|
"eval_samples_per_second": 13.325, |
|
"eval_steps_per_second": 0.848, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.4526522155758015e-06, |
|
"loss": 0.3077, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.288286367508009e-06, |
|
"loss": 0.3062, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 0.41751572489738464, |
|
"eval_runtime": 62.561, |
|
"eval_samples_per_second": 13.315, |
|
"eval_steps_per_second": 0.847, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.123607263929075e-06, |
|
"loss": 0.3076, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.958793847177518e-06, |
|
"loss": 0.315, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 0.41455498337745667, |
|
"eval_runtime": 62.0669, |
|
"eval_samples_per_second": 13.421, |
|
"eval_steps_per_second": 0.854, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.7940252055382115e-06, |
|
"loss": 0.3024, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.629480378642832e-06, |
|
"loss": 0.3124, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 0.41453319787979126, |
|
"eval_runtime": 61.9067, |
|
"eval_samples_per_second": 13.456, |
|
"eval_steps_per_second": 0.856, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.46533816292321e-06, |
|
"loss": 0.31, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.301776917328918e-06, |
|
"loss": 0.3096, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_loss": 0.41273748874664307, |
|
"eval_runtime": 62.1417, |
|
"eval_samples_per_second": 13.405, |
|
"eval_steps_per_second": 0.853, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.138974369520252e-06, |
|
"loss": 0.3044, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.977107422747163e-06, |
|
"loss": 0.3178, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 0.4111482501029968, |
|
"eval_runtime": 62.5639, |
|
"eval_samples_per_second": 13.314, |
|
"eval_steps_per_second": 0.847, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.816351963624017e-06, |
|
"loss": 0.3102, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.6568826710090353e-06, |
|
"loss": 0.3044, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 0.4110707640647888, |
|
"eval_runtime": 62.5689, |
|
"eval_samples_per_second": 13.313, |
|
"eval_steps_per_second": 0.847, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.4988728261960957e-06, |
|
"loss": 0.306, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.3424941246251574e-06, |
|
"loss": 0.3078, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 0.4091060757637024, |
|
"eval_runtime": 62.0676, |
|
"eval_samples_per_second": 13.421, |
|
"eval_steps_per_second": 0.854, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.1879164893158713e-06, |
|
"loss": 0.2977, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.035307886227156e-06, |
|
"loss": 0.2967, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 0.4094270169734955, |
|
"eval_runtime": 62.0655, |
|
"eval_samples_per_second": 13.421, |
|
"eval_steps_per_second": 0.854, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.8848341417433036e-06, |
|
"loss": 0.3069, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 2.736658762485005e-06, |
|
"loss": 0.3068, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 0.4080323278903961, |
|
"eval_runtime": 62.0098, |
|
"eval_samples_per_second": 13.433, |
|
"eval_steps_per_second": 0.855, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.590942757641035e-06, |
|
"loss": 0.3037, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.447844464013703e-06, |
|
"loss": 0.276, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 0.4152510464191437, |
|
"eval_runtime": 61.9609, |
|
"eval_samples_per_second": 13.444, |
|
"eval_steps_per_second": 0.855, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 2.3075193739681182e-06, |
|
"loss": 0.2327, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 2.170119966472293e-06, |
|
"loss": 0.2288, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_loss": 0.43204566836357117, |
|
"eval_runtime": 62.0792, |
|
"eval_samples_per_second": 13.418, |
|
"eval_steps_per_second": 0.854, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.0357955414116075e-06, |
|
"loss": 0.2267, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.9046920573577239e-06, |
|
"loss": 0.2244, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_loss": 0.4292474389076233, |
|
"eval_runtime": 62.0223, |
|
"eval_samples_per_second": 13.431, |
|
"eval_steps_per_second": 0.855, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.7769519729682105e-06, |
|
"loss": 0.2327, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.6527140921892066e-06, |
|
"loss": 0.2336, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_loss": 0.427610844373703, |
|
"eval_runtime": 62.1107, |
|
"eval_samples_per_second": 13.412, |
|
"eval_steps_per_second": 0.853, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.532113413429357e-06, |
|
"loss": 0.2386, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.4152809828688708e-06, |
|
"loss": 0.2266, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 0.4290391206741333, |
|
"eval_runtime": 62.2449, |
|
"eval_samples_per_second": 13.383, |
|
"eval_steps_per_second": 0.851, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3023437520631426e-06, |
|
"loss": 0.2328, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1934244399956206e-06, |
|
"loss": 0.2312, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_loss": 0.42950907349586487, |
|
"eval_runtime": 62.2915, |
|
"eval_samples_per_second": 13.373, |
|
"eval_steps_per_second": 0.851, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.0886413997298595e-06, |
|
"loss": 0.2338, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 9.881084898056197e-07, |
|
"loss": 0.2277, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 0.4284292161464691, |
|
"eval_runtime": 62.4097, |
|
"eval_samples_per_second": 13.347, |
|
"eval_steps_per_second": 0.849, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 8.919349505187813e-07, |
|
"loss": 0.2333, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 8.002252852194992e-07, |
|
"loss": 0.2332, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_loss": 0.42790091037750244, |
|
"eval_runtime": 62.4651, |
|
"eval_samples_per_second": 13.335, |
|
"eval_steps_per_second": 0.848, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 7.130791467575676e-07, |
|
"loss": 0.2257, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 6.305912291984229e-07, |
|
"loss": 0.2289, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 0.42792582511901855, |
|
"eval_runtime": 62.5296, |
|
"eval_samples_per_second": 13.322, |
|
"eval_steps_per_second": 0.848, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 5.528511649273932e-07, |
|
"loss": 0.2303, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.799434272540576e-07, |
|
"loss": 0.2279, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_loss": 0.4278266131877899, |
|
"eval_runtime": 62.5218, |
|
"eval_samples_per_second": 13.323, |
|
"eval_steps_per_second": 0.848, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 4.1194723862250317e-07, |
|
"loss": 0.2267, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.4893648452724636e-07, |
|
"loss": 0.2312, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_loss": 0.4273243546485901, |
|
"eval_runtime": 62.5591, |
|
"eval_samples_per_second": 13.315, |
|
"eval_steps_per_second": 0.847, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.9097963322834597e-07, |
|
"loss": 0.2306, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.3813966135294574e-07, |
|
"loss": 0.2334, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 0.42646506428718567, |
|
"eval_runtime": 62.6378, |
|
"eval_samples_per_second": 13.299, |
|
"eval_steps_per_second": 0.846, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.9047398546410633e-07, |
|
"loss": 0.2306, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.4803439967125022e-07, |
|
"loss": 0.2278, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 0.42754805088043213, |
|
"eval_runtime": 62.1476, |
|
"eval_samples_per_second": 13.404, |
|
"eval_steps_per_second": 0.853, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.1086701935005606e-07, |
|
"loss": 0.2296, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 7.901223103291833e-08, |
|
"loss": 0.2295, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 0.4276488721370697, |
|
"eval_runtime": 61.9957, |
|
"eval_samples_per_second": 13.436, |
|
"eval_steps_per_second": 0.855, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 5.250464852444792e-08, |
|
"loss": 0.2334, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.137307528968292e-08, |
|
"loss": 0.2292, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_loss": 0.4273829162120819, |
|
"eval_runtime": 62.0452, |
|
"eval_samples_per_second": 13.426, |
|
"eval_steps_per_second": 0.854, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.5640473155894566e-08, |
|
"loss": 0.2284, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.323937361977338e-09, |
|
"loss": 0.2291, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 0.42734310030937195, |
|
"eval_runtime": 61.9825, |
|
"eval_samples_per_second": 13.439, |
|
"eval_steps_per_second": 0.855, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.346779825575853e-10, |
|
"loss": 0.2288, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 954, |
|
"total_flos": 6.195687991759864e+18, |
|
"train_loss": 0.1527079766776327, |
|
"train_runtime": 29559.5606, |
|
"train_samples_per_second": 4.138, |
|
"train_steps_per_second": 0.032 |
|
} |
|
], |
|
"max_steps": 954, |
|
"num_train_epochs": 3, |
|
"total_flos": 6.195687991759864e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|