|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.83629191321499, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 6.2827061767850974e-06, |
|
"loss": 3.8041, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.721930086076435e-06, |
|
"loss": 3.5706, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.1424336578396131e-05, |
|
"loss": 3.2293, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.2565412353570195e-05, |
|
"loss": 2.8198, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.3424920790724471e-05, |
|
"loss": 2.6382, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.4114674634318977e-05, |
|
"loss": 2.5852, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.4690794260273606e-05, |
|
"loss": 2.4718, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.5185478617142983e-05, |
|
"loss": 2.3908, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.561892416495773e-05, |
|
"loss": 2.3321, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.600463626286153e-05, |
|
"loss": 2.3622, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 2.0142996311187744, |
|
"eval_runtime": 13.3285, |
|
"eval_samples_per_second": 92.958, |
|
"eval_steps_per_second": 18.607, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.6352093070986755e-05, |
|
"loss": 2.2679, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.666820294156779e-05, |
|
"loss": 2.2493, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.6958156901812732e-05, |
|
"loss": 2.3766, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.7225954557575116e-05, |
|
"loss": 2.1449, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.747474292314672e-05, |
|
"loss": 2.1136, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.770704275518123e-05, |
|
"loss": 2.107, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.7924904433402933e-05, |
|
"loss": 2.1469, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.8130018169564944e-05, |
|
"loss": 2.1229, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.832379371735486e-05, |
|
"loss": 2.0893, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.8507419182561513e-05, |
|
"loss": 2.0836, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 1.813197135925293, |
|
"eval_runtime": 13.3546, |
|
"eval_samples_per_second": 92.777, |
|
"eval_steps_per_second": 18.57, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.868190518296623e-05, |
|
"loss": 2.0307, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.8848118530355293e-05, |
|
"loss": 2.0025, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.9006808283301558e-05, |
|
"loss": 2.0116, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.915862615484805e-05, |
|
"loss": 2.0355, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.9304142682139013e-05, |
|
"loss": 2.0056, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.945753054066002e-05, |
|
"loss": 2.1508, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.959138057324679e-05, |
|
"loss": 1.9203, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.9720308518695147e-05, |
|
"loss": 1.9358, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.9844663575115566e-05, |
|
"loss": 1.877, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.9964759049286476e-05, |
|
"loss": 1.9239, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_loss": 1.726140022277832, |
|
"eval_runtime": 13.3534, |
|
"eval_samples_per_second": 92.785, |
|
"eval_steps_per_second": 18.572, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.994594594594595e-05, |
|
"loss": 1.9449, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.9855855855855857e-05, |
|
"loss": 1.895, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.9765765765765768e-05, |
|
"loss": 1.864, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.967567567567568e-05, |
|
"loss": 1.8928, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.9585585585585586e-05, |
|
"loss": 1.8801, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.9495495495495497e-05, |
|
"loss": 1.911, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.9405405405405408e-05, |
|
"loss": 1.8945, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.930630630630631e-05, |
|
"loss": 1.9508, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.9216216216216216e-05, |
|
"loss": 1.7709, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.9126126126126127e-05, |
|
"loss": 1.7693, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"eval_loss": 1.673058032989502, |
|
"eval_runtime": 13.3468, |
|
"eval_samples_per_second": 92.832, |
|
"eval_steps_per_second": 18.581, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.9036036036036038e-05, |
|
"loss": 1.7773, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.8945945945945945e-05, |
|
"loss": 1.8198, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.8855855855855856e-05, |
|
"loss": 1.7813, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.8765765765765767e-05, |
|
"loss": 1.7736, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 1.8675675675675678e-05, |
|
"loss": 1.8544, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 1.8585585585585585e-05, |
|
"loss": 1.7672, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1.8495495495495496e-05, |
|
"loss": 1.8223, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1.8405405405405407e-05, |
|
"loss": 1.7832, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 1.8315315315315318e-05, |
|
"loss": 1.7937, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 1.822522522522523e-05, |
|
"loss": 1.8078, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"eval_loss": 1.6267061233520508, |
|
"eval_runtime": 13.3535, |
|
"eval_samples_per_second": 92.784, |
|
"eval_steps_per_second": 18.572, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 1.8126126126126127e-05, |
|
"loss": 1.8374, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 1.8036036036036037e-05, |
|
"loss": 1.7228, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 1.7945945945945948e-05, |
|
"loss": 1.6986, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 1.7855855855855856e-05, |
|
"loss": 1.6858, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 1.7765765765765767e-05, |
|
"loss": 1.7169, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.7675675675675677e-05, |
|
"loss": 1.7069, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 1.7585585585585588e-05, |
|
"loss": 1.6725, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 1.7495495495495496e-05, |
|
"loss": 1.7052, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 1.7405405405405406e-05, |
|
"loss": 1.6597, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 1.7315315315315317e-05, |
|
"loss": 1.7565, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"eval_loss": 1.5994915962219238, |
|
"eval_runtime": 13.2972, |
|
"eval_samples_per_second": 93.177, |
|
"eval_steps_per_second": 18.651, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 1.7225225225225225e-05, |
|
"loss": 1.723, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 1.7135135135135135e-05, |
|
"loss": 1.6777, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 1.7045045045045046e-05, |
|
"loss": 1.6943, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 1.6954954954954957e-05, |
|
"loss": 1.7274, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 1.6864864864864868e-05, |
|
"loss": 1.7062, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 1.6774774774774775e-05, |
|
"loss": 1.6078, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 1.6684684684684686e-05, |
|
"loss": 1.6484, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 1.6594594594594597e-05, |
|
"loss": 1.6097, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 1.6504504504504508e-05, |
|
"loss": 1.6255, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 1.641441441441442e-05, |
|
"loss": 1.6188, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"eval_loss": 1.5767285823822021, |
|
"eval_runtime": 13.2952, |
|
"eval_samples_per_second": 93.191, |
|
"eval_steps_per_second": 18.653, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 1.6324324324324326e-05, |
|
"loss": 1.68, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 1.6234234234234237e-05, |
|
"loss": 1.618, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 1.6144144144144144e-05, |
|
"loss": 1.5747, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 1.6054054054054055e-05, |
|
"loss": 1.6617, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 1.5963963963963966e-05, |
|
"loss": 1.6466, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 1.5864864864864867e-05, |
|
"loss": 1.7621, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 1.5774774774774778e-05, |
|
"loss": 1.5679, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 1.5684684684684685e-05, |
|
"loss": 1.6356, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 1.5594594594594596e-05, |
|
"loss": 1.5499, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 1.5504504504504504e-05, |
|
"loss": 1.5783, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"eval_loss": 1.5619192123413086, |
|
"eval_runtime": 13.2986, |
|
"eval_samples_per_second": 93.168, |
|
"eval_steps_per_second": 18.649, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 1.5414414414414414e-05, |
|
"loss": 1.5848, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 1.5324324324324325e-05, |
|
"loss": 1.5019, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 1.5234234234234236e-05, |
|
"loss": 1.6008, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 1.5144144144144147e-05, |
|
"loss": 1.5757, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 1.5054054054054054e-05, |
|
"loss": 1.5855, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 1.4963963963963965e-05, |
|
"loss": 1.629, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 1.4873873873873874e-05, |
|
"loss": 1.5948, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 1.4783783783783785e-05, |
|
"loss": 1.5814, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.4684684684684686e-05, |
|
"loss": 1.7112, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 1.4594594594594596e-05, |
|
"loss": 1.4981, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"eval_loss": 1.546679139137268, |
|
"eval_runtime": 13.2814, |
|
"eval_samples_per_second": 93.288, |
|
"eval_steps_per_second": 18.673, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 1.4504504504504506e-05, |
|
"loss": 1.5341, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 1.4414414414414416e-05, |
|
"loss": 1.5301, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 1.4324324324324326e-05, |
|
"loss": 1.5253, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 1.4234234234234234e-05, |
|
"loss": 1.5697, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 1.4144144144144145e-05, |
|
"loss": 1.5482, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 1.4054054054054055e-05, |
|
"loss": 1.4849, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 1.3963963963963964e-05, |
|
"loss": 1.536, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 1.3873873873873875e-05, |
|
"loss": 1.5267, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 1.3783783783783784e-05, |
|
"loss": 1.5003, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 1.3693693693693695e-05, |
|
"loss": 1.5296, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"eval_loss": 1.5358564853668213, |
|
"eval_runtime": 13.2903, |
|
"eval_samples_per_second": 93.226, |
|
"eval_steps_per_second": 18.66, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 1.3594594594594597e-05, |
|
"loss": 1.6728, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 1.3504504504504506e-05, |
|
"loss": 1.4944, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 1.3414414414414417e-05, |
|
"loss": 1.5016, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 1.3324324324324324e-05, |
|
"loss": 1.5407, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 1.3234234234234235e-05, |
|
"loss": 1.5129, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 1.3144144144144144e-05, |
|
"loss": 1.4774, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 1.3054054054054055e-05, |
|
"loss": 1.5181, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 1.2963963963963966e-05, |
|
"loss": 1.4897, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 1.2873873873873875e-05, |
|
"loss": 1.4918, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 1.2783783783783785e-05, |
|
"loss": 1.4734, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"eval_loss": 1.5299837589263916, |
|
"eval_runtime": 13.3097, |
|
"eval_samples_per_second": 93.09, |
|
"eval_steps_per_second": 18.633, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 1.2693693693693695e-05, |
|
"loss": 1.4425, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 1.2603603603603605e-05, |
|
"loss": 1.5102, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 1.2513513513513516e-05, |
|
"loss": 1.4614, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 1.2423423423423424e-05, |
|
"loss": 1.5378, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 1.2333333333333334e-05, |
|
"loss": 1.5182, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 1.2243243243243244e-05, |
|
"loss": 1.4606, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 1.2153153153153154e-05, |
|
"loss": 1.4675, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 1.2063063063063063e-05, |
|
"loss": 1.5261, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 1.1972972972972974e-05, |
|
"loss": 1.4602, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 1.1882882882882885e-05, |
|
"loss": 1.4415, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"eval_loss": 1.521682620048523, |
|
"eval_runtime": 13.2939, |
|
"eval_samples_per_second": 93.2, |
|
"eval_steps_per_second": 18.655, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 1.1792792792792792e-05, |
|
"loss": 1.4601, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 1.1702702702702703e-05, |
|
"loss": 1.4676, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 1.1612612612612612e-05, |
|
"loss": 1.4213, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 1.1522522522522523e-05, |
|
"loss": 1.4096, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 1.1432432432432434e-05, |
|
"loss": 1.4475, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 1.1342342342342343e-05, |
|
"loss": 1.4477, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 1.1243243243243245e-05, |
|
"loss": 1.4979, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 1.1153153153153154e-05, |
|
"loss": 1.4436, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"learning_rate": 1.1063063063063065e-05, |
|
"loss": 1.4913, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 1.0972972972972974e-05, |
|
"loss": 1.4513, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"eval_loss": 1.5171560049057007, |
|
"eval_runtime": 13.2984, |
|
"eval_samples_per_second": 93.169, |
|
"eval_steps_per_second": 18.649, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 10.39, |
|
"learning_rate": 1.0882882882882884e-05, |
|
"loss": 1.4649, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 10.47, |
|
"learning_rate": 1.0792792792792795e-05, |
|
"loss": 1.4126, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 1.0702702702702703e-05, |
|
"loss": 1.3916, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 10.63, |
|
"learning_rate": 1.0612612612612613e-05, |
|
"loss": 1.393, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 1.0522522522522523e-05, |
|
"loss": 1.3972, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 10.79, |
|
"learning_rate": 1.0432432432432433e-05, |
|
"loss": 1.4867, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 1.0342342342342344e-05, |
|
"loss": 1.4109, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 1.0252252252252253e-05, |
|
"loss": 1.4215, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 1.0153153153153155e-05, |
|
"loss": 1.5288, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 1.0063063063063064e-05, |
|
"loss": 1.3782, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"eval_loss": 1.5077377557754517, |
|
"eval_runtime": 13.2758, |
|
"eval_samples_per_second": 93.328, |
|
"eval_steps_per_second": 18.681, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 11.19, |
|
"learning_rate": 9.972972972972975e-06, |
|
"loss": 1.4292, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 11.27, |
|
"learning_rate": 9.882882882882884e-06, |
|
"loss": 1.4404, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 9.792792792792793e-06, |
|
"loss": 1.4192, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 9.702702702702704e-06, |
|
"loss": 1.324, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 9.612612612612613e-06, |
|
"loss": 1.4342, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 9.522522522522524e-06, |
|
"loss": 1.4233, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 11.66, |
|
"learning_rate": 9.432432432432433e-06, |
|
"loss": 1.3475, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"learning_rate": 9.342342342342344e-06, |
|
"loss": 1.4178, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 11.82, |
|
"learning_rate": 9.252252252252253e-06, |
|
"loss": 1.3842, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 9.162162162162162e-06, |
|
"loss": 1.4103, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"eval_loss": 1.507853388786316, |
|
"eval_runtime": 13.2911, |
|
"eval_samples_per_second": 93.22, |
|
"eval_steps_per_second": 18.659, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"learning_rate": 9.072072072072073e-06, |
|
"loss": 1.3918, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"learning_rate": 8.972972972972974e-06, |
|
"loss": 1.4841, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 12.14, |
|
"learning_rate": 8.882882882882883e-06, |
|
"loss": 1.3866, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 12.22, |
|
"learning_rate": 8.792792792792794e-06, |
|
"loss": 1.3713, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 8.702702702702703e-06, |
|
"loss": 1.3384, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 12.38, |
|
"learning_rate": 8.612612612612612e-06, |
|
"loss": 1.4079, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 8.522522522522523e-06, |
|
"loss": 1.3715, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 12.54, |
|
"learning_rate": 8.432432432432434e-06, |
|
"loss": 1.3541, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"learning_rate": 8.342342342342343e-06, |
|
"loss": 1.3764, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 12.69, |
|
"learning_rate": 8.252252252252254e-06, |
|
"loss": 1.3907, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 12.69, |
|
"eval_loss": 1.5033966302871704, |
|
"eval_runtime": 13.2864, |
|
"eval_samples_per_second": 93.254, |
|
"eval_steps_per_second": 18.666, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"learning_rate": 8.162162162162163e-06, |
|
"loss": 1.3626, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"learning_rate": 8.072072072072072e-06, |
|
"loss": 1.4094, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 7.981981981981983e-06, |
|
"loss": 1.3579, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 7.891891891891894e-06, |
|
"loss": 1.4517, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"learning_rate": 7.801801801801803e-06, |
|
"loss": 1.301, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 13.17, |
|
"learning_rate": 7.711711711711712e-06, |
|
"loss": 1.3513, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 13.25, |
|
"learning_rate": 7.621621621621622e-06, |
|
"loss": 1.3487, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 7.531531531531532e-06, |
|
"loss": 1.3894, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"learning_rate": 7.441441441441442e-06, |
|
"loss": 1.3619, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 13.49, |
|
"learning_rate": 7.3513513513513525e-06, |
|
"loss": 1.3663, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 13.49, |
|
"eval_loss": 1.501574993133545, |
|
"eval_runtime": 13.3092, |
|
"eval_samples_per_second": 93.094, |
|
"eval_steps_per_second": 18.634, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 7.2612612612612625e-06, |
|
"loss": 1.3268, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 13.65, |
|
"learning_rate": 7.1711711711711716e-06, |
|
"loss": 1.3703, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 7.0810810810810815e-06, |
|
"loss": 1.4246, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 6.9909909909909915e-06, |
|
"loss": 1.3642, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"learning_rate": 6.900900900900901e-06, |
|
"loss": 1.3467, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"learning_rate": 6.810810810810811e-06, |
|
"loss": 1.3802, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"learning_rate": 6.711711711711713e-06, |
|
"loss": 1.4328, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"learning_rate": 6.621621621621622e-06, |
|
"loss": 1.3316, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 6.531531531531532e-06, |
|
"loss": 1.3634, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"learning_rate": 6.441441441441442e-06, |
|
"loss": 1.3565, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"eval_loss": 1.4980181455612183, |
|
"eval_runtime": 13.2875, |
|
"eval_samples_per_second": 93.246, |
|
"eval_steps_per_second": 18.664, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 14.36, |
|
"learning_rate": 6.351351351351351e-06, |
|
"loss": 1.3217, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 14.44, |
|
"learning_rate": 6.261261261261262e-06, |
|
"loss": 1.33, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 14.52, |
|
"learning_rate": 6.171171171171172e-06, |
|
"loss": 1.3513, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"learning_rate": 6.081081081081082e-06, |
|
"loss": 1.3649, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"learning_rate": 5.990990990990992e-06, |
|
"loss": 1.3462, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 14.76, |
|
"learning_rate": 5.900900900900901e-06, |
|
"loss": 1.3454, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 14.84, |
|
"learning_rate": 5.810810810810811e-06, |
|
"loss": 1.3316, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 14.92, |
|
"learning_rate": 5.720720720720722e-06, |
|
"loss": 1.3347, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 5.6306306306306316e-06, |
|
"loss": 1.3039, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"learning_rate": 5.531531531531532e-06, |
|
"loss": 1.4057, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"eval_loss": 1.4985554218292236, |
|
"eval_runtime": 13.2747, |
|
"eval_samples_per_second": 93.335, |
|
"eval_steps_per_second": 18.682, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"learning_rate": 5.441441441441442e-06, |
|
"loss": 1.3082, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"learning_rate": 5.351351351351351e-06, |
|
"loss": 1.3589, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 15.32, |
|
"learning_rate": 5.261261261261261e-06, |
|
"loss": 1.3235, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"learning_rate": 5.171171171171172e-06, |
|
"loss": 1.3153, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 15.47, |
|
"learning_rate": 5.081081081081082e-06, |
|
"loss": 1.3345, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 15.55, |
|
"learning_rate": 4.990990990990991e-06, |
|
"loss": 1.3824, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"learning_rate": 4.900900900900901e-06, |
|
"loss": 1.291, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 15.71, |
|
"learning_rate": 4.810810810810811e-06, |
|
"loss": 1.3106, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"learning_rate": 4.720720720720721e-06, |
|
"loss": 1.3559, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"learning_rate": 4.630630630630631e-06, |
|
"loss": 1.3406, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"eval_loss": 1.4952143430709839, |
|
"eval_runtime": 13.2782, |
|
"eval_samples_per_second": 93.31, |
|
"eval_steps_per_second": 18.677, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 15.95, |
|
"learning_rate": 4.540540540540541e-06, |
|
"loss": 1.328, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 4.441441441441442e-06, |
|
"loss": 1.4218, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 4.351351351351352e-06, |
|
"loss": 1.2962, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 16.19, |
|
"learning_rate": 4.2612612612612615e-06, |
|
"loss": 1.3122, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"learning_rate": 4.1711711711711715e-06, |
|
"loss": 1.3641, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 16.35, |
|
"learning_rate": 4.0810810810810815e-06, |
|
"loss": 1.3058, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 16.43, |
|
"learning_rate": 3.990990990990991e-06, |
|
"loss": 1.2986, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"learning_rate": 3.900900900900901e-06, |
|
"loss": 1.2902, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 16.58, |
|
"learning_rate": 3.810810810810811e-06, |
|
"loss": 1.3725, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 16.66, |
|
"learning_rate": 3.720720720720721e-06, |
|
"loss": 1.3031, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 16.66, |
|
"eval_loss": 1.495803713798523, |
|
"eval_runtime": 13.2846, |
|
"eval_samples_per_second": 93.266, |
|
"eval_steps_per_second": 18.668, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 16.74, |
|
"learning_rate": 3.6306306306306312e-06, |
|
"loss": 1.3091, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 16.82, |
|
"learning_rate": 3.5405405405405408e-06, |
|
"loss": 1.3003, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"learning_rate": 3.4504504504504503e-06, |
|
"loss": 1.2694, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 3.3603603603603607e-06, |
|
"loss": 1.3349, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 17.06, |
|
"learning_rate": 3.2702702702702706e-06, |
|
"loss": 1.3553, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"learning_rate": 3.1801801801801806e-06, |
|
"loss": 1.2844, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 17.22, |
|
"learning_rate": 3.0900900900900905e-06, |
|
"loss": 1.2815, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 17.3, |
|
"learning_rate": 3e-06, |
|
"loss": 1.2756, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 17.38, |
|
"learning_rate": 2.9099099099099105e-06, |
|
"loss": 1.33, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 17.46, |
|
"learning_rate": 2.81981981981982e-06, |
|
"loss": 1.31, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 17.46, |
|
"eval_loss": 1.4959148168563843, |
|
"eval_runtime": 13.2926, |
|
"eval_samples_per_second": 93.21, |
|
"eval_steps_per_second": 18.657, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 17.54, |
|
"learning_rate": 2.72972972972973e-06, |
|
"loss": 1.3306, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 17.62, |
|
"learning_rate": 2.63963963963964e-06, |
|
"loss": 1.331, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 17.69, |
|
"learning_rate": 2.54954954954955e-06, |
|
"loss": 1.3347, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 17.77, |
|
"learning_rate": 2.45945945945946e-06, |
|
"loss": 1.3291, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"learning_rate": 2.3693693693693693e-06, |
|
"loss": 1.2774, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 17.93, |
|
"learning_rate": 2.2792792792792793e-06, |
|
"loss": 1.3102, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 2.1801801801801804e-06, |
|
"loss": 1.4393, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 18.09, |
|
"learning_rate": 2.0900900900900904e-06, |
|
"loss": 1.2779, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 18.17, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.2708, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 18.25, |
|
"learning_rate": 1.90990990990991e-06, |
|
"loss": 1.3565, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 18.25, |
|
"eval_loss": 1.494253396987915, |
|
"eval_runtime": 13.2974, |
|
"eval_samples_per_second": 93.176, |
|
"eval_steps_per_second": 18.65, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"learning_rate": 1.81981981981982e-06, |
|
"loss": 1.2744, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"learning_rate": 1.72972972972973e-06, |
|
"loss": 1.2697, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 18.49, |
|
"learning_rate": 1.6396396396396397e-06, |
|
"loss": 1.2921, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 18.57, |
|
"learning_rate": 1.5495495495495497e-06, |
|
"loss": 1.3096, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 18.65, |
|
"learning_rate": 1.4594594594594596e-06, |
|
"loss": 1.2971, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 18.73, |
|
"learning_rate": 1.3693693693693694e-06, |
|
"loss": 1.3132, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 1.2792792792792793e-06, |
|
"loss": 1.3367, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 18.88, |
|
"learning_rate": 1.1891891891891893e-06, |
|
"loss": 1.2648, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 18.96, |
|
"learning_rate": 1.0990990990990993e-06, |
|
"loss": 1.3025, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 1.3732, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"eval_loss": 1.4953521490097046, |
|
"eval_runtime": 13.2927, |
|
"eval_samples_per_second": 93.209, |
|
"eval_steps_per_second": 18.657, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"learning_rate": 9.0990990990991e-07, |
|
"loss": 1.2938, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 19.21, |
|
"learning_rate": 8.198198198198199e-07, |
|
"loss": 1.3374, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 19.28, |
|
"learning_rate": 7.297297297297298e-07, |
|
"loss": 1.297, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 19.36, |
|
"learning_rate": 6.396396396396397e-07, |
|
"loss": 1.2748, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 19.44, |
|
"learning_rate": 5.495495495495496e-07, |
|
"loss": 1.3259, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 19.52, |
|
"learning_rate": 4.5945945945945953e-07, |
|
"loss": 1.3099, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"learning_rate": 3.693693693693694e-07, |
|
"loss": 1.3277, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 19.68, |
|
"learning_rate": 2.792792792792793e-07, |
|
"loss": 1.2708, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 19.76, |
|
"learning_rate": 1.8918918918918921e-07, |
|
"loss": 1.2546, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 19.84, |
|
"learning_rate": 9.90990990990991e-08, |
|
"loss": 1.2705, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 19.84, |
|
"eval_loss": 1.494584560394287, |
|
"eval_runtime": 13.2864, |
|
"eval_samples_per_second": 93.253, |
|
"eval_steps_per_second": 18.666, |
|
"step": 2500 |
|
} |
|
], |
|
"max_steps": 2520, |
|
"num_train_epochs": 20, |
|
"total_flos": 5.993865079244718e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|