madlag's picture
Initial release.
44c8220
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"global_step": 21960,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 2.9316939890710385e-05,
"loss": 2.1044,
"step": 500
},
{
"epoch": 0.09,
"learning_rate": 2.8633879781420765e-05,
"loss": 1.5614,
"step": 1000
},
{
"epoch": 0.14,
"learning_rate": 2.795081967213115e-05,
"loss": 1.4288,
"step": 1500
},
{
"epoch": 0.18,
"learning_rate": 2.7267759562841533e-05,
"loss": 1.3095,
"step": 2000
},
{
"epoch": 0.23,
"learning_rate": 2.6584699453551913e-05,
"loss": 1.2225,
"step": 2500
},
{
"HasAns_exact": 71.89608636977059,
"HasAns_f1": 78.04168619043323,
"HasAns_total": 5928,
"NoAns_exact": 82.62405382674517,
"NoAns_f1": 82.62405382674517,
"NoAns_total": 5945,
"best_exact": 77.27617282910806,
"best_exact_thresh": 0.0,
"best_f1": 80.34457304277659,
"best_f1_thresh": 0.0,
"epoch": 0.23,
"exact": 77.26775035795502,
"f1": 80.33615057162366,
"step": 2500,
"total": 11873
},
{
"epoch": 0.27,
"learning_rate": 2.5901639344262294e-05,
"loss": 1.2062,
"step": 3000
},
{
"epoch": 0.32,
"learning_rate": 2.5218579234972678e-05,
"loss": 1.2019,
"step": 3500
},
{
"epoch": 0.36,
"learning_rate": 2.453551912568306e-05,
"loss": 1.1856,
"step": 4000
},
{
"epoch": 0.41,
"learning_rate": 2.3852459016393442e-05,
"loss": 1.1677,
"step": 4500
},
{
"epoch": 0.46,
"learning_rate": 2.3169398907103826e-05,
"loss": 1.1343,
"step": 5000
},
{
"HasAns_exact": 74.44331983805668,
"HasAns_f1": 80.60592872513861,
"HasAns_total": 5928,
"NoAns_exact": 83.65012615643398,
"NoAns_f1": 83.65012615643398,
"NoAns_total": 5945,
"best_exact": 79.05331424239871,
"best_exact_thresh": 0.0,
"best_f1": 82.13020681231532,
"best_f1_thresh": 0.0,
"epoch": 0.46,
"exact": 79.05331424239871,
"f1": 82.13020681231545,
"step": 5000,
"total": 11873
},
{
"epoch": 0.5,
"learning_rate": 2.248633879781421e-05,
"loss": 1.1489,
"step": 5500
},
{
"epoch": 0.55,
"learning_rate": 2.180327868852459e-05,
"loss": 1.1349,
"step": 6000
},
{
"epoch": 0.59,
"learning_rate": 2.1120218579234974e-05,
"loss": 1.1156,
"step": 6500
},
{
"epoch": 0.64,
"learning_rate": 2.0437158469945358e-05,
"loss": 1.1066,
"step": 7000
},
{
"epoch": 0.68,
"learning_rate": 1.975409836065574e-05,
"loss": 1.0877,
"step": 7500
},
{
"HasAns_exact": 80.61740890688259,
"HasAns_f1": 87.36413644383099,
"HasAns_total": 5928,
"NoAns_exact": 72.31286795626578,
"NoAns_f1": 72.31286795626578,
"NoAns_total": 5945,
"best_exact": 76.45919312726353,
"best_exact_thresh": 0.0,
"best_f1": 79.82772684570281,
"best_f1_thresh": 0.0,
"epoch": 0.68,
"exact": 76.45919312726353,
"f1": 79.82772684570291,
"step": 7500,
"total": 11873
},
{
"epoch": 0.73,
"learning_rate": 1.907103825136612e-05,
"loss": 1.0836,
"step": 8000
},
{
"epoch": 0.77,
"learning_rate": 1.8387978142076503e-05,
"loss": 1.0793,
"step": 8500
},
{
"epoch": 0.82,
"learning_rate": 1.7704918032786887e-05,
"loss": 1.0722,
"step": 9000
},
{
"epoch": 0.87,
"learning_rate": 1.7021857923497267e-05,
"loss": 1.0497,
"step": 9500
},
{
"epoch": 0.91,
"learning_rate": 1.633879781420765e-05,
"loss": 0.8443,
"step": 10000
},
{
"HasAns_exact": 77.7327935222672,
"HasAns_f1": 84.30659896226393,
"HasAns_total": 5928,
"NoAns_exact": 81.19428090832632,
"NoAns_f1": 81.19428090832632,
"NoAns_total": 5945,
"best_exact": 79.4660153288975,
"best_exact_thresh": 0.0,
"best_f1": 82.7482117955276,
"best_f1_thresh": 0.0,
"epoch": 0.91,
"exact": 79.4660153288975,
"f1": 82.74821179552765,
"step": 10000,
"total": 11873
},
{
"epoch": 0.96,
"learning_rate": 1.5655737704918035e-05,
"loss": 0.7997,
"step": 10500
},
{
"epoch": 1.0,
"learning_rate": 1.4972677595628415e-05,
"loss": 0.7642,
"step": 11000
},
{
"epoch": 1.05,
"learning_rate": 1.4289617486338798e-05,
"loss": 0.4993,
"step": 11500
},
{
"epoch": 1.09,
"learning_rate": 1.3606557377049181e-05,
"loss": 0.4945,
"step": 12000
},
{
"epoch": 1.14,
"learning_rate": 1.2923497267759564e-05,
"loss": 0.5155,
"step": 12500
},
{
"HasAns_exact": 75.79284750337382,
"HasAns_f1": 82.13405464912947,
"HasAns_total": 5928,
"NoAns_exact": 86.67788057190917,
"NoAns_f1": 86.67788057190917,
"NoAns_total": 5945,
"best_exact": 81.24315674218816,
"best_exact_thresh": 0.0,
"best_f1": 84.4092205811537,
"best_f1_thresh": 0.0,
"epoch": 1.14,
"exact": 81.24315674218816,
"f1": 84.40922058115372,
"step": 12500,
"total": 11873
},
{
"epoch": 1.18,
"learning_rate": 1.2240437158469946e-05,
"loss": 0.4964,
"step": 13000
},
{
"epoch": 1.23,
"learning_rate": 1.1557377049180328e-05,
"loss": 0.4947,
"step": 13500
},
{
"epoch": 1.28,
"learning_rate": 1.087431693989071e-05,
"loss": 0.5173,
"step": 14000
},
{
"epoch": 1.32,
"learning_rate": 1.0191256830601094e-05,
"loss": 0.4718,
"step": 14500
},
{
"epoch": 1.37,
"learning_rate": 9.508196721311476e-06,
"loss": 0.4768,
"step": 15000
},
{
"HasAns_exact": 77.83400809716599,
"HasAns_f1": 84.204371493218,
"HasAns_total": 5928,
"NoAns_exact": 85.23128679562657,
"NoAns_f1": 85.23128679562657,
"NoAns_total": 5945,
"best_exact": 81.53794323254444,
"best_exact_thresh": 0.0,
"best_f1": 84.71856432340547,
"best_f1_thresh": 0.0,
"epoch": 1.37,
"exact": 81.53794323254444,
"f1": 84.71856432340567,
"step": 15000,
"total": 11873
},
{
"epoch": 1.41,
"learning_rate": 8.825136612021857e-06,
"loss": 0.4937,
"step": 15500
},
{
"epoch": 1.46,
"learning_rate": 8.14207650273224e-06,
"loss": 0.4678,
"step": 16000
},
{
"epoch": 1.5,
"learning_rate": 7.459016393442623e-06,
"loss": 0.4922,
"step": 16500
},
{
"epoch": 1.55,
"learning_rate": 6.775956284153006e-06,
"loss": 0.4779,
"step": 17000
},
{
"epoch": 1.59,
"learning_rate": 6.092896174863388e-06,
"loss": 0.4787,
"step": 17500
},
{
"HasAns_exact": 79.30161943319838,
"HasAns_f1": 85.07291240565793,
"HasAns_total": 5928,
"NoAns_exact": 85.5004205214466,
"NoAns_f1": 85.5004205214466,
"NoAns_total": 5945,
"best_exact": 82.40545776130716,
"best_exact_thresh": 0.0,
"best_f1": 85.28697252090775,
"best_f1_thresh": 0.0,
"epoch": 1.59,
"exact": 82.40545776130716,
"f1": 85.28697252090787,
"step": 17500,
"total": 11873
},
{
"epoch": 1.64,
"learning_rate": 5.409836065573771e-06,
"loss": 0.4554,
"step": 18000
},
{
"epoch": 1.68,
"learning_rate": 4.726775956284153e-06,
"loss": 0.4661,
"step": 18500
},
{
"epoch": 1.73,
"learning_rate": 4.043715846994535e-06,
"loss": 0.449,
"step": 19000
},
{
"epoch": 1.78,
"learning_rate": 3.3606557377049183e-06,
"loss": 0.4426,
"step": 19500
},
{
"epoch": 1.82,
"learning_rate": 2.6775956284153005e-06,
"loss": 0.4732,
"step": 20000
},
{
"HasAns_exact": 81.29217273954116,
"HasAns_f1": 87.36118517518237,
"HasAns_total": 5928,
"NoAns_exact": 82.64087468460892,
"NoAns_f1": 82.64087468460892,
"NoAns_total": 5945,
"best_exact": 81.96748926134929,
"best_exact_thresh": 0.0,
"best_f1": 84.99765061218557,
"best_f1_thresh": 0.0,
"epoch": 1.82,
"exact": 81.96748926134929,
"f1": 84.99765061218568,
"step": 20000,
"total": 11873
},
{
"epoch": 1.87,
"learning_rate": 1.994535519125683e-06,
"loss": 0.4269,
"step": 20500
},
{
"epoch": 1.91,
"learning_rate": 1.3114754098360657e-06,
"loss": 0.4461,
"step": 21000
},
{
"epoch": 1.96,
"learning_rate": 6.284153005464482e-07,
"loss": 0.434,
"step": 21500
},
{
"epoch": 2.0,
"step": 21960,
"total_flos": 2.0283616700399e+17,
"train_runtime": 13072.0089,
"train_samples_per_second": 1.68
}
],
"max_steps": 21960,
"num_train_epochs": 2,
"total_flos": 2.0283616700399e+17,
"trial_name": null,
"trial_params": null
}