test-t5-qplus-base / trainer_state.json
jacobmorrison's picture
Upload 14 files
d62be2d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"learning_rate": 5e-05,
"loss": 1.5652,
"step": 1000
},
{
"epoch": 0.1,
"eval_exact_match": 38.3413,
"eval_exact_match_for_squad-like": 38.3413,
"eval_exact_match_for_task000_NewsQA_dev": 23.8367,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 50.0455,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 52.0534,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 53.7009,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 50.0357,
"eval_exact_match_for_task000_SQuAD_dev": 55.8009,
"eval_exact_match_for_task000_SearchQA_dev": 2.0554,
"eval_f1": 52.1049,
"eval_f1_for_squad-like": 52.1049,
"eval_f1_for_task000_NewsQA_dev": 40.9979,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 68.6298,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 69.9454,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 69.6682,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 66.9921,
"eval_f1_for_task000_SQuAD_dev": 71.031,
"eval_f1_for_task000_SearchQA_dev": 6.183,
"eval_gen_len": 6.4379,
"eval_global_step": 1000,
"eval_loss": 1.6005868911743164,
"eval_rouge1": 51.5552,
"eval_rouge1_for_squad-like": 51.5552,
"eval_rouge1_for_task000_NewsQA_dev": 40.9869,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 68.2955,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 69.008,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 68.733,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 65.7541,
"eval_rouge1_for_task000_SQuAD_dev": 70.1352,
"eval_rouge1_for_task000_SearchQA_dev": 6.3956,
"eval_rougeL": 51.4856,
"eval_rougeL_for_squad-like": 51.4856,
"eval_rougeL_for_task000_NewsQA_dev": 40.8766,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 68.2039,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 68.8974,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 68.6842,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 65.6558,
"eval_rougeL_for_task000_SQuAD_dev": 70.0603,
"eval_rougeL_for_task000_SearchQA_dev": 6.3754,
"eval_runtime": 5547.761,
"eval_samples_per_second": 12.508,
"eval_steps_per_second": 6.254,
"step": 1000
},
{
"epoch": 0.2,
"learning_rate": 5e-05,
"loss": 1.0877,
"step": 2000
},
{
"epoch": 0.2,
"eval_exact_match": 42.2972,
"eval_exact_match_for_squad-like": 42.2972,
"eval_exact_match_for_task000_NewsQA_dev": 28.8936,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 55.0531,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 56.4374,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 58.6687,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 54.8506,
"eval_exact_match_for_task000_SQuAD_dev": 61.3876,
"eval_exact_match_for_task000_SearchQA_dev": 2.821,
"eval_f1": 54.8049,
"eval_f1_for_squad-like": 54.8049,
"eval_f1_for_task000_NewsQA_dev": 46.0401,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 71.6103,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 72.681,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 72.9829,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 69.6718,
"eval_f1_for_task000_SQuAD_dev": 74.8343,
"eval_f1_for_task000_SearchQA_dev": 7.0869,
"eval_gen_len": 5.9867,
"eval_global_step": 2000,
"eval_loss": 1.5502516031265259,
"eval_rouge1": 54.5481,
"eval_rouge1_for_squad-like": 54.5481,
"eval_rouge1_for_task000_NewsQA_dev": 46.2121,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 71.3277,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 72.3257,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 72.7441,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 68.4653,
"eval_rouge1_for_task000_SQuAD_dev": 74.5387,
"eval_rouge1_for_task000_SearchQA_dev": 7.3461,
"eval_rougeL": 54.4937,
"eval_rougeL_for_squad-like": 54.4937,
"eval_rougeL_for_task000_NewsQA_dev": 46.1235,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 71.2521,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 72.2345,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 72.7112,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 68.3891,
"eval_rougeL_for_task000_SQuAD_dev": 74.4799,
"eval_rougeL_for_task000_SearchQA_dev": 7.3323,
"eval_runtime": 5244.5628,
"eval_samples_per_second": 13.231,
"eval_steps_per_second": 6.615,
"step": 2000
},
{
"epoch": 0.3,
"learning_rate": 5e-05,
"loss": 1.0892,
"step": 3000
},
{
"epoch": 0.3,
"eval_exact_match": 42.9471,
"eval_exact_match_for_squad-like": 42.9471,
"eval_exact_match_for_task000_NewsQA_dev": 28.4188,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 56.3784,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 58.2514,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 59.7417,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 55.177,
"eval_exact_match_for_task000_SQuAD_dev": 62.387,
"eval_exact_match_for_task000_SearchQA_dev": 2.5324,
"eval_f1": 56.2906,
"eval_f1_for_squad-like": 56.2906,
"eval_f1_for_task000_NewsQA_dev": 46.7128,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 73.6538,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 75.7309,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 75.2649,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 71.4267,
"eval_f1_for_task000_SQuAD_dev": 76.8954,
"eval_f1_for_task000_SearchQA_dev": 6.7347,
"eval_gen_len": 6.1098,
"eval_global_step": 3000,
"eval_loss": 1.4990659952163696,
"eval_rouge1": 56.0886,
"eval_rouge1_for_squad-like": 56.0886,
"eval_rouge1_for_task000_NewsQA_dev": 46.9106,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 73.4005,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 75.5803,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 75.0322,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 70.2838,
"eval_rouge1_for_task000_SQuAD_dev": 76.6652,
"eval_rouge1_for_task000_SearchQA_dev": 7.018,
"eval_rougeL": 56.0392,
"eval_rougeL_for_squad-like": 56.0392,
"eval_rougeL_for_task000_NewsQA_dev": 46.8585,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 73.3224,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 75.5113,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 75.0019,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 70.2237,
"eval_rougeL_for_task000_SQuAD_dev": 76.5983,
"eval_rougeL_for_task000_SearchQA_dev": 7.0011,
"eval_runtime": 5239.5633,
"eval_samples_per_second": 13.243,
"eval_steps_per_second": 6.622,
"step": 3000
},
{
"epoch": 0.4,
"learning_rate": 5e-05,
"loss": 0.9586,
"step": 4000
},
{
"epoch": 0.4,
"eval_exact_match": 43.2815,
"eval_exact_match_for_squad-like": 43.2815,
"eval_exact_match_for_task000_NewsQA_dev": 29.416,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 56.6313,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 59.0199,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 59.8013,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 56.2583,
"eval_exact_match_for_task000_SQuAD_dev": 62.406,
"eval_exact_match_for_task000_SearchQA_dev": 2.4735,
"eval_f1": 56.5384,
"eval_f1_for_squad-like": 56.5384,
"eval_f1_for_task000_NewsQA_dev": 47.1146,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 74.0447,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 76.4587,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 75.331,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 71.9027,
"eval_f1_for_task000_SQuAD_dev": 77.2248,
"eval_f1_for_task000_SearchQA_dev": 6.562,
"eval_gen_len": 6.5536,
"eval_global_step": 4000,
"eval_loss": 1.4763996601104736,
"eval_rouge1": 56.4181,
"eval_rouge1_for_squad-like": 56.4181,
"eval_rouge1_for_task000_NewsQA_dev": 47.2027,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 73.8315,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 76.3451,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 75.187,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 70.9723,
"eval_rouge1_for_task000_SQuAD_dev": 77.2012,
"eval_rouge1_for_task000_SearchQA_dev": 6.8629,
"eval_rougeL": 56.3585,
"eval_rougeL_for_squad-like": 56.3585,
"eval_rougeL_for_task000_NewsQA_dev": 47.124,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 73.7295,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 76.2619,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 75.156,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 70.9096,
"eval_rougeL_for_task000_SQuAD_dev": 77.129,
"eval_rougeL_for_task000_SearchQA_dev": 6.8364,
"eval_runtime": 5411.8899,
"eval_samples_per_second": 12.822,
"eval_steps_per_second": 6.411,
"step": 4000
},
{
"epoch": 0.5,
"learning_rate": 5e-05,
"loss": 0.8703,
"step": 5000
},
{
"epoch": 0.5,
"eval_exact_match": 44.8091,
"eval_exact_match_for_squad-like": 44.8091,
"eval_exact_match_for_task000_NewsQA_dev": 29.1548,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 58.8569,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 60.7458,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 61.6294,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 58.7881,
"eval_exact_match_for_task000_SQuAD_dev": 65.661,
"eval_exact_match_for_task000_SearchQA_dev": 2.1201,
"eval_f1": 57.5067,
"eval_f1_for_squad-like": 57.5067,
"eval_f1_for_task000_NewsQA_dev": 47.528,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 75.5801,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 77.4224,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 76.6725,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 73.7737,
"eval_f1_for_task000_SQuAD_dev": 79.5359,
"eval_f1_for_task000_SearchQA_dev": 5.7665,
"eval_gen_len": 6.5617,
"eval_global_step": 5000,
"eval_loss": 1.4937905073165894,
"eval_rouge1": 57.5127,
"eval_rouge1_for_squad-like": 57.5127,
"eval_rouge1_for_task000_NewsQA_dev": 47.7949,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 75.4332,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 77.5705,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 76.5839,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 73.2737,
"eval_rouge1_for_task000_SQuAD_dev": 79.6618,
"eval_rouge1_for_task000_SearchQA_dev": 6.0046,
"eval_rougeL": 57.4686,
"eval_rougeL_for_squad-like": 57.4686,
"eval_rougeL_for_task000_NewsQA_dev": 47.6893,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 75.3688,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 77.5261,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 76.5531,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 73.2278,
"eval_rougeL_for_task000_SQuAD_dev": 79.6122,
"eval_rougeL_for_task000_SearchQA_dev": 5.9839,
"eval_runtime": 5610.9818,
"eval_samples_per_second": 12.367,
"eval_steps_per_second": 6.183,
"step": 5000
},
{
"epoch": 0.6,
"learning_rate": 5e-05,
"loss": 0.9186,
"step": 6000
},
{
"epoch": 0.6,
"eval_exact_match": 45.2284,
"eval_exact_match_for_squad-like": 45.2284,
"eval_exact_match_for_task000_NewsQA_dev": 31.529,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 58.9277,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 60.4308,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 61.5897,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 58.8595,
"eval_exact_match_for_task000_SQuAD_dev": 66.0893,
"eval_exact_match_for_task000_SearchQA_dev": 3.0683,
"eval_f1": 57.6298,
"eval_f1_for_squad-like": 57.6298,
"eval_f1_for_task000_NewsQA_dev": 49.1496,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 75.4059,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 76.631,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 76.5732,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 73.1298,
"eval_f1_for_task000_SQuAD_dev": 79.143,
"eval_f1_for_task000_SearchQA_dev": 7.0128,
"eval_gen_len": 5.6208,
"eval_global_step": 6000,
"eval_loss": 1.4303617477416992,
"eval_rouge1": 57.5885,
"eval_rouge1_for_squad-like": 57.5885,
"eval_rouge1_for_task000_NewsQA_dev": 49.3293,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 75.2476,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 76.6477,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 76.397,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 72.5855,
"eval_rouge1_for_task000_SQuAD_dev": 79.167,
"eval_rouge1_for_task000_SearchQA_dev": 7.2874,
"eval_rougeL": 57.5543,
"eval_rougeL_for_squad-like": 57.5543,
"eval_rougeL_for_task000_NewsQA_dev": 49.2858,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 75.2047,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 76.6027,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 76.369,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 72.54,
"eval_rougeL_for_task000_SQuAD_dev": 79.1235,
"eval_rougeL_for_task000_SearchQA_dev": 7.2743,
"eval_runtime": 4976.2912,
"eval_samples_per_second": 13.944,
"eval_steps_per_second": 6.972,
"step": 6000
},
{
"epoch": 0.7,
"learning_rate": 5e-05,
"loss": 0.8626,
"step": 7000
},
{
"epoch": 0.7,
"eval_exact_match": 46.145,
"eval_exact_match_for_squad-like": 46.145,
"eval_exact_match_for_task000_NewsQA_dev": 30.1282,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 59.6055,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 63.5676,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 63.5469,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 60.2775,
"eval_exact_match_for_task000_SQuAD_dev": 67.7453,
"eval_exact_match_for_task000_SearchQA_dev": 2.2968,
"eval_f1": 58.3057,
"eval_f1_for_squad-like": 58.3057,
"eval_f1_for_task000_NewsQA_dev": 48.4752,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 76.1677,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.2423,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 78.2045,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 74.6885,
"eval_f1_for_task000_SQuAD_dev": 80.3613,
"eval_f1_for_task000_SearchQA_dev": 5.657,
"eval_gen_len": 5.8196,
"eval_global_step": 7000,
"eval_loss": 1.3808618783950806,
"eval_rouge1": 58.2675,
"eval_rouge1_for_squad-like": 58.2675,
"eval_rouge1_for_task000_NewsQA_dev": 48.7542,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 75.9378,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.3134,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 78.1724,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 74.0395,
"eval_rouge1_for_task000_SQuAD_dev": 80.3759,
"eval_rouge1_for_task000_SearchQA_dev": 5.9169,
"eval_rougeL": 58.2236,
"eval_rougeL_for_squad-like": 58.2236,
"eval_rougeL_for_task000_NewsQA_dev": 48.6986,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 75.8622,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 79.2595,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 78.1477,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 73.978,
"eval_rougeL_for_task000_SQuAD_dev": 80.3316,
"eval_rougeL_for_task000_SearchQA_dev": 5.8983,
"eval_runtime": 5035.1499,
"eval_samples_per_second": 13.781,
"eval_steps_per_second": 6.891,
"step": 7000
},
{
"epoch": 0.8,
"learning_rate": 5e-05,
"loss": 0.8322,
"step": 8000
},
{
"epoch": 0.8,
"eval_exact_match": 46.3338,
"eval_exact_match_for_squad-like": 46.3338,
"eval_exact_match_for_task000_NewsQA_dev": 31.5527,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.6576,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 63.4669,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 63.8351,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 60.5325,
"eval_exact_match_for_task000_SQuAD_dev": 67.0505,
"eval_exact_match_for_task000_SearchQA_dev": 2.2615,
"eval_f1": 58.5957,
"eval_f1_for_squad-like": 58.5957,
"eval_f1_for_task000_NewsQA_dev": 49.4319,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 76.9441,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.665,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 78.3856,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.1449,
"eval_f1_for_task000_SQuAD_dev": 80.3231,
"eval_f1_for_task000_SearchQA_dev": 5.608,
"eval_gen_len": 5.9989,
"eval_global_step": 8000,
"eval_loss": 1.4950507879257202,
"eval_rouge1": 58.6055,
"eval_rouge1_for_squad-like": 58.6055,
"eval_rouge1_for_task000_NewsQA_dev": 49.7455,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 76.7235,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.7546,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 78.4596,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 74.565,
"eval_rouge1_for_task000_SQuAD_dev": 80.4772,
"eval_rouge1_for_task000_SearchQA_dev": 5.8525,
"eval_rougeL": 58.5662,
"eval_rougeL_for_squad-like": 58.5662,
"eval_rougeL_for_task000_NewsQA_dev": 49.6823,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 76.6541,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 79.7031,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 78.4393,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 74.5146,
"eval_rougeL_for_task000_SQuAD_dev": 80.4338,
"eval_rougeL_for_task000_SearchQA_dev": 5.84,
"eval_runtime": 5037.486,
"eval_samples_per_second": 13.775,
"eval_steps_per_second": 6.887,
"step": 8000
},
{
"epoch": 0.9,
"learning_rate": 5e-05,
"loss": 0.8616,
"step": 9000
},
{
"epoch": 0.9,
"eval_exact_match": 46.805,
"eval_exact_match_for_squad-like": 46.805,
"eval_exact_match_for_task000_NewsQA_dev": 31.6002,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.9611,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 64.0086,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 63.686,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.0935,
"eval_exact_match_for_task000_SQuAD_dev": 68.4686,
"eval_exact_match_for_task000_SearchQA_dev": 2.6325,
"eval_f1": 58.9037,
"eval_f1_for_squad-like": 58.9037,
"eval_f1_for_task000_NewsQA_dev": 49.0476,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.1558,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.7546,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 78.4691,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.349,
"eval_f1_for_task000_SQuAD_dev": 81.0326,
"eval_f1_for_task000_SearchQA_dev": 6.1907,
"eval_gen_len": 6.0577,
"eval_global_step": 9000,
"eval_loss": 1.3887202739715576,
"eval_rouge1": 59.0007,
"eval_rouge1_for_squad-like": 59.0007,
"eval_rouge1_for_task000_NewsQA_dev": 49.3458,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.0702,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.9826,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 78.6904,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 74.9073,
"eval_rouge1_for_task000_SQuAD_dev": 81.2597,
"eval_rouge1_for_task000_SearchQA_dev": 6.4396,
"eval_rougeL": 58.9619,
"eval_rougeL_for_squad-like": 58.9619,
"eval_rougeL_for_task000_NewsQA_dev": 49.2793,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.0101,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 79.9214,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 78.6681,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 74.8591,
"eval_rougeL_for_task000_SQuAD_dev": 81.2181,
"eval_rougeL_for_task000_SearchQA_dev": 6.4279,
"eval_runtime": 5346.6704,
"eval_samples_per_second": 12.978,
"eval_steps_per_second": 6.489,
"step": 9000
},
{
"epoch": 1.0,
"learning_rate": 5e-05,
"loss": 0.76,
"step": 10000
},
{
"epoch": 1.0,
"eval_exact_match": 47.0558,
"eval_exact_match_for_squad-like": 47.0558,
"eval_exact_match_for_task000_NewsQA_dev": 32.075,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.7688,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 64.6762,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 64.0139,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.3894,
"eval_exact_match_for_task000_SQuAD_dev": 68.7066,
"eval_exact_match_for_task000_SearchQA_dev": 2.8269,
"eval_f1": 59.6922,
"eval_f1_for_squad-like": 59.6922,
"eval_f1_for_task000_NewsQA_dev": 50.5612,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.7643,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.537,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.1097,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 76.3824,
"eval_f1_for_task000_SQuAD_dev": 81.8903,
"eval_f1_for_task000_SearchQA_dev": 6.8104,
"eval_gen_len": 6.2411,
"eval_global_step": 10000,
"eval_loss": 1.4306766986846924,
"eval_rouge1": 59.8786,
"eval_rouge1_for_squad-like": 59.8786,
"eval_rouge1_for_task000_NewsQA_dev": 51.1274,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.7056,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.8227,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 79.5036,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 76.0271,
"eval_rouge1_for_task000_SQuAD_dev": 82.2167,
"eval_rouge1_for_task000_SearchQA_dev": 7.1018,
"eval_rougeL": 59.8423,
"eval_rougeL_for_squad-like": 59.8423,
"eval_rougeL_for_task000_NewsQA_dev": 51.0694,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.66,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 80.7668,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 79.4863,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.9724,
"eval_rougeL_for_task000_SQuAD_dev": 82.178,
"eval_rougeL_for_task000_SearchQA_dev": 7.0864,
"eval_runtime": 5392.0755,
"eval_samples_per_second": 12.869,
"eval_steps_per_second": 6.434,
"step": 10000
},
{
"epoch": 1.1,
"learning_rate": 5e-05,
"loss": 0.6424,
"step": 11000
},
{
"epoch": 1.1,
"eval_exact_match": 47.7403,
"eval_exact_match_for_squad-like": 47.7403,
"eval_exact_match_for_task000_NewsQA_dev": 32.9772,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 61.1128,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 64.9912,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 65.7327,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.8892,
"eval_exact_match_for_task000_SQuAD_dev": 70.1342,
"eval_exact_match_for_task000_SearchQA_dev": 2.8622,
"eval_f1": 59.9977,
"eval_f1_for_squad-like": 59.9977,
"eval_f1_for_task000_NewsQA_dev": 51.0113,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.6964,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.5474,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 80.0965,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 76.6229,
"eval_f1_for_task000_SQuAD_dev": 82.6467,
"eval_f1_for_task000_SearchQA_dev": 6.79,
"eval_gen_len": 6.0952,
"eval_global_step": 11000,
"eval_loss": 1.458894968032837,
"eval_rouge1": 60.1081,
"eval_rouge1_for_squad-like": 60.1081,
"eval_rouge1_for_task000_NewsQA_dev": 51.3906,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.5815,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.8219,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 80.3102,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 76.1436,
"eval_rouge1_for_task000_SQuAD_dev": 82.8847,
"eval_rouge1_for_task000_SearchQA_dev": 7.0882,
"eval_rougeL": 60.0649,
"eval_rougeL_for_squad-like": 60.0649,
"eval_rougeL_for_task000_NewsQA_dev": 51.3201,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.5161,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 80.7665,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 80.2852,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 76.0825,
"eval_rougeL_for_task000_SQuAD_dev": 82.8429,
"eval_rougeL_for_task000_SearchQA_dev": 7.0691,
"eval_runtime": 5216.6936,
"eval_samples_per_second": 13.302,
"eval_steps_per_second": 6.651,
"step": 11000
},
{
"epoch": 1.2,
"learning_rate": 5e-05,
"loss": 0.6121,
"step": 12000
},
{
"epoch": 1.2,
"eval_exact_match": 47.0053,
"eval_exact_match_for_squad-like": 47.0053,
"eval_exact_match_for_task000_NewsQA_dev": 32.2175,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.3541,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 63.9204,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 64.2921,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.1547,
"eval_exact_match_for_task000_SQuAD_dev": 69.3633,
"eval_exact_match_for_task000_SearchQA_dev": 2.7444,
"eval_f1": 58.8892,
"eval_f1_for_squad-like": 58.8892,
"eval_f1_for_task000_NewsQA_dev": 50.0588,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 76.4924,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.1391,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 78.5755,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.0157,
"eval_f1_for_task000_SQuAD_dev": 81.7388,
"eval_f1_for_task000_SearchQA_dev": 6.2467,
"eval_gen_len": 5.4838,
"eval_global_step": 12000,
"eval_loss": 1.5454285144805908,
"eval_rouge1": 59.1209,
"eval_rouge1_for_squad-like": 59.1209,
"eval_rouge1_for_task000_NewsQA_dev": 50.5568,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 76.4035,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.652,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 78.959,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 74.82,
"eval_rouge1_for_task000_SQuAD_dev": 82.0668,
"eval_rouge1_for_task000_SearchQA_dev": 6.5645,
"eval_rougeL": 59.0936,
"eval_rougeL_for_squad-like": 59.0936,
"eval_rougeL_for_task000_NewsQA_dev": 50.5017,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 76.3605,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 79.6138,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 78.939,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 74.7889,
"eval_rougeL_for_task000_SQuAD_dev": 82.0374,
"eval_rougeL_for_task000_SearchQA_dev": 6.5579,
"eval_runtime": 4739.7954,
"eval_samples_per_second": 14.64,
"eval_steps_per_second": 7.32,
"step": 12000
},
{
"epoch": 1.3,
"learning_rate": 5e-05,
"loss": 0.6267,
"step": 13000
},
{
"epoch": 1.3,
"eval_exact_match": 46.8425,
"eval_exact_match_for_squad-like": 46.8425,
"eval_exact_match_for_task000_NewsQA_dev": 31.2441,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 59.8381,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 64.8652,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 64.7491,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 60.3081,
"eval_exact_match_for_task000_SQuAD_dev": 69.0968,
"eval_exact_match_for_task000_SearchQA_dev": 2.5618,
"eval_f1": 59.0311,
"eval_f1_for_squad-like": 59.0311,
"eval_f1_for_task000_NewsQA_dev": 49.2573,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 76.2242,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.2601,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.0037,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 74.8635,
"eval_f1_for_task000_SQuAD_dev": 81.6974,
"eval_f1_for_task000_SearchQA_dev": 6.5171,
"eval_gen_len": 6.1312,
"eval_global_step": 13000,
"eval_loss": 1.3520243167877197,
"eval_rouge1": 59.3382,
"eval_rouge1_for_squad-like": 59.3382,
"eval_rouge1_for_task000_NewsQA_dev": 49.8146,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 76.3477,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.7192,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 79.3855,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 74.8334,
"eval_rouge1_for_task000_SQuAD_dev": 82.1319,
"eval_rouge1_for_task000_SearchQA_dev": 6.8695,
"eval_rougeL": 59.2915,
"eval_rougeL_for_squad-like": 59.2915,
"eval_rougeL_for_task000_NewsQA_dev": 49.7207,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 76.2786,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 80.6445,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 79.3596,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 74.7793,
"eval_rougeL_for_task000_SQuAD_dev": 82.1035,
"eval_rougeL_for_task000_SearchQA_dev": 6.8414,
"eval_runtime": 5365.8691,
"eval_samples_per_second": 12.932,
"eval_steps_per_second": 6.466,
"step": 13000
},
{
"epoch": 1.4,
"learning_rate": 5e-05,
"loss": 0.6173,
"step": 14000
},
{
"epoch": 1.4,
"eval_exact_match": 47.0529,
"eval_exact_match_for_squad-like": 47.0529,
"eval_exact_match_for_task000_NewsQA_dev": 32.265,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.0506,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 65.4069,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 64.7988,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 60.3897,
"eval_exact_match_for_task000_SQuAD_dev": 68.5828,
"eval_exact_match_for_task000_SearchQA_dev": 3.033,
"eval_f1": 59.4463,
"eval_f1_for_squad-like": 59.4463,
"eval_f1_for_task000_NewsQA_dev": 49.3437,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 76.8803,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.055,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 78.972,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.2939,
"eval_f1_for_task000_SQuAD_dev": 81.7056,
"eval_f1_for_task000_SearchQA_dev": 7.2043,
"eval_gen_len": 6.2021,
"eval_global_step": 14000,
"eval_loss": 1.421161413192749,
"eval_rouge1": 59.7198,
"eval_rouge1_for_squad-like": 59.7198,
"eval_rouge1_for_task000_NewsQA_dev": 49.8919,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 76.9311,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.4792,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 79.4279,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.1727,
"eval_rouge1_for_task000_SQuAD_dev": 82.0675,
"eval_rouge1_for_task000_SearchQA_dev": 7.5337,
"eval_rougeL": 59.6836,
"eval_rougeL_for_squad-like": 59.6836,
"eval_rougeL_for_task000_NewsQA_dev": 49.8304,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 76.8717,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.4312,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 79.4022,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.1244,
"eval_rougeL_for_task000_SQuAD_dev": 82.035,
"eval_rougeL_for_task000_SearchQA_dev": 7.5215,
"eval_runtime": 5210.8523,
"eval_samples_per_second": 13.316,
"eval_steps_per_second": 6.658,
"step": 14000
},
{
"epoch": 1.5,
"learning_rate": 5e-05,
"loss": 0.6122,
"step": 15000
},
{
"epoch": 1.5,
"eval_exact_match": 47.7187,
"eval_exact_match_for_squad-like": 47.7187,
"eval_exact_match_for_task000_NewsQA_dev": 31.9801,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.6677,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 65.7722,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 66.1003,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.4302,
"eval_exact_match_for_task000_SQuAD_dev": 70.4197,
"eval_exact_match_for_task000_SearchQA_dev": 2.7856,
"eval_f1": 60.0424,
"eval_f1_for_squad-like": 60.0424,
"eval_f1_for_task000_NewsQA_dev": 49.8937,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.6636,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.1636,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.9381,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 76.2199,
"eval_f1_for_task000_SQuAD_dev": 82.7855,
"eval_f1_for_task000_SearchQA_dev": 7.2213,
"eval_gen_len": 6.7893,
"eval_global_step": 15000,
"eval_loss": 1.4648000001907349,
"eval_rouge1": 60.3199,
"eval_rouge1_for_squad-like": 60.3199,
"eval_rouge1_for_task000_NewsQA_dev": 50.5586,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.6507,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.6021,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 80.4139,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.9828,
"eval_rouge1_for_task000_SQuAD_dev": 83.1733,
"eval_rouge1_for_task000_SearchQA_dev": 7.608,
"eval_rougeL": 60.27,
"eval_rougeL_for_squad-like": 60.27,
"eval_rougeL_for_task000_NewsQA_dev": 50.4674,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.5673,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.5322,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 80.3954,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.9265,
"eval_rougeL_for_task000_SQuAD_dev": 83.1281,
"eval_rougeL_for_task000_SearchQA_dev": 7.5794,
"eval_runtime": 5599.7297,
"eval_samples_per_second": 12.392,
"eval_steps_per_second": 6.196,
"step": 15000
},
{
"epoch": 1.6,
"learning_rate": 5e-05,
"loss": 0.5975,
"step": 16000
},
{
"epoch": 1.6,
"eval_exact_match": 47.7028,
"eval_exact_match_for_squad-like": 47.7028,
"eval_exact_match_for_task000_NewsQA_dev": 32.906,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.6373,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 65.4825,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 65.0373,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.3996,
"eval_exact_match_for_task000_SQuAD_dev": 70.1056,
"eval_exact_match_for_task000_SearchQA_dev": 3.4865,
"eval_f1": 60.0719,
"eval_f1_for_squad-like": 60.0719,
"eval_f1_for_task000_NewsQA_dev": 50.6484,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.4577,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.7913,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.553,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.9467,
"eval_f1_for_task000_SQuAD_dev": 82.5792,
"eval_f1_for_task000_SearchQA_dev": 7.9625,
"eval_gen_len": 6.5056,
"eval_global_step": 16000,
"eval_loss": 1.4460376501083374,
"eval_rouge1": 60.3765,
"eval_rouge1_for_squad-like": 60.3765,
"eval_rouge1_for_task000_NewsQA_dev": 51.3099,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.4873,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.2315,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 80.091,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.8312,
"eval_rouge1_for_task000_SQuAD_dev": 83.0172,
"eval_rouge1_for_task000_SearchQA_dev": 8.2968,
"eval_rougeL": 60.3322,
"eval_rougeL_for_squad-like": 60.3322,
"eval_rougeL_for_task000_NewsQA_dev": 51.2196,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.4096,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.1818,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 80.0692,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.7759,
"eval_rougeL_for_task000_SQuAD_dev": 82.9814,
"eval_rougeL_for_task000_SearchQA_dev": 8.2737,
"eval_runtime": 5456.9286,
"eval_samples_per_second": 12.716,
"eval_steps_per_second": 6.358,
"step": 16000
},
{
"epoch": 1.7,
"learning_rate": 5e-05,
"loss": 0.5826,
"step": 17000
},
{
"epoch": 1.7,
"eval_exact_match": 47.6063,
"eval_exact_match_for_squad-like": 47.6063,
"eval_exact_match_for_task000_NewsQA_dev": 32.6923,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.2428,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 65.8478,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 65.6433,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.3384,
"eval_exact_match_for_task000_SQuAD_dev": 69.9534,
"eval_exact_match_for_task000_SearchQA_dev": 2.9741,
"eval_f1": 59.786,
"eval_f1_for_squad-like": 59.786,
"eval_f1_for_task000_NewsQA_dev": 50.6166,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.0853,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.0205,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.6158,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.6525,
"eval_f1_for_task000_SQuAD_dev": 82.5741,
"eval_f1_for_task000_SearchQA_dev": 7.0474,
"eval_gen_len": 6.2252,
"eval_global_step": 17000,
"eval_loss": 1.4596614837646484,
"eval_rouge1": 60.1349,
"eval_rouge1_for_squad-like": 60.1349,
"eval_rouge1_for_task000_NewsQA_dev": 51.2595,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.2979,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.5869,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 80.098,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.5838,
"eval_rouge1_for_task000_SQuAD_dev": 83.0602,
"eval_rouge1_for_task000_SearchQA_dev": 7.378,
"eval_rougeL": 60.0935,
"eval_rougeL_for_squad-like": 60.0935,
"eval_rougeL_for_task000_NewsQA_dev": 51.189,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.2209,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.5258,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 80.0791,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.5283,
"eval_rougeL_for_task000_SQuAD_dev": 83.0318,
"eval_rougeL_for_task000_SearchQA_dev": 7.3608,
"eval_runtime": 5176.1682,
"eval_samples_per_second": 13.406,
"eval_steps_per_second": 6.703,
"step": 17000
},
{
"epoch": 1.8,
"learning_rate": 5e-05,
"loss": 0.5955,
"step": 18000
},
{
"epoch": 1.8,
"eval_exact_match": 47.0024,
"eval_exact_match_for_squad-like": 47.0024,
"eval_exact_match_for_task000_NewsQA_dev": 32.3837,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.0303,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 64.8526,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 64.7591,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 60.5631,
"eval_exact_match_for_task000_SQuAD_dev": 69.3728,
"eval_exact_match_for_task000_SearchQA_dev": 2.5029,
"eval_f1": 59.6025,
"eval_f1_for_squad-like": 59.6025,
"eval_f1_for_task000_NewsQA_dev": 49.9466,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.0998,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.7246,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.6329,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.662,
"eval_f1_for_task000_SQuAD_dev": 82.5021,
"eval_f1_for_task000_SearchQA_dev": 6.6223,
"eval_gen_len": 7.2863,
"eval_global_step": 18000,
"eval_loss": 1.5001791715621948,
"eval_rouge1": 59.8829,
"eval_rouge1_for_squad-like": 59.8829,
"eval_rouge1_for_task000_NewsQA_dev": 50.5762,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.2014,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.1701,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 80.1226,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.4758,
"eval_rouge1_for_task000_SQuAD_dev": 82.8425,
"eval_rouge1_for_task000_SearchQA_dev": 6.9512,
"eval_rougeL": 59.8296,
"eval_rougeL_for_squad-like": 59.8296,
"eval_rougeL_for_task000_NewsQA_dev": 50.4466,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.1325,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.0927,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 80.0963,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.4105,
"eval_rougeL_for_task000_SQuAD_dev": 82.7958,
"eval_rougeL_for_task000_SearchQA_dev": 6.924,
"eval_runtime": 5979.7657,
"eval_samples_per_second": 11.604,
"eval_steps_per_second": 5.802,
"step": 18000
},
{
"epoch": 1.9,
"learning_rate": 5e-05,
"loss": 0.6105,
"step": 19000
},
{
"epoch": 1.9,
"eval_exact_match": 48.3542,
"eval_exact_match_for_squad-like": 48.3542,
"eval_exact_match_for_task000_NewsQA_dev": 35.1377,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 61.4972,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 66.3895,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 66.4382,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.9198,
"eval_exact_match_for_task000_SQuAD_dev": 71.1335,
"eval_exact_match_for_task000_SearchQA_dev": 2.9034,
"eval_f1": 60.527,
"eval_f1_for_squad-like": 60.527,
"eval_f1_for_task000_NewsQA_dev": 53.3584,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 78.0211,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.8373,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 80.6907,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 76.7447,
"eval_f1_for_task000_SQuAD_dev": 83.3642,
"eval_f1_for_task000_SearchQA_dev": 6.712,
"eval_gen_len": 6.1527,
"eval_global_step": 19000,
"eval_loss": 1.4464625120162964,
"eval_rouge1": 60.7571,
"eval_rouge1_for_squad-like": 60.7571,
"eval_rouge1_for_task000_NewsQA_dev": 53.7911,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.9828,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 82.3105,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 81.1026,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 76.486,
"eval_rouge1_for_task000_SQuAD_dev": 83.7045,
"eval_rouge1_for_task000_SearchQA_dev": 7.0408,
"eval_rougeL": 60.7148,
"eval_rougeL_for_squad-like": 60.7148,
"eval_rougeL_for_task000_NewsQA_dev": 53.7027,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.9168,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 82.2503,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 81.0755,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 76.4296,
"eval_rougeL_for_task000_SQuAD_dev": 83.673,
"eval_rougeL_for_task000_SearchQA_dev": 7.0247,
"eval_runtime": 5228.9365,
"eval_samples_per_second": 13.27,
"eval_steps_per_second": 6.635,
"step": 19000
},
{
"epoch": 2.0,
"learning_rate": 5e-05,
"loss": 0.6235,
"step": 20000
},
{
"epoch": 2.0,
"eval_exact_match": 48.1741,
"eval_exact_match_for_squad-like": 48.1741,
"eval_exact_match_for_task000_NewsQA_dev": 33.3571,
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.9813,
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 66.6163,
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 66.5673,
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.7668,
"eval_exact_match_for_task000_SQuAD_dev": 70.8099,
"eval_exact_match_for_task000_SearchQA_dev": 3.0153,
"eval_f1": 60.3279,
"eval_f1_for_squad-like": 60.3279,
"eval_f1_for_task000_NewsQA_dev": 51.5268,
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.7923,
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.3576,
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 80.6284,
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 76.1448,
"eval_f1_for_task000_SQuAD_dev": 83.223,
"eval_f1_for_task000_SearchQA_dev": 7.181,
"eval_gen_len": 6.5938,
"eval_global_step": 20000,
"eval_loss": 1.4583032131195068,
"eval_rouge1": 60.5924,
"eval_rouge1_for_squad-like": 60.5924,
"eval_rouge1_for_task000_NewsQA_dev": 52.0147,
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.7925,
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.8888,
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 81.0775,
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.956,
"eval_rouge1_for_task000_SQuAD_dev": 83.5919,
"eval_rouge1_for_task000_SearchQA_dev": 7.507,
"eval_rougeL": 60.5491,
"eval_rougeL_for_squad-like": 60.5491,
"eval_rougeL_for_task000_NewsQA_dev": 51.9291,
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.7222,
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.8211,
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 81.0484,
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.9099,
"eval_rougeL_for_task000_SQuAD_dev": 83.5543,
"eval_rougeL_for_task000_SearchQA_dev": 7.4907,
"eval_runtime": 5575.4762,
"eval_samples_per_second": 12.446,
"eval_steps_per_second": 6.223,
"step": 20000
}
],
"max_steps": 20000,
"num_train_epochs": 2,
"total_flos": 3.03606909632512e+16,
"trial_name": null,
"trial_params": null
}