|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"global_step": 20000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5652, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_exact_match": 38.3413, |
|
"eval_exact_match_for_squad-like": 38.3413, |
|
"eval_exact_match_for_task000_NewsQA_dev": 23.8367, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 50.0455, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 52.0534, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 53.7009, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 50.0357, |
|
"eval_exact_match_for_task000_SQuAD_dev": 55.8009, |
|
"eval_exact_match_for_task000_SearchQA_dev": 2.0554, |
|
"eval_f1": 52.1049, |
|
"eval_f1_for_squad-like": 52.1049, |
|
"eval_f1_for_task000_NewsQA_dev": 40.9979, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 68.6298, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 69.9454, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 69.6682, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 66.9921, |
|
"eval_f1_for_task000_SQuAD_dev": 71.031, |
|
"eval_f1_for_task000_SearchQA_dev": 6.183, |
|
"eval_gen_len": 6.4379, |
|
"eval_global_step": 1000, |
|
"eval_loss": 1.6005868911743164, |
|
"eval_rouge1": 51.5552, |
|
"eval_rouge1_for_squad-like": 51.5552, |
|
"eval_rouge1_for_task000_NewsQA_dev": 40.9869, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 68.2955, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 69.008, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 68.733, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 65.7541, |
|
"eval_rouge1_for_task000_SQuAD_dev": 70.1352, |
|
"eval_rouge1_for_task000_SearchQA_dev": 6.3956, |
|
"eval_rougeL": 51.4856, |
|
"eval_rougeL_for_squad-like": 51.4856, |
|
"eval_rougeL_for_task000_NewsQA_dev": 40.8766, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 68.2039, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 68.8974, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 68.6842, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 65.6558, |
|
"eval_rougeL_for_task000_SQuAD_dev": 70.0603, |
|
"eval_rougeL_for_task000_SearchQA_dev": 6.3754, |
|
"eval_runtime": 5547.761, |
|
"eval_samples_per_second": 12.508, |
|
"eval_steps_per_second": 6.254, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0877, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_exact_match": 42.2972, |
|
"eval_exact_match_for_squad-like": 42.2972, |
|
"eval_exact_match_for_task000_NewsQA_dev": 28.8936, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 55.0531, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 56.4374, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 58.6687, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 54.8506, |
|
"eval_exact_match_for_task000_SQuAD_dev": 61.3876, |
|
"eval_exact_match_for_task000_SearchQA_dev": 2.821, |
|
"eval_f1": 54.8049, |
|
"eval_f1_for_squad-like": 54.8049, |
|
"eval_f1_for_task000_NewsQA_dev": 46.0401, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 71.6103, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 72.681, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 72.9829, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 69.6718, |
|
"eval_f1_for_task000_SQuAD_dev": 74.8343, |
|
"eval_f1_for_task000_SearchQA_dev": 7.0869, |
|
"eval_gen_len": 5.9867, |
|
"eval_global_step": 2000, |
|
"eval_loss": 1.5502516031265259, |
|
"eval_rouge1": 54.5481, |
|
"eval_rouge1_for_squad-like": 54.5481, |
|
"eval_rouge1_for_task000_NewsQA_dev": 46.2121, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 71.3277, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 72.3257, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 72.7441, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 68.4653, |
|
"eval_rouge1_for_task000_SQuAD_dev": 74.5387, |
|
"eval_rouge1_for_task000_SearchQA_dev": 7.3461, |
|
"eval_rougeL": 54.4937, |
|
"eval_rougeL_for_squad-like": 54.4937, |
|
"eval_rougeL_for_task000_NewsQA_dev": 46.1235, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 71.2521, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 72.2345, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 72.7112, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 68.3891, |
|
"eval_rougeL_for_task000_SQuAD_dev": 74.4799, |
|
"eval_rougeL_for_task000_SearchQA_dev": 7.3323, |
|
"eval_runtime": 5244.5628, |
|
"eval_samples_per_second": 13.231, |
|
"eval_steps_per_second": 6.615, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0892, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_exact_match": 42.9471, |
|
"eval_exact_match_for_squad-like": 42.9471, |
|
"eval_exact_match_for_task000_NewsQA_dev": 28.4188, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 56.3784, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 58.2514, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 59.7417, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 55.177, |
|
"eval_exact_match_for_task000_SQuAD_dev": 62.387, |
|
"eval_exact_match_for_task000_SearchQA_dev": 2.5324, |
|
"eval_f1": 56.2906, |
|
"eval_f1_for_squad-like": 56.2906, |
|
"eval_f1_for_task000_NewsQA_dev": 46.7128, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 73.6538, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 75.7309, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 75.2649, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 71.4267, |
|
"eval_f1_for_task000_SQuAD_dev": 76.8954, |
|
"eval_f1_for_task000_SearchQA_dev": 6.7347, |
|
"eval_gen_len": 6.1098, |
|
"eval_global_step": 3000, |
|
"eval_loss": 1.4990659952163696, |
|
"eval_rouge1": 56.0886, |
|
"eval_rouge1_for_squad-like": 56.0886, |
|
"eval_rouge1_for_task000_NewsQA_dev": 46.9106, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 73.4005, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 75.5803, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 75.0322, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 70.2838, |
|
"eval_rouge1_for_task000_SQuAD_dev": 76.6652, |
|
"eval_rouge1_for_task000_SearchQA_dev": 7.018, |
|
"eval_rougeL": 56.0392, |
|
"eval_rougeL_for_squad-like": 56.0392, |
|
"eval_rougeL_for_task000_NewsQA_dev": 46.8585, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 73.3224, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 75.5113, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 75.0019, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 70.2237, |
|
"eval_rougeL_for_task000_SQuAD_dev": 76.5983, |
|
"eval_rougeL_for_task000_SearchQA_dev": 7.0011, |
|
"eval_runtime": 5239.5633, |
|
"eval_samples_per_second": 13.243, |
|
"eval_steps_per_second": 6.622, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9586, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_exact_match": 43.2815, |
|
"eval_exact_match_for_squad-like": 43.2815, |
|
"eval_exact_match_for_task000_NewsQA_dev": 29.416, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 56.6313, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 59.0199, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 59.8013, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 56.2583, |
|
"eval_exact_match_for_task000_SQuAD_dev": 62.406, |
|
"eval_exact_match_for_task000_SearchQA_dev": 2.4735, |
|
"eval_f1": 56.5384, |
|
"eval_f1_for_squad-like": 56.5384, |
|
"eval_f1_for_task000_NewsQA_dev": 47.1146, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 74.0447, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 76.4587, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 75.331, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 71.9027, |
|
"eval_f1_for_task000_SQuAD_dev": 77.2248, |
|
"eval_f1_for_task000_SearchQA_dev": 6.562, |
|
"eval_gen_len": 6.5536, |
|
"eval_global_step": 4000, |
|
"eval_loss": 1.4763996601104736, |
|
"eval_rouge1": 56.4181, |
|
"eval_rouge1_for_squad-like": 56.4181, |
|
"eval_rouge1_for_task000_NewsQA_dev": 47.2027, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 73.8315, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 76.3451, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 75.187, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 70.9723, |
|
"eval_rouge1_for_task000_SQuAD_dev": 77.2012, |
|
"eval_rouge1_for_task000_SearchQA_dev": 6.8629, |
|
"eval_rougeL": 56.3585, |
|
"eval_rougeL_for_squad-like": 56.3585, |
|
"eval_rougeL_for_task000_NewsQA_dev": 47.124, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 73.7295, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 76.2619, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 75.156, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 70.9096, |
|
"eval_rougeL_for_task000_SQuAD_dev": 77.129, |
|
"eval_rougeL_for_task000_SearchQA_dev": 6.8364, |
|
"eval_runtime": 5411.8899, |
|
"eval_samples_per_second": 12.822, |
|
"eval_steps_per_second": 6.411, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8703, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_exact_match": 44.8091, |
|
"eval_exact_match_for_squad-like": 44.8091, |
|
"eval_exact_match_for_task000_NewsQA_dev": 29.1548, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 58.8569, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 60.7458, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 61.6294, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 58.7881, |
|
"eval_exact_match_for_task000_SQuAD_dev": 65.661, |
|
"eval_exact_match_for_task000_SearchQA_dev": 2.1201, |
|
"eval_f1": 57.5067, |
|
"eval_f1_for_squad-like": 57.5067, |
|
"eval_f1_for_task000_NewsQA_dev": 47.528, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 75.5801, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 77.4224, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 76.6725, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 73.7737, |
|
"eval_f1_for_task000_SQuAD_dev": 79.5359, |
|
"eval_f1_for_task000_SearchQA_dev": 5.7665, |
|
"eval_gen_len": 6.5617, |
|
"eval_global_step": 5000, |
|
"eval_loss": 1.4937905073165894, |
|
"eval_rouge1": 57.5127, |
|
"eval_rouge1_for_squad-like": 57.5127, |
|
"eval_rouge1_for_task000_NewsQA_dev": 47.7949, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 75.4332, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 77.5705, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 76.5839, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 73.2737, |
|
"eval_rouge1_for_task000_SQuAD_dev": 79.6618, |
|
"eval_rouge1_for_task000_SearchQA_dev": 6.0046, |
|
"eval_rougeL": 57.4686, |
|
"eval_rougeL_for_squad-like": 57.4686, |
|
"eval_rougeL_for_task000_NewsQA_dev": 47.6893, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 75.3688, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 77.5261, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 76.5531, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 73.2278, |
|
"eval_rougeL_for_task000_SQuAD_dev": 79.6122, |
|
"eval_rougeL_for_task000_SearchQA_dev": 5.9839, |
|
"eval_runtime": 5610.9818, |
|
"eval_samples_per_second": 12.367, |
|
"eval_steps_per_second": 6.183, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9186, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_exact_match": 45.2284, |
|
"eval_exact_match_for_squad-like": 45.2284, |
|
"eval_exact_match_for_task000_NewsQA_dev": 31.529, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 58.9277, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 60.4308, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 61.5897, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 58.8595, |
|
"eval_exact_match_for_task000_SQuAD_dev": 66.0893, |
|
"eval_exact_match_for_task000_SearchQA_dev": 3.0683, |
|
"eval_f1": 57.6298, |
|
"eval_f1_for_squad-like": 57.6298, |
|
"eval_f1_for_task000_NewsQA_dev": 49.1496, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 75.4059, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 76.631, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 76.5732, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 73.1298, |
|
"eval_f1_for_task000_SQuAD_dev": 79.143, |
|
"eval_f1_for_task000_SearchQA_dev": 7.0128, |
|
"eval_gen_len": 5.6208, |
|
"eval_global_step": 6000, |
|
"eval_loss": 1.4303617477416992, |
|
"eval_rouge1": 57.5885, |
|
"eval_rouge1_for_squad-like": 57.5885, |
|
"eval_rouge1_for_task000_NewsQA_dev": 49.3293, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 75.2476, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 76.6477, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 76.397, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 72.5855, |
|
"eval_rouge1_for_task000_SQuAD_dev": 79.167, |
|
"eval_rouge1_for_task000_SearchQA_dev": 7.2874, |
|
"eval_rougeL": 57.5543, |
|
"eval_rougeL_for_squad-like": 57.5543, |
|
"eval_rougeL_for_task000_NewsQA_dev": 49.2858, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 75.2047, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 76.6027, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 76.369, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 72.54, |
|
"eval_rougeL_for_task000_SQuAD_dev": 79.1235, |
|
"eval_rougeL_for_task000_SearchQA_dev": 7.2743, |
|
"eval_runtime": 4976.2912, |
|
"eval_samples_per_second": 13.944, |
|
"eval_steps_per_second": 6.972, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8626, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_exact_match": 46.145, |
|
"eval_exact_match_for_squad-like": 46.145, |
|
"eval_exact_match_for_task000_NewsQA_dev": 30.1282, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 59.6055, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 63.5676, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 63.5469, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 60.2775, |
|
"eval_exact_match_for_task000_SQuAD_dev": 67.7453, |
|
"eval_exact_match_for_task000_SearchQA_dev": 2.2968, |
|
"eval_f1": 58.3057, |
|
"eval_f1_for_squad-like": 58.3057, |
|
"eval_f1_for_task000_NewsQA_dev": 48.4752, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 76.1677, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.2423, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 78.2045, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 74.6885, |
|
"eval_f1_for_task000_SQuAD_dev": 80.3613, |
|
"eval_f1_for_task000_SearchQA_dev": 5.657, |
|
"eval_gen_len": 5.8196, |
|
"eval_global_step": 7000, |
|
"eval_loss": 1.3808618783950806, |
|
"eval_rouge1": 58.2675, |
|
"eval_rouge1_for_squad-like": 58.2675, |
|
"eval_rouge1_for_task000_NewsQA_dev": 48.7542, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 75.9378, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.3134, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 78.1724, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 74.0395, |
|
"eval_rouge1_for_task000_SQuAD_dev": 80.3759, |
|
"eval_rouge1_for_task000_SearchQA_dev": 5.9169, |
|
"eval_rougeL": 58.2236, |
|
"eval_rougeL_for_squad-like": 58.2236, |
|
"eval_rougeL_for_task000_NewsQA_dev": 48.6986, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 75.8622, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 79.2595, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 78.1477, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 73.978, |
|
"eval_rougeL_for_task000_SQuAD_dev": 80.3316, |
|
"eval_rougeL_for_task000_SearchQA_dev": 5.8983, |
|
"eval_runtime": 5035.1499, |
|
"eval_samples_per_second": 13.781, |
|
"eval_steps_per_second": 6.891, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8322, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_exact_match": 46.3338, |
|
"eval_exact_match_for_squad-like": 46.3338, |
|
"eval_exact_match_for_task000_NewsQA_dev": 31.5527, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.6576, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 63.4669, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 63.8351, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 60.5325, |
|
"eval_exact_match_for_task000_SQuAD_dev": 67.0505, |
|
"eval_exact_match_for_task000_SearchQA_dev": 2.2615, |
|
"eval_f1": 58.5957, |
|
"eval_f1_for_squad-like": 58.5957, |
|
"eval_f1_for_task000_NewsQA_dev": 49.4319, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 76.9441, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.665, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 78.3856, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.1449, |
|
"eval_f1_for_task000_SQuAD_dev": 80.3231, |
|
"eval_f1_for_task000_SearchQA_dev": 5.608, |
|
"eval_gen_len": 5.9989, |
|
"eval_global_step": 8000, |
|
"eval_loss": 1.4950507879257202, |
|
"eval_rouge1": 58.6055, |
|
"eval_rouge1_for_squad-like": 58.6055, |
|
"eval_rouge1_for_task000_NewsQA_dev": 49.7455, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 76.7235, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.7546, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 78.4596, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 74.565, |
|
"eval_rouge1_for_task000_SQuAD_dev": 80.4772, |
|
"eval_rouge1_for_task000_SearchQA_dev": 5.8525, |
|
"eval_rougeL": 58.5662, |
|
"eval_rougeL_for_squad-like": 58.5662, |
|
"eval_rougeL_for_task000_NewsQA_dev": 49.6823, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 76.6541, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 79.7031, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 78.4393, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 74.5146, |
|
"eval_rougeL_for_task000_SQuAD_dev": 80.4338, |
|
"eval_rougeL_for_task000_SearchQA_dev": 5.84, |
|
"eval_runtime": 5037.486, |
|
"eval_samples_per_second": 13.775, |
|
"eval_steps_per_second": 6.887, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8616, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_exact_match": 46.805, |
|
"eval_exact_match_for_squad-like": 46.805, |
|
"eval_exact_match_for_task000_NewsQA_dev": 31.6002, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.9611, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 64.0086, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 63.686, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.0935, |
|
"eval_exact_match_for_task000_SQuAD_dev": 68.4686, |
|
"eval_exact_match_for_task000_SearchQA_dev": 2.6325, |
|
"eval_f1": 58.9037, |
|
"eval_f1_for_squad-like": 58.9037, |
|
"eval_f1_for_task000_NewsQA_dev": 49.0476, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.1558, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.7546, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 78.4691, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.349, |
|
"eval_f1_for_task000_SQuAD_dev": 81.0326, |
|
"eval_f1_for_task000_SearchQA_dev": 6.1907, |
|
"eval_gen_len": 6.0577, |
|
"eval_global_step": 9000, |
|
"eval_loss": 1.3887202739715576, |
|
"eval_rouge1": 59.0007, |
|
"eval_rouge1_for_squad-like": 59.0007, |
|
"eval_rouge1_for_task000_NewsQA_dev": 49.3458, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.0702, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.9826, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 78.6904, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 74.9073, |
|
"eval_rouge1_for_task000_SQuAD_dev": 81.2597, |
|
"eval_rouge1_for_task000_SearchQA_dev": 6.4396, |
|
"eval_rougeL": 58.9619, |
|
"eval_rougeL_for_squad-like": 58.9619, |
|
"eval_rougeL_for_task000_NewsQA_dev": 49.2793, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.0101, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 79.9214, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 78.6681, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 74.8591, |
|
"eval_rougeL_for_task000_SQuAD_dev": 81.2181, |
|
"eval_rougeL_for_task000_SearchQA_dev": 6.4279, |
|
"eval_runtime": 5346.6704, |
|
"eval_samples_per_second": 12.978, |
|
"eval_steps_per_second": 6.489, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.76, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 47.0558, |
|
"eval_exact_match_for_squad-like": 47.0558, |
|
"eval_exact_match_for_task000_NewsQA_dev": 32.075, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.7688, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 64.6762, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 64.0139, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.3894, |
|
"eval_exact_match_for_task000_SQuAD_dev": 68.7066, |
|
"eval_exact_match_for_task000_SearchQA_dev": 2.8269, |
|
"eval_f1": 59.6922, |
|
"eval_f1_for_squad-like": 59.6922, |
|
"eval_f1_for_task000_NewsQA_dev": 50.5612, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.7643, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.537, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.1097, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 76.3824, |
|
"eval_f1_for_task000_SQuAD_dev": 81.8903, |
|
"eval_f1_for_task000_SearchQA_dev": 6.8104, |
|
"eval_gen_len": 6.2411, |
|
"eval_global_step": 10000, |
|
"eval_loss": 1.4306766986846924, |
|
"eval_rouge1": 59.8786, |
|
"eval_rouge1_for_squad-like": 59.8786, |
|
"eval_rouge1_for_task000_NewsQA_dev": 51.1274, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.7056, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.8227, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 79.5036, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 76.0271, |
|
"eval_rouge1_for_task000_SQuAD_dev": 82.2167, |
|
"eval_rouge1_for_task000_SearchQA_dev": 7.1018, |
|
"eval_rougeL": 59.8423, |
|
"eval_rougeL_for_squad-like": 59.8423, |
|
"eval_rougeL_for_task000_NewsQA_dev": 51.0694, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.66, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 80.7668, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 79.4863, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.9724, |
|
"eval_rougeL_for_task000_SQuAD_dev": 82.178, |
|
"eval_rougeL_for_task000_SearchQA_dev": 7.0864, |
|
"eval_runtime": 5392.0755, |
|
"eval_samples_per_second": 12.869, |
|
"eval_steps_per_second": 6.434, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6424, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_exact_match": 47.7403, |
|
"eval_exact_match_for_squad-like": 47.7403, |
|
"eval_exact_match_for_task000_NewsQA_dev": 32.9772, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 61.1128, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 64.9912, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 65.7327, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.8892, |
|
"eval_exact_match_for_task000_SQuAD_dev": 70.1342, |
|
"eval_exact_match_for_task000_SearchQA_dev": 2.8622, |
|
"eval_f1": 59.9977, |
|
"eval_f1_for_squad-like": 59.9977, |
|
"eval_f1_for_task000_NewsQA_dev": 51.0113, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.6964, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.5474, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 80.0965, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 76.6229, |
|
"eval_f1_for_task000_SQuAD_dev": 82.6467, |
|
"eval_f1_for_task000_SearchQA_dev": 6.79, |
|
"eval_gen_len": 6.0952, |
|
"eval_global_step": 11000, |
|
"eval_loss": 1.458894968032837, |
|
"eval_rouge1": 60.1081, |
|
"eval_rouge1_for_squad-like": 60.1081, |
|
"eval_rouge1_for_task000_NewsQA_dev": 51.3906, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.5815, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.8219, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 80.3102, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 76.1436, |
|
"eval_rouge1_for_task000_SQuAD_dev": 82.8847, |
|
"eval_rouge1_for_task000_SearchQA_dev": 7.0882, |
|
"eval_rougeL": 60.0649, |
|
"eval_rougeL_for_squad-like": 60.0649, |
|
"eval_rougeL_for_task000_NewsQA_dev": 51.3201, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.5161, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 80.7665, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 80.2852, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 76.0825, |
|
"eval_rougeL_for_task000_SQuAD_dev": 82.8429, |
|
"eval_rougeL_for_task000_SearchQA_dev": 7.0691, |
|
"eval_runtime": 5216.6936, |
|
"eval_samples_per_second": 13.302, |
|
"eval_steps_per_second": 6.651, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6121, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_exact_match": 47.0053, |
|
"eval_exact_match_for_squad-like": 47.0053, |
|
"eval_exact_match_for_task000_NewsQA_dev": 32.2175, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.3541, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 63.9204, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 64.2921, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.1547, |
|
"eval_exact_match_for_task000_SQuAD_dev": 69.3633, |
|
"eval_exact_match_for_task000_SearchQA_dev": 2.7444, |
|
"eval_f1": 58.8892, |
|
"eval_f1_for_squad-like": 58.8892, |
|
"eval_f1_for_task000_NewsQA_dev": 50.0588, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 76.4924, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.1391, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 78.5755, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.0157, |
|
"eval_f1_for_task000_SQuAD_dev": 81.7388, |
|
"eval_f1_for_task000_SearchQA_dev": 6.2467, |
|
"eval_gen_len": 5.4838, |
|
"eval_global_step": 12000, |
|
"eval_loss": 1.5454285144805908, |
|
"eval_rouge1": 59.1209, |
|
"eval_rouge1_for_squad-like": 59.1209, |
|
"eval_rouge1_for_task000_NewsQA_dev": 50.5568, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 76.4035, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.652, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 78.959, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 74.82, |
|
"eval_rouge1_for_task000_SQuAD_dev": 82.0668, |
|
"eval_rouge1_for_task000_SearchQA_dev": 6.5645, |
|
"eval_rougeL": 59.0936, |
|
"eval_rougeL_for_squad-like": 59.0936, |
|
"eval_rougeL_for_task000_NewsQA_dev": 50.5017, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 76.3605, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 79.6138, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 78.939, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 74.7889, |
|
"eval_rougeL_for_task000_SQuAD_dev": 82.0374, |
|
"eval_rougeL_for_task000_SearchQA_dev": 6.5579, |
|
"eval_runtime": 4739.7954, |
|
"eval_samples_per_second": 14.64, |
|
"eval_steps_per_second": 7.32, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6267, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_exact_match": 46.8425, |
|
"eval_exact_match_for_squad-like": 46.8425, |
|
"eval_exact_match_for_task000_NewsQA_dev": 31.2441, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 59.8381, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 64.8652, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 64.7491, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 60.3081, |
|
"eval_exact_match_for_task000_SQuAD_dev": 69.0968, |
|
"eval_exact_match_for_task000_SearchQA_dev": 2.5618, |
|
"eval_f1": 59.0311, |
|
"eval_f1_for_squad-like": 59.0311, |
|
"eval_f1_for_task000_NewsQA_dev": 49.2573, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 76.2242, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.2601, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.0037, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 74.8635, |
|
"eval_f1_for_task000_SQuAD_dev": 81.6974, |
|
"eval_f1_for_task000_SearchQA_dev": 6.5171, |
|
"eval_gen_len": 6.1312, |
|
"eval_global_step": 13000, |
|
"eval_loss": 1.3520243167877197, |
|
"eval_rouge1": 59.3382, |
|
"eval_rouge1_for_squad-like": 59.3382, |
|
"eval_rouge1_for_task000_NewsQA_dev": 49.8146, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 76.3477, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.7192, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 79.3855, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 74.8334, |
|
"eval_rouge1_for_task000_SQuAD_dev": 82.1319, |
|
"eval_rouge1_for_task000_SearchQA_dev": 6.8695, |
|
"eval_rougeL": 59.2915, |
|
"eval_rougeL_for_squad-like": 59.2915, |
|
"eval_rougeL_for_task000_NewsQA_dev": 49.7207, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 76.2786, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 80.6445, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 79.3596, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 74.7793, |
|
"eval_rougeL_for_task000_SQuAD_dev": 82.1035, |
|
"eval_rougeL_for_task000_SearchQA_dev": 6.8414, |
|
"eval_runtime": 5365.8691, |
|
"eval_samples_per_second": 12.932, |
|
"eval_steps_per_second": 6.466, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6173, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_exact_match": 47.0529, |
|
"eval_exact_match_for_squad-like": 47.0529, |
|
"eval_exact_match_for_task000_NewsQA_dev": 32.265, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.0506, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 65.4069, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 64.7988, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 60.3897, |
|
"eval_exact_match_for_task000_SQuAD_dev": 68.5828, |
|
"eval_exact_match_for_task000_SearchQA_dev": 3.033, |
|
"eval_f1": 59.4463, |
|
"eval_f1_for_squad-like": 59.4463, |
|
"eval_f1_for_task000_NewsQA_dev": 49.3437, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 76.8803, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.055, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 78.972, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.2939, |
|
"eval_f1_for_task000_SQuAD_dev": 81.7056, |
|
"eval_f1_for_task000_SearchQA_dev": 7.2043, |
|
"eval_gen_len": 6.2021, |
|
"eval_global_step": 14000, |
|
"eval_loss": 1.421161413192749, |
|
"eval_rouge1": 59.7198, |
|
"eval_rouge1_for_squad-like": 59.7198, |
|
"eval_rouge1_for_task000_NewsQA_dev": 49.8919, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 76.9311, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.4792, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 79.4279, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.1727, |
|
"eval_rouge1_for_task000_SQuAD_dev": 82.0675, |
|
"eval_rouge1_for_task000_SearchQA_dev": 7.5337, |
|
"eval_rougeL": 59.6836, |
|
"eval_rougeL_for_squad-like": 59.6836, |
|
"eval_rougeL_for_task000_NewsQA_dev": 49.8304, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 76.8717, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.4312, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 79.4022, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.1244, |
|
"eval_rougeL_for_task000_SQuAD_dev": 82.035, |
|
"eval_rougeL_for_task000_SearchQA_dev": 7.5215, |
|
"eval_runtime": 5210.8523, |
|
"eval_samples_per_second": 13.316, |
|
"eval_steps_per_second": 6.658, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6122, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_exact_match": 47.7187, |
|
"eval_exact_match_for_squad-like": 47.7187, |
|
"eval_exact_match_for_task000_NewsQA_dev": 31.9801, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.6677, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 65.7722, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 66.1003, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.4302, |
|
"eval_exact_match_for_task000_SQuAD_dev": 70.4197, |
|
"eval_exact_match_for_task000_SearchQA_dev": 2.7856, |
|
"eval_f1": 60.0424, |
|
"eval_f1_for_squad-like": 60.0424, |
|
"eval_f1_for_task000_NewsQA_dev": 49.8937, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.6636, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.1636, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.9381, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 76.2199, |
|
"eval_f1_for_task000_SQuAD_dev": 82.7855, |
|
"eval_f1_for_task000_SearchQA_dev": 7.2213, |
|
"eval_gen_len": 6.7893, |
|
"eval_global_step": 15000, |
|
"eval_loss": 1.4648000001907349, |
|
"eval_rouge1": 60.3199, |
|
"eval_rouge1_for_squad-like": 60.3199, |
|
"eval_rouge1_for_task000_NewsQA_dev": 50.5586, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.6507, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.6021, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 80.4139, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.9828, |
|
"eval_rouge1_for_task000_SQuAD_dev": 83.1733, |
|
"eval_rouge1_for_task000_SearchQA_dev": 7.608, |
|
"eval_rougeL": 60.27, |
|
"eval_rougeL_for_squad-like": 60.27, |
|
"eval_rougeL_for_task000_NewsQA_dev": 50.4674, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.5673, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.5322, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 80.3954, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.9265, |
|
"eval_rougeL_for_task000_SQuAD_dev": 83.1281, |
|
"eval_rougeL_for_task000_SearchQA_dev": 7.5794, |
|
"eval_runtime": 5599.7297, |
|
"eval_samples_per_second": 12.392, |
|
"eval_steps_per_second": 6.196, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5975, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_exact_match": 47.7028, |
|
"eval_exact_match_for_squad-like": 47.7028, |
|
"eval_exact_match_for_task000_NewsQA_dev": 32.906, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.6373, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 65.4825, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 65.0373, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.3996, |
|
"eval_exact_match_for_task000_SQuAD_dev": 70.1056, |
|
"eval_exact_match_for_task000_SearchQA_dev": 3.4865, |
|
"eval_f1": 60.0719, |
|
"eval_f1_for_squad-like": 60.0719, |
|
"eval_f1_for_task000_NewsQA_dev": 50.6484, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.4577, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.7913, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.553, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.9467, |
|
"eval_f1_for_task000_SQuAD_dev": 82.5792, |
|
"eval_f1_for_task000_SearchQA_dev": 7.9625, |
|
"eval_gen_len": 6.5056, |
|
"eval_global_step": 16000, |
|
"eval_loss": 1.4460376501083374, |
|
"eval_rouge1": 60.3765, |
|
"eval_rouge1_for_squad-like": 60.3765, |
|
"eval_rouge1_for_task000_NewsQA_dev": 51.3099, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.4873, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.2315, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 80.091, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.8312, |
|
"eval_rouge1_for_task000_SQuAD_dev": 83.0172, |
|
"eval_rouge1_for_task000_SearchQA_dev": 8.2968, |
|
"eval_rougeL": 60.3322, |
|
"eval_rougeL_for_squad-like": 60.3322, |
|
"eval_rougeL_for_task000_NewsQA_dev": 51.2196, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.4096, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.1818, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 80.0692, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.7759, |
|
"eval_rougeL_for_task000_SQuAD_dev": 82.9814, |
|
"eval_rougeL_for_task000_SearchQA_dev": 8.2737, |
|
"eval_runtime": 5456.9286, |
|
"eval_samples_per_second": 12.716, |
|
"eval_steps_per_second": 6.358, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5826, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_exact_match": 47.6063, |
|
"eval_exact_match_for_squad-like": 47.6063, |
|
"eval_exact_match_for_task000_NewsQA_dev": 32.6923, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.2428, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 65.8478, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 65.6433, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.3384, |
|
"eval_exact_match_for_task000_SQuAD_dev": 69.9534, |
|
"eval_exact_match_for_task000_SearchQA_dev": 2.9741, |
|
"eval_f1": 59.786, |
|
"eval_f1_for_squad-like": 59.786, |
|
"eval_f1_for_task000_NewsQA_dev": 50.6166, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.0853, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.0205, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.6158, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.6525, |
|
"eval_f1_for_task000_SQuAD_dev": 82.5741, |
|
"eval_f1_for_task000_SearchQA_dev": 7.0474, |
|
"eval_gen_len": 6.2252, |
|
"eval_global_step": 17000, |
|
"eval_loss": 1.4596614837646484, |
|
"eval_rouge1": 60.1349, |
|
"eval_rouge1_for_squad-like": 60.1349, |
|
"eval_rouge1_for_task000_NewsQA_dev": 51.2595, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.2979, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.5869, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 80.098, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.5838, |
|
"eval_rouge1_for_task000_SQuAD_dev": 83.0602, |
|
"eval_rouge1_for_task000_SearchQA_dev": 7.378, |
|
"eval_rougeL": 60.0935, |
|
"eval_rougeL_for_squad-like": 60.0935, |
|
"eval_rougeL_for_task000_NewsQA_dev": 51.189, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.2209, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.5258, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 80.0791, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.5283, |
|
"eval_rougeL_for_task000_SQuAD_dev": 83.0318, |
|
"eval_rougeL_for_task000_SearchQA_dev": 7.3608, |
|
"eval_runtime": 5176.1682, |
|
"eval_samples_per_second": 13.406, |
|
"eval_steps_per_second": 6.703, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5955, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_exact_match": 47.0024, |
|
"eval_exact_match_for_squad-like": 47.0024, |
|
"eval_exact_match_for_task000_NewsQA_dev": 32.3837, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.0303, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 64.8526, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 64.7591, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 60.5631, |
|
"eval_exact_match_for_task000_SQuAD_dev": 69.3728, |
|
"eval_exact_match_for_task000_SearchQA_dev": 2.5029, |
|
"eval_f1": 59.6025, |
|
"eval_f1_for_squad-like": 59.6025, |
|
"eval_f1_for_task000_NewsQA_dev": 49.9466, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.0998, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.7246, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.6329, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.662, |
|
"eval_f1_for_task000_SQuAD_dev": 82.5021, |
|
"eval_f1_for_task000_SearchQA_dev": 6.6223, |
|
"eval_gen_len": 7.2863, |
|
"eval_global_step": 18000, |
|
"eval_loss": 1.5001791715621948, |
|
"eval_rouge1": 59.8829, |
|
"eval_rouge1_for_squad-like": 59.8829, |
|
"eval_rouge1_for_task000_NewsQA_dev": 50.5762, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.2014, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.1701, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 80.1226, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.4758, |
|
"eval_rouge1_for_task000_SQuAD_dev": 82.8425, |
|
"eval_rouge1_for_task000_SearchQA_dev": 6.9512, |
|
"eval_rougeL": 59.8296, |
|
"eval_rougeL_for_squad-like": 59.8296, |
|
"eval_rougeL_for_task000_NewsQA_dev": 50.4466, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.1325, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.0927, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 80.0963, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.4105, |
|
"eval_rougeL_for_task000_SQuAD_dev": 82.7958, |
|
"eval_rougeL_for_task000_SearchQA_dev": 6.924, |
|
"eval_runtime": 5979.7657, |
|
"eval_samples_per_second": 11.604, |
|
"eval_steps_per_second": 5.802, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6105, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_exact_match": 48.3542, |
|
"eval_exact_match_for_squad-like": 48.3542, |
|
"eval_exact_match_for_task000_NewsQA_dev": 35.1377, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 61.4972, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 66.3895, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 66.4382, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.9198, |
|
"eval_exact_match_for_task000_SQuAD_dev": 71.1335, |
|
"eval_exact_match_for_task000_SearchQA_dev": 2.9034, |
|
"eval_f1": 60.527, |
|
"eval_f1_for_squad-like": 60.527, |
|
"eval_f1_for_task000_NewsQA_dev": 53.3584, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 78.0211, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.8373, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 80.6907, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 76.7447, |
|
"eval_f1_for_task000_SQuAD_dev": 83.3642, |
|
"eval_f1_for_task000_SearchQA_dev": 6.712, |
|
"eval_gen_len": 6.1527, |
|
"eval_global_step": 19000, |
|
"eval_loss": 1.4464625120162964, |
|
"eval_rouge1": 60.7571, |
|
"eval_rouge1_for_squad-like": 60.7571, |
|
"eval_rouge1_for_task000_NewsQA_dev": 53.7911, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.9828, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 82.3105, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 81.1026, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 76.486, |
|
"eval_rouge1_for_task000_SQuAD_dev": 83.7045, |
|
"eval_rouge1_for_task000_SearchQA_dev": 7.0408, |
|
"eval_rougeL": 60.7148, |
|
"eval_rougeL_for_squad-like": 60.7148, |
|
"eval_rougeL_for_task000_NewsQA_dev": 53.7027, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.9168, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 82.2503, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 81.0755, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 76.4296, |
|
"eval_rougeL_for_task000_SQuAD_dev": 83.673, |
|
"eval_rougeL_for_task000_SearchQA_dev": 7.0247, |
|
"eval_runtime": 5228.9365, |
|
"eval_samples_per_second": 13.27, |
|
"eval_steps_per_second": 6.635, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6235, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 48.1741, |
|
"eval_exact_match_for_squad-like": 48.1741, |
|
"eval_exact_match_for_task000_NewsQA_dev": 33.3571, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.9813, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 66.6163, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 66.5673, |
|
"eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.7668, |
|
"eval_exact_match_for_task000_SQuAD_dev": 70.8099, |
|
"eval_exact_match_for_task000_SearchQA_dev": 3.0153, |
|
"eval_f1": 60.3279, |
|
"eval_f1_for_squad-like": 60.3279, |
|
"eval_f1_for_task000_NewsQA_dev": 51.5268, |
|
"eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.7923, |
|
"eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.3576, |
|
"eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 80.6284, |
|
"eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 76.1448, |
|
"eval_f1_for_task000_SQuAD_dev": 83.223, |
|
"eval_f1_for_task000_SearchQA_dev": 7.181, |
|
"eval_gen_len": 6.5938, |
|
"eval_global_step": 20000, |
|
"eval_loss": 1.4583032131195068, |
|
"eval_rouge1": 60.5924, |
|
"eval_rouge1_for_squad-like": 60.5924, |
|
"eval_rouge1_for_task000_NewsQA_dev": 52.0147, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.7925, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.8888, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 81.0775, |
|
"eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.956, |
|
"eval_rouge1_for_task000_SQuAD_dev": 83.5919, |
|
"eval_rouge1_for_task000_SearchQA_dev": 7.507, |
|
"eval_rougeL": 60.5491, |
|
"eval_rougeL_for_squad-like": 60.5491, |
|
"eval_rougeL_for_task000_NewsQA_dev": 51.9291, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.7222, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.8211, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 81.0484, |
|
"eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.9099, |
|
"eval_rougeL_for_task000_SQuAD_dev": 83.5543, |
|
"eval_rougeL_for_task000_SearchQA_dev": 7.4907, |
|
"eval_runtime": 5575.4762, |
|
"eval_samples_per_second": 12.446, |
|
"eval_steps_per_second": 6.223, |
|
"step": 20000 |
|
} |
|
], |
|
"max_steps": 20000, |
|
"num_train_epochs": 2, |
|
"total_flos": 3.03606909632512e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|