{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 5e-05, "loss": 1.5652, "step": 1000 }, { "epoch": 0.1, "eval_exact_match": 38.3413, "eval_exact_match_for_squad-like": 38.3413, "eval_exact_match_for_task000_NewsQA_dev": 23.8367, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 50.0455, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 52.0534, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 53.7009, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 50.0357, "eval_exact_match_for_task000_SQuAD_dev": 55.8009, "eval_exact_match_for_task000_SearchQA_dev": 2.0554, "eval_f1": 52.1049, "eval_f1_for_squad-like": 52.1049, "eval_f1_for_task000_NewsQA_dev": 40.9979, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 68.6298, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 69.9454, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 69.6682, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 66.9921, "eval_f1_for_task000_SQuAD_dev": 71.031, "eval_f1_for_task000_SearchQA_dev": 6.183, "eval_gen_len": 6.4379, "eval_global_step": 1000, "eval_loss": 1.6005868911743164, "eval_rouge1": 51.5552, "eval_rouge1_for_squad-like": 51.5552, "eval_rouge1_for_task000_NewsQA_dev": 40.9869, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 68.2955, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 69.008, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 68.733, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 65.7541, "eval_rouge1_for_task000_SQuAD_dev": 70.1352, "eval_rouge1_for_task000_SearchQA_dev": 6.3956, "eval_rougeL": 51.4856, "eval_rougeL_for_squad-like": 51.4856, "eval_rougeL_for_task000_NewsQA_dev": 40.8766, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 68.2039, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 68.8974, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 68.6842, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 65.6558, "eval_rougeL_for_task000_SQuAD_dev": 70.0603, "eval_rougeL_for_task000_SearchQA_dev": 6.3754, "eval_runtime": 5547.761, "eval_samples_per_second": 12.508, "eval_steps_per_second": 6.254, "step": 1000 }, { "epoch": 0.2, "learning_rate": 5e-05, "loss": 1.0877, "step": 2000 }, { "epoch": 0.2, "eval_exact_match": 42.2972, "eval_exact_match_for_squad-like": 42.2972, "eval_exact_match_for_task000_NewsQA_dev": 28.8936, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 55.0531, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 56.4374, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 58.6687, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 54.8506, "eval_exact_match_for_task000_SQuAD_dev": 61.3876, "eval_exact_match_for_task000_SearchQA_dev": 2.821, "eval_f1": 54.8049, "eval_f1_for_squad-like": 54.8049, "eval_f1_for_task000_NewsQA_dev": 46.0401, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 71.6103, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 72.681, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 72.9829, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 69.6718, "eval_f1_for_task000_SQuAD_dev": 74.8343, "eval_f1_for_task000_SearchQA_dev": 7.0869, "eval_gen_len": 5.9867, "eval_global_step": 2000, "eval_loss": 1.5502516031265259, "eval_rouge1": 54.5481, "eval_rouge1_for_squad-like": 54.5481, "eval_rouge1_for_task000_NewsQA_dev": 46.2121, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 71.3277, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 72.3257, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 72.7441, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 68.4653, "eval_rouge1_for_task000_SQuAD_dev": 74.5387, "eval_rouge1_for_task000_SearchQA_dev": 7.3461, "eval_rougeL": 54.4937, "eval_rougeL_for_squad-like": 54.4937, "eval_rougeL_for_task000_NewsQA_dev": 46.1235, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 71.2521, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 72.2345, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 72.7112, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 68.3891, "eval_rougeL_for_task000_SQuAD_dev": 74.4799, "eval_rougeL_for_task000_SearchQA_dev": 7.3323, "eval_runtime": 5244.5628, "eval_samples_per_second": 13.231, "eval_steps_per_second": 6.615, "step": 2000 }, { "epoch": 0.3, "learning_rate": 5e-05, "loss": 1.0892, "step": 3000 }, { "epoch": 0.3, "eval_exact_match": 42.9471, "eval_exact_match_for_squad-like": 42.9471, "eval_exact_match_for_task000_NewsQA_dev": 28.4188, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 56.3784, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 58.2514, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 59.7417, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 55.177, "eval_exact_match_for_task000_SQuAD_dev": 62.387, "eval_exact_match_for_task000_SearchQA_dev": 2.5324, "eval_f1": 56.2906, "eval_f1_for_squad-like": 56.2906, "eval_f1_for_task000_NewsQA_dev": 46.7128, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 73.6538, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 75.7309, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 75.2649, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 71.4267, "eval_f1_for_task000_SQuAD_dev": 76.8954, "eval_f1_for_task000_SearchQA_dev": 6.7347, "eval_gen_len": 6.1098, "eval_global_step": 3000, "eval_loss": 1.4990659952163696, "eval_rouge1": 56.0886, "eval_rouge1_for_squad-like": 56.0886, "eval_rouge1_for_task000_NewsQA_dev": 46.9106, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 73.4005, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 75.5803, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 75.0322, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 70.2838, "eval_rouge1_for_task000_SQuAD_dev": 76.6652, "eval_rouge1_for_task000_SearchQA_dev": 7.018, "eval_rougeL": 56.0392, "eval_rougeL_for_squad-like": 56.0392, "eval_rougeL_for_task000_NewsQA_dev": 46.8585, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 73.3224, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 75.5113, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 75.0019, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 70.2237, "eval_rougeL_for_task000_SQuAD_dev": 76.5983, "eval_rougeL_for_task000_SearchQA_dev": 7.0011, "eval_runtime": 5239.5633, "eval_samples_per_second": 13.243, "eval_steps_per_second": 6.622, "step": 3000 }, { "epoch": 0.4, "learning_rate": 5e-05, "loss": 0.9586, "step": 4000 }, { "epoch": 0.4, "eval_exact_match": 43.2815, "eval_exact_match_for_squad-like": 43.2815, "eval_exact_match_for_task000_NewsQA_dev": 29.416, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 56.6313, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 59.0199, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 59.8013, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 56.2583, "eval_exact_match_for_task000_SQuAD_dev": 62.406, "eval_exact_match_for_task000_SearchQA_dev": 2.4735, "eval_f1": 56.5384, "eval_f1_for_squad-like": 56.5384, "eval_f1_for_task000_NewsQA_dev": 47.1146, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 74.0447, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 76.4587, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 75.331, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 71.9027, "eval_f1_for_task000_SQuAD_dev": 77.2248, "eval_f1_for_task000_SearchQA_dev": 6.562, "eval_gen_len": 6.5536, "eval_global_step": 4000, "eval_loss": 1.4763996601104736, "eval_rouge1": 56.4181, "eval_rouge1_for_squad-like": 56.4181, "eval_rouge1_for_task000_NewsQA_dev": 47.2027, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 73.8315, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 76.3451, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 75.187, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 70.9723, "eval_rouge1_for_task000_SQuAD_dev": 77.2012, "eval_rouge1_for_task000_SearchQA_dev": 6.8629, "eval_rougeL": 56.3585, "eval_rougeL_for_squad-like": 56.3585, "eval_rougeL_for_task000_NewsQA_dev": 47.124, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 73.7295, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 76.2619, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 75.156, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 70.9096, "eval_rougeL_for_task000_SQuAD_dev": 77.129, "eval_rougeL_for_task000_SearchQA_dev": 6.8364, "eval_runtime": 5411.8899, "eval_samples_per_second": 12.822, "eval_steps_per_second": 6.411, "step": 4000 }, { "epoch": 0.5, "learning_rate": 5e-05, "loss": 0.8703, "step": 5000 }, { "epoch": 0.5, "eval_exact_match": 44.8091, "eval_exact_match_for_squad-like": 44.8091, "eval_exact_match_for_task000_NewsQA_dev": 29.1548, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 58.8569, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 60.7458, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 61.6294, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 58.7881, "eval_exact_match_for_task000_SQuAD_dev": 65.661, "eval_exact_match_for_task000_SearchQA_dev": 2.1201, "eval_f1": 57.5067, "eval_f1_for_squad-like": 57.5067, "eval_f1_for_task000_NewsQA_dev": 47.528, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 75.5801, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 77.4224, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 76.6725, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 73.7737, "eval_f1_for_task000_SQuAD_dev": 79.5359, "eval_f1_for_task000_SearchQA_dev": 5.7665, "eval_gen_len": 6.5617, "eval_global_step": 5000, "eval_loss": 1.4937905073165894, "eval_rouge1": 57.5127, "eval_rouge1_for_squad-like": 57.5127, "eval_rouge1_for_task000_NewsQA_dev": 47.7949, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 75.4332, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 77.5705, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 76.5839, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 73.2737, "eval_rouge1_for_task000_SQuAD_dev": 79.6618, "eval_rouge1_for_task000_SearchQA_dev": 6.0046, "eval_rougeL": 57.4686, "eval_rougeL_for_squad-like": 57.4686, "eval_rougeL_for_task000_NewsQA_dev": 47.6893, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 75.3688, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 77.5261, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 76.5531, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 73.2278, "eval_rougeL_for_task000_SQuAD_dev": 79.6122, "eval_rougeL_for_task000_SearchQA_dev": 5.9839, "eval_runtime": 5610.9818, "eval_samples_per_second": 12.367, "eval_steps_per_second": 6.183, "step": 5000 }, { "epoch": 0.6, "learning_rate": 5e-05, "loss": 0.9186, "step": 6000 }, { "epoch": 0.6, "eval_exact_match": 45.2284, "eval_exact_match_for_squad-like": 45.2284, "eval_exact_match_for_task000_NewsQA_dev": 31.529, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 58.9277, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 60.4308, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 61.5897, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 58.8595, "eval_exact_match_for_task000_SQuAD_dev": 66.0893, "eval_exact_match_for_task000_SearchQA_dev": 3.0683, "eval_f1": 57.6298, "eval_f1_for_squad-like": 57.6298, "eval_f1_for_task000_NewsQA_dev": 49.1496, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 75.4059, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 76.631, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 76.5732, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 73.1298, "eval_f1_for_task000_SQuAD_dev": 79.143, "eval_f1_for_task000_SearchQA_dev": 7.0128, "eval_gen_len": 5.6208, "eval_global_step": 6000, "eval_loss": 1.4303617477416992, "eval_rouge1": 57.5885, "eval_rouge1_for_squad-like": 57.5885, "eval_rouge1_for_task000_NewsQA_dev": 49.3293, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 75.2476, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 76.6477, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 76.397, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 72.5855, "eval_rouge1_for_task000_SQuAD_dev": 79.167, "eval_rouge1_for_task000_SearchQA_dev": 7.2874, "eval_rougeL": 57.5543, "eval_rougeL_for_squad-like": 57.5543, "eval_rougeL_for_task000_NewsQA_dev": 49.2858, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 75.2047, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 76.6027, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 76.369, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 72.54, "eval_rougeL_for_task000_SQuAD_dev": 79.1235, "eval_rougeL_for_task000_SearchQA_dev": 7.2743, "eval_runtime": 4976.2912, "eval_samples_per_second": 13.944, "eval_steps_per_second": 6.972, "step": 6000 }, { "epoch": 0.7, "learning_rate": 5e-05, "loss": 0.8626, "step": 7000 }, { "epoch": 0.7, "eval_exact_match": 46.145, "eval_exact_match_for_squad-like": 46.145, "eval_exact_match_for_task000_NewsQA_dev": 30.1282, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 59.6055, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 63.5676, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 63.5469, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 60.2775, "eval_exact_match_for_task000_SQuAD_dev": 67.7453, "eval_exact_match_for_task000_SearchQA_dev": 2.2968, "eval_f1": 58.3057, "eval_f1_for_squad-like": 58.3057, "eval_f1_for_task000_NewsQA_dev": 48.4752, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 76.1677, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.2423, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 78.2045, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 74.6885, "eval_f1_for_task000_SQuAD_dev": 80.3613, "eval_f1_for_task000_SearchQA_dev": 5.657, "eval_gen_len": 5.8196, "eval_global_step": 7000, "eval_loss": 1.3808618783950806, "eval_rouge1": 58.2675, "eval_rouge1_for_squad-like": 58.2675, "eval_rouge1_for_task000_NewsQA_dev": 48.7542, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 75.9378, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.3134, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 78.1724, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 74.0395, "eval_rouge1_for_task000_SQuAD_dev": 80.3759, "eval_rouge1_for_task000_SearchQA_dev": 5.9169, "eval_rougeL": 58.2236, "eval_rougeL_for_squad-like": 58.2236, "eval_rougeL_for_task000_NewsQA_dev": 48.6986, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 75.8622, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 79.2595, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 78.1477, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 73.978, "eval_rougeL_for_task000_SQuAD_dev": 80.3316, "eval_rougeL_for_task000_SearchQA_dev": 5.8983, "eval_runtime": 5035.1499, "eval_samples_per_second": 13.781, "eval_steps_per_second": 6.891, "step": 7000 }, { "epoch": 0.8, "learning_rate": 5e-05, "loss": 0.8322, "step": 8000 }, { "epoch": 0.8, "eval_exact_match": 46.3338, "eval_exact_match_for_squad-like": 46.3338, "eval_exact_match_for_task000_NewsQA_dev": 31.5527, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.6576, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 63.4669, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 63.8351, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 60.5325, "eval_exact_match_for_task000_SQuAD_dev": 67.0505, "eval_exact_match_for_task000_SearchQA_dev": 2.2615, "eval_f1": 58.5957, "eval_f1_for_squad-like": 58.5957, "eval_f1_for_task000_NewsQA_dev": 49.4319, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 76.9441, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.665, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 78.3856, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.1449, "eval_f1_for_task000_SQuAD_dev": 80.3231, "eval_f1_for_task000_SearchQA_dev": 5.608, "eval_gen_len": 5.9989, "eval_global_step": 8000, "eval_loss": 1.4950507879257202, "eval_rouge1": 58.6055, "eval_rouge1_for_squad-like": 58.6055, "eval_rouge1_for_task000_NewsQA_dev": 49.7455, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 76.7235, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.7546, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 78.4596, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 74.565, "eval_rouge1_for_task000_SQuAD_dev": 80.4772, "eval_rouge1_for_task000_SearchQA_dev": 5.8525, "eval_rougeL": 58.5662, "eval_rougeL_for_squad-like": 58.5662, "eval_rougeL_for_task000_NewsQA_dev": 49.6823, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 76.6541, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 79.7031, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 78.4393, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 74.5146, "eval_rougeL_for_task000_SQuAD_dev": 80.4338, "eval_rougeL_for_task000_SearchQA_dev": 5.84, "eval_runtime": 5037.486, "eval_samples_per_second": 13.775, "eval_steps_per_second": 6.887, "step": 8000 }, { "epoch": 0.9, "learning_rate": 5e-05, "loss": 0.8616, "step": 9000 }, { "epoch": 0.9, "eval_exact_match": 46.805, "eval_exact_match_for_squad-like": 46.805, "eval_exact_match_for_task000_NewsQA_dev": 31.6002, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.9611, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 64.0086, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 63.686, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.0935, "eval_exact_match_for_task000_SQuAD_dev": 68.4686, "eval_exact_match_for_task000_SearchQA_dev": 2.6325, "eval_f1": 58.9037, "eval_f1_for_squad-like": 58.9037, "eval_f1_for_task000_NewsQA_dev": 49.0476, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.1558, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.7546, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 78.4691, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.349, "eval_f1_for_task000_SQuAD_dev": 81.0326, "eval_f1_for_task000_SearchQA_dev": 6.1907, "eval_gen_len": 6.0577, "eval_global_step": 9000, "eval_loss": 1.3887202739715576, "eval_rouge1": 59.0007, "eval_rouge1_for_squad-like": 59.0007, "eval_rouge1_for_task000_NewsQA_dev": 49.3458, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.0702, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.9826, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 78.6904, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 74.9073, "eval_rouge1_for_task000_SQuAD_dev": 81.2597, "eval_rouge1_for_task000_SearchQA_dev": 6.4396, "eval_rougeL": 58.9619, "eval_rougeL_for_squad-like": 58.9619, "eval_rougeL_for_task000_NewsQA_dev": 49.2793, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.0101, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 79.9214, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 78.6681, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 74.8591, "eval_rougeL_for_task000_SQuAD_dev": 81.2181, "eval_rougeL_for_task000_SearchQA_dev": 6.4279, "eval_runtime": 5346.6704, "eval_samples_per_second": 12.978, "eval_steps_per_second": 6.489, "step": 9000 }, { "epoch": 1.0, "learning_rate": 5e-05, "loss": 0.76, "step": 10000 }, { "epoch": 1.0, "eval_exact_match": 47.0558, "eval_exact_match_for_squad-like": 47.0558, "eval_exact_match_for_task000_NewsQA_dev": 32.075, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.7688, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 64.6762, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 64.0139, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.3894, "eval_exact_match_for_task000_SQuAD_dev": 68.7066, "eval_exact_match_for_task000_SearchQA_dev": 2.8269, "eval_f1": 59.6922, "eval_f1_for_squad-like": 59.6922, "eval_f1_for_task000_NewsQA_dev": 50.5612, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.7643, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.537, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.1097, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 76.3824, "eval_f1_for_task000_SQuAD_dev": 81.8903, "eval_f1_for_task000_SearchQA_dev": 6.8104, "eval_gen_len": 6.2411, "eval_global_step": 10000, "eval_loss": 1.4306766986846924, "eval_rouge1": 59.8786, "eval_rouge1_for_squad-like": 59.8786, "eval_rouge1_for_task000_NewsQA_dev": 51.1274, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.7056, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.8227, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 79.5036, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 76.0271, "eval_rouge1_for_task000_SQuAD_dev": 82.2167, "eval_rouge1_for_task000_SearchQA_dev": 7.1018, "eval_rougeL": 59.8423, "eval_rougeL_for_squad-like": 59.8423, "eval_rougeL_for_task000_NewsQA_dev": 51.0694, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.66, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 80.7668, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 79.4863, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.9724, "eval_rougeL_for_task000_SQuAD_dev": 82.178, "eval_rougeL_for_task000_SearchQA_dev": 7.0864, "eval_runtime": 5392.0755, "eval_samples_per_second": 12.869, "eval_steps_per_second": 6.434, "step": 10000 }, { "epoch": 1.1, "learning_rate": 5e-05, "loss": 0.6424, "step": 11000 }, { "epoch": 1.1, "eval_exact_match": 47.7403, "eval_exact_match_for_squad-like": 47.7403, "eval_exact_match_for_task000_NewsQA_dev": 32.9772, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 61.1128, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 64.9912, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 65.7327, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.8892, "eval_exact_match_for_task000_SQuAD_dev": 70.1342, "eval_exact_match_for_task000_SearchQA_dev": 2.8622, "eval_f1": 59.9977, "eval_f1_for_squad-like": 59.9977, "eval_f1_for_task000_NewsQA_dev": 51.0113, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.6964, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.5474, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 80.0965, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 76.6229, "eval_f1_for_task000_SQuAD_dev": 82.6467, "eval_f1_for_task000_SearchQA_dev": 6.79, "eval_gen_len": 6.0952, "eval_global_step": 11000, "eval_loss": 1.458894968032837, "eval_rouge1": 60.1081, "eval_rouge1_for_squad-like": 60.1081, "eval_rouge1_for_task000_NewsQA_dev": 51.3906, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.5815, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.8219, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 80.3102, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 76.1436, "eval_rouge1_for_task000_SQuAD_dev": 82.8847, "eval_rouge1_for_task000_SearchQA_dev": 7.0882, "eval_rougeL": 60.0649, "eval_rougeL_for_squad-like": 60.0649, "eval_rougeL_for_task000_NewsQA_dev": 51.3201, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.5161, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 80.7665, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 80.2852, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 76.0825, "eval_rougeL_for_task000_SQuAD_dev": 82.8429, "eval_rougeL_for_task000_SearchQA_dev": 7.0691, "eval_runtime": 5216.6936, "eval_samples_per_second": 13.302, "eval_steps_per_second": 6.651, "step": 11000 }, { "epoch": 1.2, "learning_rate": 5e-05, "loss": 0.6121, "step": 12000 }, { "epoch": 1.2, "eval_exact_match": 47.0053, "eval_exact_match_for_squad-like": 47.0053, "eval_exact_match_for_task000_NewsQA_dev": 32.2175, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.3541, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 63.9204, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 64.2921, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.1547, "eval_exact_match_for_task000_SQuAD_dev": 69.3633, "eval_exact_match_for_task000_SearchQA_dev": 2.7444, "eval_f1": 58.8892, "eval_f1_for_squad-like": 58.8892, "eval_f1_for_task000_NewsQA_dev": 50.0588, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 76.4924, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.1391, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 78.5755, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.0157, "eval_f1_for_task000_SQuAD_dev": 81.7388, "eval_f1_for_task000_SearchQA_dev": 6.2467, "eval_gen_len": 5.4838, "eval_global_step": 12000, "eval_loss": 1.5454285144805908, "eval_rouge1": 59.1209, "eval_rouge1_for_squad-like": 59.1209, "eval_rouge1_for_task000_NewsQA_dev": 50.5568, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 76.4035, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 79.652, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 78.959, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 74.82, "eval_rouge1_for_task000_SQuAD_dev": 82.0668, "eval_rouge1_for_task000_SearchQA_dev": 6.5645, "eval_rougeL": 59.0936, "eval_rougeL_for_squad-like": 59.0936, "eval_rougeL_for_task000_NewsQA_dev": 50.5017, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 76.3605, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 79.6138, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 78.939, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 74.7889, "eval_rougeL_for_task000_SQuAD_dev": 82.0374, "eval_rougeL_for_task000_SearchQA_dev": 6.5579, "eval_runtime": 4739.7954, "eval_samples_per_second": 14.64, "eval_steps_per_second": 7.32, "step": 12000 }, { "epoch": 1.3, "learning_rate": 5e-05, "loss": 0.6267, "step": 13000 }, { "epoch": 1.3, "eval_exact_match": 46.8425, "eval_exact_match_for_squad-like": 46.8425, "eval_exact_match_for_task000_NewsQA_dev": 31.2441, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 59.8381, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 64.8652, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 64.7491, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 60.3081, "eval_exact_match_for_task000_SQuAD_dev": 69.0968, "eval_exact_match_for_task000_SearchQA_dev": 2.5618, "eval_f1": 59.0311, "eval_f1_for_squad-like": 59.0311, "eval_f1_for_task000_NewsQA_dev": 49.2573, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 76.2242, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.2601, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.0037, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 74.8635, "eval_f1_for_task000_SQuAD_dev": 81.6974, "eval_f1_for_task000_SearchQA_dev": 6.5171, "eval_gen_len": 6.1312, "eval_global_step": 13000, "eval_loss": 1.3520243167877197, "eval_rouge1": 59.3382, "eval_rouge1_for_squad-like": 59.3382, "eval_rouge1_for_task000_NewsQA_dev": 49.8146, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 76.3477, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.7192, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 79.3855, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 74.8334, "eval_rouge1_for_task000_SQuAD_dev": 82.1319, "eval_rouge1_for_task000_SearchQA_dev": 6.8695, "eval_rougeL": 59.2915, "eval_rougeL_for_squad-like": 59.2915, "eval_rougeL_for_task000_NewsQA_dev": 49.7207, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 76.2786, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 80.6445, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 79.3596, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 74.7793, "eval_rougeL_for_task000_SQuAD_dev": 82.1035, "eval_rougeL_for_task000_SearchQA_dev": 6.8414, "eval_runtime": 5365.8691, "eval_samples_per_second": 12.932, "eval_steps_per_second": 6.466, "step": 13000 }, { "epoch": 1.4, "learning_rate": 5e-05, "loss": 0.6173, "step": 14000 }, { "epoch": 1.4, "eval_exact_match": 47.0529, "eval_exact_match_for_squad-like": 47.0529, "eval_exact_match_for_task000_NewsQA_dev": 32.265, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.0506, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 65.4069, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 64.7988, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 60.3897, "eval_exact_match_for_task000_SQuAD_dev": 68.5828, "eval_exact_match_for_task000_SearchQA_dev": 3.033, "eval_f1": 59.4463, "eval_f1_for_squad-like": 59.4463, "eval_f1_for_task000_NewsQA_dev": 49.3437, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 76.8803, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.055, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 78.972, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.2939, "eval_f1_for_task000_SQuAD_dev": 81.7056, "eval_f1_for_task000_SearchQA_dev": 7.2043, "eval_gen_len": 6.2021, "eval_global_step": 14000, "eval_loss": 1.421161413192749, "eval_rouge1": 59.7198, "eval_rouge1_for_squad-like": 59.7198, "eval_rouge1_for_task000_NewsQA_dev": 49.8919, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 76.9311, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.4792, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 79.4279, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.1727, "eval_rouge1_for_task000_SQuAD_dev": 82.0675, "eval_rouge1_for_task000_SearchQA_dev": 7.5337, "eval_rougeL": 59.6836, "eval_rougeL_for_squad-like": 59.6836, "eval_rougeL_for_task000_NewsQA_dev": 49.8304, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 76.8717, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.4312, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 79.4022, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.1244, "eval_rougeL_for_task000_SQuAD_dev": 82.035, "eval_rougeL_for_task000_SearchQA_dev": 7.5215, "eval_runtime": 5210.8523, "eval_samples_per_second": 13.316, "eval_steps_per_second": 6.658, "step": 14000 }, { "epoch": 1.5, "learning_rate": 5e-05, "loss": 0.6122, "step": 15000 }, { "epoch": 1.5, "eval_exact_match": 47.7187, "eval_exact_match_for_squad-like": 47.7187, "eval_exact_match_for_task000_NewsQA_dev": 31.9801, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.6677, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 65.7722, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 66.1003, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.4302, "eval_exact_match_for_task000_SQuAD_dev": 70.4197, "eval_exact_match_for_task000_SearchQA_dev": 2.7856, "eval_f1": 60.0424, "eval_f1_for_squad-like": 60.0424, "eval_f1_for_task000_NewsQA_dev": 49.8937, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.6636, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.1636, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.9381, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 76.2199, "eval_f1_for_task000_SQuAD_dev": 82.7855, "eval_f1_for_task000_SearchQA_dev": 7.2213, "eval_gen_len": 6.7893, "eval_global_step": 15000, "eval_loss": 1.4648000001907349, "eval_rouge1": 60.3199, "eval_rouge1_for_squad-like": 60.3199, "eval_rouge1_for_task000_NewsQA_dev": 50.5586, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.6507, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.6021, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 80.4139, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.9828, "eval_rouge1_for_task000_SQuAD_dev": 83.1733, "eval_rouge1_for_task000_SearchQA_dev": 7.608, "eval_rougeL": 60.27, "eval_rougeL_for_squad-like": 60.27, "eval_rougeL_for_task000_NewsQA_dev": 50.4674, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.5673, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.5322, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 80.3954, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.9265, "eval_rougeL_for_task000_SQuAD_dev": 83.1281, "eval_rougeL_for_task000_SearchQA_dev": 7.5794, "eval_runtime": 5599.7297, "eval_samples_per_second": 12.392, "eval_steps_per_second": 6.196, "step": 15000 }, { "epoch": 1.6, "learning_rate": 5e-05, "loss": 0.5975, "step": 16000 }, { "epoch": 1.6, "eval_exact_match": 47.7028, "eval_exact_match_for_squad-like": 47.7028, "eval_exact_match_for_task000_NewsQA_dev": 32.906, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.6373, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 65.4825, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 65.0373, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.3996, "eval_exact_match_for_task000_SQuAD_dev": 70.1056, "eval_exact_match_for_task000_SearchQA_dev": 3.4865, "eval_f1": 60.0719, "eval_f1_for_squad-like": 60.0719, "eval_f1_for_task000_NewsQA_dev": 50.6484, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.4577, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.7913, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.553, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.9467, "eval_f1_for_task000_SQuAD_dev": 82.5792, "eval_f1_for_task000_SearchQA_dev": 7.9625, "eval_gen_len": 6.5056, "eval_global_step": 16000, "eval_loss": 1.4460376501083374, "eval_rouge1": 60.3765, "eval_rouge1_for_squad-like": 60.3765, "eval_rouge1_for_task000_NewsQA_dev": 51.3099, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.4873, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.2315, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 80.091, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.8312, "eval_rouge1_for_task000_SQuAD_dev": 83.0172, "eval_rouge1_for_task000_SearchQA_dev": 8.2968, "eval_rougeL": 60.3322, "eval_rougeL_for_squad-like": 60.3322, "eval_rougeL_for_task000_NewsQA_dev": 51.2196, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.4096, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.1818, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 80.0692, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.7759, "eval_rougeL_for_task000_SQuAD_dev": 82.9814, "eval_rougeL_for_task000_SearchQA_dev": 8.2737, "eval_runtime": 5456.9286, "eval_samples_per_second": 12.716, "eval_steps_per_second": 6.358, "step": 16000 }, { "epoch": 1.7, "learning_rate": 5e-05, "loss": 0.5826, "step": 17000 }, { "epoch": 1.7, "eval_exact_match": 47.6063, "eval_exact_match_for_squad-like": 47.6063, "eval_exact_match_for_task000_NewsQA_dev": 32.6923, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.2428, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 65.8478, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 65.6433, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.3384, "eval_exact_match_for_task000_SQuAD_dev": 69.9534, "eval_exact_match_for_task000_SearchQA_dev": 2.9741, "eval_f1": 59.786, "eval_f1_for_squad-like": 59.786, "eval_f1_for_task000_NewsQA_dev": 50.6166, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.0853, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.0205, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.6158, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.6525, "eval_f1_for_task000_SQuAD_dev": 82.5741, "eval_f1_for_task000_SearchQA_dev": 7.0474, "eval_gen_len": 6.2252, "eval_global_step": 17000, "eval_loss": 1.4596614837646484, "eval_rouge1": 60.1349, "eval_rouge1_for_squad-like": 60.1349, "eval_rouge1_for_task000_NewsQA_dev": 51.2595, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.2979, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.5869, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 80.098, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.5838, "eval_rouge1_for_task000_SQuAD_dev": 83.0602, "eval_rouge1_for_task000_SearchQA_dev": 7.378, "eval_rougeL": 60.0935, "eval_rougeL_for_squad-like": 60.0935, "eval_rougeL_for_task000_NewsQA_dev": 51.189, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.2209, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.5258, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 80.0791, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.5283, "eval_rougeL_for_task000_SQuAD_dev": 83.0318, "eval_rougeL_for_task000_SearchQA_dev": 7.3608, "eval_runtime": 5176.1682, "eval_samples_per_second": 13.406, "eval_steps_per_second": 6.703, "step": 17000 }, { "epoch": 1.8, "learning_rate": 5e-05, "loss": 0.5955, "step": 18000 }, { "epoch": 1.8, "eval_exact_match": 47.0024, "eval_exact_match_for_squad-like": 47.0024, "eval_exact_match_for_task000_NewsQA_dev": 32.3837, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.0303, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 64.8526, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 64.7591, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 60.5631, "eval_exact_match_for_task000_SQuAD_dev": 69.3728, "eval_exact_match_for_task000_SearchQA_dev": 2.5029, "eval_f1": 59.6025, "eval_f1_for_squad-like": 59.6025, "eval_f1_for_task000_NewsQA_dev": 49.9466, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.0998, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 80.7246, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 79.6329, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 75.662, "eval_f1_for_task000_SQuAD_dev": 82.5021, "eval_f1_for_task000_SearchQA_dev": 6.6223, "eval_gen_len": 7.2863, "eval_global_step": 18000, "eval_loss": 1.5001791715621948, "eval_rouge1": 59.8829, "eval_rouge1_for_squad-like": 59.8829, "eval_rouge1_for_task000_NewsQA_dev": 50.5762, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.2014, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.1701, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 80.1226, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.4758, "eval_rouge1_for_task000_SQuAD_dev": 82.8425, "eval_rouge1_for_task000_SearchQA_dev": 6.9512, "eval_rougeL": 59.8296, "eval_rougeL_for_squad-like": 59.8296, "eval_rougeL_for_task000_NewsQA_dev": 50.4466, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.1325, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.0927, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 80.0963, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.4105, "eval_rougeL_for_task000_SQuAD_dev": 82.7958, "eval_rougeL_for_task000_SearchQA_dev": 6.924, "eval_runtime": 5979.7657, "eval_samples_per_second": 11.604, "eval_steps_per_second": 5.802, "step": 18000 }, { "epoch": 1.9, "learning_rate": 5e-05, "loss": 0.6105, "step": 19000 }, { "epoch": 1.9, "eval_exact_match": 48.3542, "eval_exact_match_for_squad-like": 48.3542, "eval_exact_match_for_task000_NewsQA_dev": 35.1377, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 61.4972, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 66.3895, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 66.4382, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.9198, "eval_exact_match_for_task000_SQuAD_dev": 71.1335, "eval_exact_match_for_task000_SearchQA_dev": 2.9034, "eval_f1": 60.527, "eval_f1_for_squad-like": 60.527, "eval_f1_for_task000_NewsQA_dev": 53.3584, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 78.0211, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.8373, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 80.6907, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 76.7447, "eval_f1_for_task000_SQuAD_dev": 83.3642, "eval_f1_for_task000_SearchQA_dev": 6.712, "eval_gen_len": 6.1527, "eval_global_step": 19000, "eval_loss": 1.4464625120162964, "eval_rouge1": 60.7571, "eval_rouge1_for_squad-like": 60.7571, "eval_rouge1_for_task000_NewsQA_dev": 53.7911, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.9828, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 82.3105, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 81.1026, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 76.486, "eval_rouge1_for_task000_SQuAD_dev": 83.7045, "eval_rouge1_for_task000_SearchQA_dev": 7.0408, "eval_rougeL": 60.7148, "eval_rougeL_for_squad-like": 60.7148, "eval_rougeL_for_task000_NewsQA_dev": 53.7027, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.9168, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 82.2503, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 81.0755, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 76.4296, "eval_rougeL_for_task000_SQuAD_dev": 83.673, "eval_rougeL_for_task000_SearchQA_dev": 7.0247, "eval_runtime": 5228.9365, "eval_samples_per_second": 13.27, "eval_steps_per_second": 6.635, "step": 19000 }, { "epoch": 2.0, "learning_rate": 5e-05, "loss": 0.6235, "step": 20000 }, { "epoch": 2.0, "eval_exact_match": 48.1741, "eval_exact_match_for_squad-like": 48.1741, "eval_exact_match_for_task000_NewsQA_dev": 33.3571, "eval_exact_match_for_task000_SQuAD_Shifts_amazon_dev": 60.9813, "eval_exact_match_for_task000_SQuAD_Shifts_new_wiki_dev": 66.6163, "eval_exact_match_for_task000_SQuAD_Shifts_nyt_dev": 66.5673, "eval_exact_match_for_task000_SQuAD_Shifts_reddit_dev": 61.7668, "eval_exact_match_for_task000_SQuAD_dev": 70.8099, "eval_exact_match_for_task000_SearchQA_dev": 3.0153, "eval_f1": 60.3279, "eval_f1_for_squad-like": 60.3279, "eval_f1_for_task000_NewsQA_dev": 51.5268, "eval_f1_for_task000_SQuAD_Shifts_amazon_dev": 77.7923, "eval_f1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.3576, "eval_f1_for_task000_SQuAD_Shifts_nyt_dev": 80.6284, "eval_f1_for_task000_SQuAD_Shifts_reddit_dev": 76.1448, "eval_f1_for_task000_SQuAD_dev": 83.223, "eval_f1_for_task000_SearchQA_dev": 7.181, "eval_gen_len": 6.5938, "eval_global_step": 20000, "eval_loss": 1.4583032131195068, "eval_rouge1": 60.5924, "eval_rouge1_for_squad-like": 60.5924, "eval_rouge1_for_task000_NewsQA_dev": 52.0147, "eval_rouge1_for_task000_SQuAD_Shifts_amazon_dev": 77.7925, "eval_rouge1_for_task000_SQuAD_Shifts_new_wiki_dev": 81.8888, "eval_rouge1_for_task000_SQuAD_Shifts_nyt_dev": 81.0775, "eval_rouge1_for_task000_SQuAD_Shifts_reddit_dev": 75.956, "eval_rouge1_for_task000_SQuAD_dev": 83.5919, "eval_rouge1_for_task000_SearchQA_dev": 7.507, "eval_rougeL": 60.5491, "eval_rougeL_for_squad-like": 60.5491, "eval_rougeL_for_task000_NewsQA_dev": 51.9291, "eval_rougeL_for_task000_SQuAD_Shifts_amazon_dev": 77.7222, "eval_rougeL_for_task000_SQuAD_Shifts_new_wiki_dev": 81.8211, "eval_rougeL_for_task000_SQuAD_Shifts_nyt_dev": 81.0484, "eval_rougeL_for_task000_SQuAD_Shifts_reddit_dev": 75.9099, "eval_rougeL_for_task000_SQuAD_dev": 83.5543, "eval_rougeL_for_task000_SearchQA_dev": 7.4907, "eval_runtime": 5575.4762, "eval_samples_per_second": 12.446, "eval_steps_per_second": 6.223, "step": 20000 } ], "max_steps": 20000, "num_train_epochs": 2, "total_flos": 3.03606909632512e+16, "trial_name": null, "trial_params": null }