{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0926472194908774, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 6.1875, "step": 1 }, { "epoch": 0.0, "eval_exact_match": 9.6809, "eval_exact_match_for_answerability_classification": 17.7692, "eval_exact_match_for_cause_effect_classification": 0.1429, "eval_exact_match_for_coreference_resolution": 13.6429, "eval_exact_match_for_data_to_text": 2.1792, "eval_exact_match_for_dialogue_act_recognition": 22.4286, "eval_exact_match_for_grammar_error_correction": 4.0, "eval_exact_match_for_keyword_tagging": 14.4, "eval_exact_match_for_overlap_extraction": 4.5, "eval_exact_match_for_question_rewriting": 0.4545, "eval_exact_match_for_task020_mctaco_answerability_classification": 35.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 7.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 1.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 9.0, "eval_exact_match_for_task050_multirc_answerability_classification": 40.0, "eval_exact_match_for_task102_commongen_data_to_text": 3.0, "eval_exact_match_for_task1152_bard_word_analogy": 0.0, "eval_exact_match_for_task1153_bard_word_analogy": 2.0, "eval_exact_match_for_task1154_bard_word_analogy": 0.0, "eval_exact_match_for_task1155_bard_word_analogy": 0.0, "eval_exact_match_for_task1156_bard_word_analogy": 5.0, "eval_exact_match_for_task1157_bard_word_analogy": 0.0, "eval_exact_match_for_task1158_bard_word_analogy": 0.0, "eval_exact_match_for_task1159_bard_word_analogy": 4.0, "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 44.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 0.0, "eval_exact_match_for_task1386_anli_textual_entailment": 0.0, "eval_exact_match_for_task1387_anli_textual_entailment": 5.0, "eval_exact_match_for_task1388_cb_textual_entailment": 0.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 0.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 0.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 1.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 0.0, "eval_exact_match_for_task1407_dart_data_to_text": 1.0, "eval_exact_match_for_task1409_dart_data_to_text": 3.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 0.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 0.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 25.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 0.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 9.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 4.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 48.0, "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 0.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 8.0, "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 0.0, "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 28.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 4.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 22.0, "eval_exact_match_for_task1659_billsum_title_generation": 0.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 3.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, "eval_exact_match_for_task190_snli_textual_entailment": 0.0, "eval_exact_match_for_task199_multinli_textual_entailment": 25.0, "eval_exact_match_for_task200_multinli_textual_entailment": 25.0, "eval_exact_match_for_task201_multinli_textual_entailment": 22.0, "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, "eval_exact_match_for_task219_rocstories_title_generation": 4.0, "eval_exact_match_for_task220_rocstories_title_generation": 75.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 0.0, "eval_exact_match_for_task232_iirc_answerability_classification": 1.0, "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 44.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 19.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 1.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, "eval_exact_match_for_task329_gap_coreference_resolution": 32.0, "eval_exact_match_for_task330_gap_coreference_resolution": 8.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 8.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 21.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 1.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, "eval_exact_match_for_task418_persent_title_generation": 2.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, "eval_exact_match_for_task500_scruples_title_generation": 0.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 52.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 8.0, "eval_exact_match_for_task602_wikitext_title_generation": 0.0, "eval_exact_match_for_task613_liar_keyword_tagging": 13.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 8.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 1.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 0.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 0.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 0.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 49.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 5.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 46.0, "eval_exact_match_for_task743_eurlex_title_generation": 0.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 49.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 0.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 0.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 30.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 6.0, "eval_exact_match_for_task891_gap_coreference_resolution": 32.0, "eval_exact_match_for_task892_gap_coreference_resolution": 14.0, "eval_exact_match_for_task893_gap_coreference_resolution": 26.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 33.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 0.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 6.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 0.0, "eval_exact_match_for_textual_entailment": 12.875, "eval_exact_match_for_title_generation": 7.9036, "eval_exact_match_for_word_analogy": 1.375, "eval_f1": 26.8473, "eval_f1_for_answerability_classification": 18.7926, "eval_f1_for_cause_effect_classification": 22.0441, "eval_f1_for_coreference_resolution": 22.0192, "eval_f1_for_data_to_text": 50.8769, "eval_f1_for_dialogue_act_recognition": 25.44, "eval_f1_for_grammar_error_correction": 53.4525, "eval_f1_for_keyword_tagging": 29.8915, "eval_f1_for_overlap_extraction": 31.4115, "eval_f1_for_question_rewriting": 59.7762, "eval_f1_for_task020_mctaco_answerability_classification": 35.1257, "eval_f1_for_task033_winogrande_coreference_resolution": 7.0, "eval_f1_for_task034_winogrande_question_rewriting": 65.8388, "eval_f1_for_task035_winogrande_question_rewriting": 70.0901, "eval_f1_for_task036_qasc_keyword_tagging": 45.6515, "eval_f1_for_task039_qasc_overlap_extraction": 18.1698, "eval_f1_for_task050_multirc_answerability_classification": 40.0, "eval_f1_for_task102_commongen_data_to_text": 61.7543, "eval_f1_for_task1152_bard_word_analogy": 0.0, "eval_f1_for_task1153_bard_word_analogy": 2.6667, "eval_f1_for_task1154_bard_word_analogy": 0.0, "eval_f1_for_task1155_bard_word_analogy": 0.0, "eval_f1_for_task1156_bard_word_analogy": 5.6667, "eval_f1_for_task1157_bard_word_analogy": 0.0, "eval_f1_for_task1158_bard_word_analogy": 0.0, "eval_f1_for_task1159_bard_word_analogy": 4.0, "eval_f1_for_task1161_coda_19_title_generation": 27.4485, "eval_f1_for_task1195_disfl_qa_question_rewriting": 71.5192, "eval_f1_for_task121_atomic_question_rewriting": 47.9722, "eval_f1_for_task133_winowhy_coreference_resolution": 44.3527, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.4092, "eval_f1_for_task1344_rte_textual_entailment": 50.0, "eval_f1_for_task1345_qqp_question_rewriting": 31.0146, "eval_f1_for_task1356_xlsum_title_generation": 10.5431, "eval_f1_for_task1358_xlsum_title_generation": 31.9612, "eval_f1_for_task1385_anli_textual_entailment": 19.6705, "eval_f1_for_task1386_anli_textual_entailment": 3.0615, "eval_f1_for_task1387_anli_textual_entailment": 11.8377, "eval_f1_for_task1388_cb_textual_entailment": 8.219, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 21.3333, "eval_f1_for_task1391_winogrande_coreference_resolution": 0.0, "eval_f1_for_task1393_copa_cause_effect_classification": 1.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 1.2264, "eval_f1_for_task1407_dart_data_to_text": 41.147, "eval_f1_for_task1409_dart_data_to_text": 53.273, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.6352, "eval_f1_for_task1439_doqa_answerability_classification": 0.6086, "eval_f1_for_task1442_doqa_answerability_classification": 2.4198, "eval_f1_for_task1516_imppres_textual_entailment": 25.5689, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 2.9674, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 9.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 4.5, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 48.0755, "eval_f1_for_task1540_peer_read_title_generation": 11.2864, "eval_f1_for_task1554_scitail_textual_entailment": 2.7532, "eval_f1_for_task1557_jfleg_grammar_error_correction": 77.2699, "eval_f1_for_task1562_zest_question_rewriting": 57.2093, "eval_f1_for_task1586_scifact_title_generation": 27.5331, "eval_f1_for_task1598_nyc_data_to_text": 53.4539, "eval_f1_for_task1612_sick_textual_entailment": 3.146, "eval_f1_for_task1615_sick_textual_entailment": 33.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 72.2277, "eval_f1_for_task1624_disfl_qa_answerability_classification": 28.4934, "eval_f1_for_task1631_open_pi_data_to_text": 63.1483, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 22.0256, "eval_f1_for_task1659_billsum_title_generation": 19.7515, "eval_f1_for_task1664_wino_bias_coreference_resolution": 48.2333, "eval_f1_for_task1728_web_nlg_data_to_text": 60.6622, "eval_f1_for_task190_snli_textual_entailment": 2.4744, "eval_f1_for_task199_multinli_textual_entailment": 27.0, "eval_f1_for_task200_multinli_textual_entailment": 25.0, "eval_f1_for_task201_multinli_textual_entailment": 23.204, "eval_f1_for_task202_multinli_textual_entailment": 33.0, "eval_f1_for_task219_rocstories_title_generation": 18.3193, "eval_f1_for_task220_rocstories_title_generation": 75.0803, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 2.4124, "eval_f1_for_task232_iirc_answerability_classification": 2.9836, "eval_f1_for_task233_iirc_answerability_classification": 1.2321, "eval_f1_for_task242_tweetqa_answerability_classification": 44.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 27.8929, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 44.6531, "eval_f1_for_task288_gigaword_title_generation": 29.653, "eval_f1_for_task290_tellmewhy_answerability_classification": 4.5868, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 1.8957, "eval_f1_for_task329_gap_coreference_resolution": 32.3, "eval_f1_for_task330_gap_coreference_resolution": 15.9333, "eval_f1_for_task349_squad2.0_answerability_classification": 8.2353, "eval_f1_for_task362_spolin_dialogue_act_recognition": 40.2778, "eval_f1_for_task391_cod3s_cause_effect_classification": 37.3, "eval_f1_for_task392_cod3s_cause_effect_classification": 23.1896, "eval_f1_for_task393_cod3s_cause_effect_classification": 23.9656, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 7.246, "eval_f1_for_task402_grailqa_question_rewriting": 57.8297, "eval_f1_for_task418_persent_title_generation": 15.8731, "eval_f1_for_task442_com_qa_question_rewriting": 59.4344, "eval_f1_for_task500_scruples_title_generation": 14.4384, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 30.2205, "eval_f1_for_task520_aquamuse_answerability_classification": 52.1804, "eval_f1_for_task569_recipe_nlg_title_generation": 42.4757, "eval_f1_for_task602_wikitext_title_generation": 5.6369, "eval_f1_for_task613_liar_keyword_tagging": 14.8333, "eval_f1_for_task614_glucose_cause_effect_classification": 38.4533, "eval_f1_for_task619_ohsumed_title_generation": 37.0978, "eval_f1_for_task620_ohsumed_keyword_tagging": 16.7534, "eval_f1_for_task623_ohsumed_keyword_tagging": 2.5712, "eval_f1_for_task640_e_snli_textual_entailment": 1.8613, "eval_f1_for_task641_e_snli_textual_entailment": 0.25, "eval_f1_for_task642_e_snli_textual_entailment": 20.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 69.648, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 22.4152, "eval_f1_for_task670_ambigqa_question_rewriting": 72.8116, "eval_f1_for_task671_ambigqa_question_rewriting": 51.5913, "eval_f1_for_task677_ollie_data_to_text": 32.3577, "eval_f1_for_task738_perspectrum_textual_entailment": 46.6667, "eval_f1_for_task743_eurlex_title_generation": 21.4068, "eval_f1_for_task760_msr_sqa_data_to_text": 7.7287, "eval_f1_for_task769_qed_title_generation": 66.848, "eval_f1_for_task827_copa_cause_effect_classification": 0.0, "eval_f1_for_task828_copa_cause_effect_classification": 30.4, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 30.0, "eval_f1_for_task890_gwsd_textual_entailment": 7.4124, "eval_f1_for_task891_gap_coreference_resolution": 39.6667, "eval_f1_for_task892_gap_coreference_resolution": 14.0, "eval_f1_for_task893_gap_coreference_resolution": 26.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 34.3333, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 17.4606, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 6.2859, "eval_f1_for_task957_e2e_data_to_text": 52.4371, "eval_f1_for_task970_sherliic_textual_entailment": 0.0, "eval_f1_for_textual_entailment": 16.8822, "eval_f1_for_title_generation": 27.9193, "eval_f1_for_word_analogy": 1.5417, "eval_gen_len": 36.6346, "eval_global_step": 1, "eval_loss": 5.386216163635254, "eval_rouge1": 29.221, "eval_rouge1_for_answerability_classification": 18.7741, "eval_rouge1_for_cause_effect_classification": 27.801, "eval_rouge1_for_coreference_resolution": 22.3337, "eval_rouge1_for_data_to_text": 53.5713, "eval_rouge1_for_dialogue_act_recognition": 26.7607, "eval_rouge1_for_grammar_error_correction": 58.489, "eval_rouge1_for_keyword_tagging": 34.029, "eval_rouge1_for_overlap_extraction": 33.561, "eval_rouge1_for_question_rewriting": 61.5221, "eval_rouge1_for_task020_mctaco_answerability_classification": 35.1247, "eval_rouge1_for_task033_winogrande_coreference_resolution": 7.0, "eval_rouge1_for_task034_winogrande_question_rewriting": 65.9167, "eval_rouge1_for_task035_winogrande_question_rewriting": 70.7371, "eval_rouge1_for_task036_qasc_keyword_tagging": 52.7489, "eval_rouge1_for_task039_qasc_overlap_extraction": 21.6976, "eval_rouge1_for_task050_multirc_answerability_classification": 40.0, "eval_rouge1_for_task102_commongen_data_to_text": 73.5415, "eval_rouge1_for_task1152_bard_word_analogy": 0.0, "eval_rouge1_for_task1153_bard_word_analogy": 2.6667, "eval_rouge1_for_task1154_bard_word_analogy": 0.0, "eval_rouge1_for_task1155_bard_word_analogy": 0.0, "eval_rouge1_for_task1156_bard_word_analogy": 5.6667, "eval_rouge1_for_task1157_bard_word_analogy": 0.0, "eval_rouge1_for_task1158_bard_word_analogy": 0.0, "eval_rouge1_for_task1159_bard_word_analogy": 4.0, "eval_rouge1_for_task1161_coda_19_title_generation": 31.0522, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 72.5426, "eval_rouge1_for_task121_atomic_question_rewriting": 50.3881, "eval_rouge1_for_task133_winowhy_coreference_resolution": 44.3519, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.2059, "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 34.4477, "eval_rouge1_for_task1356_xlsum_title_generation": 13.3458, "eval_rouge1_for_task1358_xlsum_title_generation": 36.9829, "eval_rouge1_for_task1385_anli_textual_entailment": 19.6705, "eval_rouge1_for_task1386_anli_textual_entailment": 3.057, "eval_rouge1_for_task1387_anli_textual_entailment": 11.7124, "eval_rouge1_for_task1388_cb_textual_entailment": 8.146, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 21.3333, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 0.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 1.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 3.8192, "eval_rouge1_for_task1407_dart_data_to_text": 41.7377, "eval_rouge1_for_task1409_dart_data_to_text": 55.2234, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.3096, "eval_rouge1_for_task1439_doqa_answerability_classification": 0.5802, "eval_rouge1_for_task1442_doqa_answerability_classification": 2.2885, "eval_rouge1_for_task1516_imppres_textual_entailment": 25.5236, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 2.9616, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 9.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 4.5, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 48.0615, "eval_rouge1_for_task1540_peer_read_title_generation": 12.8334, "eval_rouge1_for_task1554_scitail_textual_entailment": 2.742, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 80.6684, "eval_rouge1_for_task1562_zest_question_rewriting": 59.8613, "eval_rouge1_for_task1586_scifact_title_generation": 30.1217, "eval_rouge1_for_task1598_nyc_data_to_text": 55.342, "eval_rouge1_for_task1612_sick_textual_entailment": 2.9018, "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 73.9947, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 28.487, "eval_rouge1_for_task1631_open_pi_data_to_text": 63.3477, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 22.0256, "eval_rouge1_for_task1659_billsum_title_generation": 20.6854, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 48.2333, "eval_rouge1_for_task1728_web_nlg_data_to_text": 62.7445, "eval_rouge1_for_task190_snli_textual_entailment": 2.4653, "eval_rouge1_for_task199_multinli_textual_entailment": 27.0, "eval_rouge1_for_task200_multinli_textual_entailment": 33.0, "eval_rouge1_for_task201_multinli_textual_entailment": 23.1438, "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, "eval_rouge1_for_task219_rocstories_title_generation": 23.7494, "eval_rouge1_for_task220_rocstories_title_generation": 75.0803, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 2.3896, "eval_rouge1_for_task232_iirc_answerability_classification": 2.9714, "eval_rouge1_for_task233_iirc_answerability_classification": 1.2126, "eval_rouge1_for_task242_tweetqa_answerability_classification": 44.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 28.4881, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 45.4243, "eval_rouge1_for_task288_gigaword_title_generation": 32.0391, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 4.5727, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 2.5741, "eval_rouge1_for_task329_gap_coreference_resolution": 32.2917, "eval_rouge1_for_task330_gap_coreference_resolution": 15.9333, "eval_rouge1_for_task349_squad2.0_answerability_classification": 8.2324, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 40.2778, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 37.3, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 23.1896, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 24.4822, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 8.4077, "eval_rouge1_for_task402_grailqa_question_rewriting": 59.6859, "eval_rouge1_for_task418_persent_title_generation": 19.4105, "eval_rouge1_for_task442_com_qa_question_rewriting": 62.9625, "eval_rouge1_for_task500_scruples_title_generation": 16.0428, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 30.6988, "eval_rouge1_for_task520_aquamuse_answerability_classification": 52.1786, "eval_rouge1_for_task569_recipe_nlg_title_generation": 44.0994, "eval_rouge1_for_task602_wikitext_title_generation": 6.3047, "eval_rouge1_for_task613_liar_keyword_tagging": 24.0, "eval_rouge1_for_task614_glucose_cause_effect_classification": 44.9021, "eval_rouge1_for_task619_ohsumed_title_generation": 39.8197, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 19.7677, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 2.5283, "eval_rouge1_for_task640_e_snli_textual_entailment": 1.8571, "eval_rouge1_for_task641_e_snli_textual_entailment": 0.25, "eval_rouge1_for_task642_e_snli_textual_entailment": 20.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 71.1004, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 24.3916, "eval_rouge1_for_task670_ambigqa_question_rewriting": 73.7524, "eval_rouge1_for_task671_ambigqa_question_rewriting": 52.4544, "eval_rouge1_for_task677_ollie_data_to_text": 35.0477, "eval_rouge1_for_task738_perspectrum_textual_entailment": 46.6667, "eval_rouge1_for_task743_eurlex_title_generation": 22.6371, "eval_rouge1_for_task760_msr_sqa_data_to_text": 8.3794, "eval_rouge1_for_task769_qed_title_generation": 67.3846, "eval_rouge1_for_task827_copa_cause_effect_classification": 33.3333, "eval_rouge1_for_task828_copa_cause_effect_classification": 30.4, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.6667, "eval_rouge1_for_task890_gwsd_textual_entailment": 7.4124, "eval_rouge1_for_task891_gap_coreference_resolution": 39.6667, "eval_rouge1_for_task892_gap_coreference_resolution": 14.0, "eval_rouge1_for_task893_gap_coreference_resolution": 26.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 37.3333, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 45.4584, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 38.2781, "eval_rouge1_for_task957_e2e_data_to_text": 53.336, "eval_rouge1_for_task970_sherliic_textual_entailment": 0.0, "eval_rouge1_for_textual_entailment": 21.6769, "eval_rouge1_for_title_generation": 30.016, "eval_rouge1_for_word_analogy": 1.5417, "eval_rougeL": 27.7552, "eval_rougeL_for_answerability_classification": 18.7741, "eval_rougeL_for_cause_effect_classification": 26.6129, "eval_rougeL_for_coreference_resolution": 22.075, "eval_rougeL_for_data_to_text": 46.0381, "eval_rougeL_for_dialogue_act_recognition": 26.7508, "eval_rougeL_for_grammar_error_correction": 57.621, "eval_rougeL_for_keyword_tagging": 32.9357, "eval_rougeL_for_overlap_extraction": 32.9762, "eval_rougeL_for_question_rewriting": 57.3623, "eval_rougeL_for_task020_mctaco_answerability_classification": 35.1247, "eval_rougeL_for_task033_winogrande_coreference_resolution": 7.0, "eval_rougeL_for_task034_winogrande_question_rewriting": 65.2165, "eval_rougeL_for_task035_winogrande_question_rewriting": 66.8884, "eval_rougeL_for_task036_qasc_keyword_tagging": 50.123, "eval_rougeL_for_task039_qasc_overlap_extraction": 21.6976, "eval_rougeL_for_task050_multirc_answerability_classification": 40.0, "eval_rougeL_for_task102_commongen_data_to_text": 65.5943, "eval_rougeL_for_task1152_bard_word_analogy": 0.0, "eval_rougeL_for_task1153_bard_word_analogy": 2.6667, "eval_rougeL_for_task1154_bard_word_analogy": 0.0, "eval_rougeL_for_task1155_bard_word_analogy": 0.0, "eval_rougeL_for_task1156_bard_word_analogy": 5.6667, "eval_rougeL_for_task1157_bard_word_analogy": 0.0, "eval_rougeL_for_task1158_bard_word_analogy": 0.0, "eval_rougeL_for_task1159_bard_word_analogy": 4.0, "eval_rougeL_for_task1161_coda_19_title_generation": 24.7961, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 70.099, "eval_rougeL_for_task121_atomic_question_rewriting": 45.0941, "eval_rougeL_for_task133_winowhy_coreference_resolution": 44.3519, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.198, "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 30.1515, "eval_rougeL_for_task1356_xlsum_title_generation": 11.263, "eval_rougeL_for_task1358_xlsum_title_generation": 30.8987, "eval_rougeL_for_task1385_anli_textual_entailment": 19.6705, "eval_rougeL_for_task1386_anli_textual_entailment": 3.057, "eval_rougeL_for_task1387_anli_textual_entailment": 11.7124, "eval_rougeL_for_task1388_cb_textual_entailment": 8.146, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 21.3333, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 0.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 1.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 3.7494, "eval_rougeL_for_task1407_dart_data_to_text": 37.8486, "eval_rougeL_for_task1409_dart_data_to_text": 46.5167, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.9619, "eval_rougeL_for_task1439_doqa_answerability_classification": 0.5802, "eval_rougeL_for_task1442_doqa_answerability_classification": 2.2885, "eval_rougeL_for_task1516_imppres_textual_entailment": 25.5236, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 2.9616, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 9.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 4.5, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 48.0615, "eval_rougeL_for_task1540_peer_read_title_generation": 11.0377, "eval_rougeL_for_task1554_scitail_textual_entailment": 2.742, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 79.2801, "eval_rougeL_for_task1562_zest_question_rewriting": 51.8325, "eval_rougeL_for_task1586_scifact_title_generation": 24.4521, "eval_rougeL_for_task1598_nyc_data_to_text": 41.7464, "eval_rougeL_for_task1612_sick_textual_entailment": 2.9018, "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 72.2453, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 28.487, "eval_rougeL_for_task1631_open_pi_data_to_text": 61.961, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 22.0256, "eval_rougeL_for_task1659_billsum_title_generation": 17.2232, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 45.0429, "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.5739, "eval_rougeL_for_task190_snli_textual_entailment": 2.4653, "eval_rougeL_for_task199_multinli_textual_entailment": 27.0, "eval_rougeL_for_task200_multinli_textual_entailment": 33.0, "eval_rougeL_for_task201_multinli_textual_entailment": 23.1438, "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, "eval_rougeL_for_task219_rocstories_title_generation": 23.5272, "eval_rougeL_for_task220_rocstories_title_generation": 75.0803, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 2.3896, "eval_rougeL_for_task232_iirc_answerability_classification": 2.9714, "eval_rougeL_for_task233_iirc_answerability_classification": 1.2126, "eval_rougeL_for_task242_tweetqa_answerability_classification": 44.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 28.2024, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 44.2548, "eval_rougeL_for_task288_gigaword_title_generation": 28.4022, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 4.5727, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 2.5741, "eval_rougeL_for_task329_gap_coreference_resolution": 32.2917, "eval_rougeL_for_task330_gap_coreference_resolution": 15.9333, "eval_rougeL_for_task349_squad2.0_answerability_classification": 8.2324, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 40.2778, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 37.3, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 23.1896, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 22.3364, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 8.4077, "eval_rougeL_for_task402_grailqa_question_rewriting": 51.2002, "eval_rougeL_for_task418_persent_title_generation": 17.0545, "eval_rougeL_for_task442_com_qa_question_rewriting": 55.7407, "eval_rougeL_for_task500_scruples_title_generation": 14.1112, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 30.1171, "eval_rougeL_for_task520_aquamuse_answerability_classification": 52.1786, "eval_rougeL_for_task569_recipe_nlg_title_generation": 41.3899, "eval_rougeL_for_task602_wikitext_title_generation": 6.22, "eval_rougeL_for_task613_liar_keyword_tagging": 24.0, "eval_rougeL_for_task614_glucose_cause_effect_classification": 38.7309, "eval_rougeL_for_task619_ohsumed_title_generation": 34.8616, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 19.1267, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 2.5283, "eval_rougeL_for_task640_e_snli_textual_entailment": 1.8571, "eval_rougeL_for_task641_e_snli_textual_entailment": 0.25, "eval_rougeL_for_task642_e_snli_textual_entailment": 20.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 68.9004, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 24.2455, "eval_rougeL_for_task670_ambigqa_question_rewriting": 71.6867, "eval_rougeL_for_task671_ambigqa_question_rewriting": 50.8308, "eval_rougeL_for_task677_ollie_data_to_text": 28.8542, "eval_rougeL_for_task738_perspectrum_textual_entailment": 46.6667, "eval_rougeL_for_task743_eurlex_title_generation": 19.2719, "eval_rougeL_for_task760_msr_sqa_data_to_text": 7.2901, "eval_rougeL_for_task769_qed_title_generation": 67.3637, "eval_rougeL_for_task827_copa_cause_effect_classification": 33.3333, "eval_rougeL_for_task828_copa_cause_effect_classification": 30.4, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.6667, "eval_rougeL_for_task890_gwsd_textual_entailment": 7.4124, "eval_rougeL_for_task891_gap_coreference_resolution": 39.6667, "eval_rougeL_for_task892_gap_coreference_resolution": 14.0, "eval_rougeL_for_task893_gap_coreference_resolution": 26.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 37.3333, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 45.4584, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 38.2781, "eval_rougeL_for_task957_e2e_data_to_text": 42.2841, "eval_rougeL_for_task970_sherliic_textual_entailment": 0.0, "eval_rougeL_for_textual_entailment": 21.6769, "eval_rougeL_for_title_generation": 27.4256, "eval_rougeL_for_word_analogy": 1.5417, "eval_runtime": 2851.9569, "eval_samples_per_second": 4.176, "eval_steps_per_second": 0.261, "step": 1 }, { "epoch": 0.01, "learning_rate": 5e-05, "loss": 1.7618, "step": 50 }, { "epoch": 0.01, "eval_exact_match": 29.2024, "eval_exact_match_for_answerability_classification": 49.8462, "eval_exact_match_for_cause_effect_classification": 35.7143, "eval_exact_match_for_coreference_resolution": 38.2857, "eval_exact_match_for_data_to_text": 7.6271, "eval_exact_match_for_dialogue_act_recognition": 45.4286, "eval_exact_match_for_grammar_error_correction": 9.0, "eval_exact_match_for_keyword_tagging": 43.4, "eval_exact_match_for_overlap_extraction": 10.5, "eval_exact_match_for_question_rewriting": 1.8182, "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 42.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 40.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 21.0, "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 21.0, "eval_exact_match_for_task1153_bard_word_analogy": 25.0, "eval_exact_match_for_task1154_bard_word_analogy": 16.0, "eval_exact_match_for_task1155_bard_word_analogy": 50.0, "eval_exact_match_for_task1156_bard_word_analogy": 46.0, "eval_exact_match_for_task1157_bard_word_analogy": 55.0, "eval_exact_match_for_task1158_bard_word_analogy": 23.0, "eval_exact_match_for_task1159_bard_word_analogy": 17.0, "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 7.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 57.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 0.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 51.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 35.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 47.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 18.0, "eval_exact_match_for_task1562_zest_question_rewriting": 2.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 3.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 56.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_exact_match_for_task1659_billsum_title_generation": 1.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 30.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, "eval_exact_match_for_task190_snli_textual_entailment": 0.0, "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, "eval_exact_match_for_task219_rocstories_title_generation": 7.0, "eval_exact_match_for_task220_rocstories_title_generation": 75.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_exact_match_for_task232_iirc_answerability_classification": 49.0, "eval_exact_match_for_task233_iirc_answerability_classification": 51.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 41.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 5.0, "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, "eval_exact_match_for_task330_gap_coreference_resolution": 62.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 32.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, "eval_exact_match_for_task418_persent_title_generation": 2.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, "eval_exact_match_for_task500_scruples_title_generation": 1.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 8.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 10.0, "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, "eval_exact_match_for_task613_liar_keyword_tagging": 18.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 25.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 30.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 48.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 84.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 11.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, "eval_exact_match_for_task743_eurlex_title_generation": 2.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 76.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 29.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, "eval_exact_match_for_task891_gap_coreference_resolution": 44.0, "eval_exact_match_for_task892_gap_coreference_resolution": 49.0, "eval_exact_match_for_task893_gap_coreference_resolution": 37.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 70.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 58.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 54.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 45.0, "eval_exact_match_for_textual_entailment": 39.2917, "eval_exact_match_for_title_generation": 10.7063, "eval_exact_match_for_word_analogy": 31.625, "eval_f1": 46.0933, "eval_f1_for_answerability_classification": 52.4103, "eval_f1_for_cause_effect_classification": 55.7142, "eval_f1_for_coreference_resolution": 45.1074, "eval_f1_for_data_to_text": 53.8438, "eval_f1_for_dialogue_act_recognition": 49.0, "eval_f1_for_grammar_error_correction": 56.9407, "eval_f1_for_keyword_tagging": 55.3024, "eval_f1_for_overlap_extraction": 34.4972, "eval_f1_for_question_rewriting": 66.5207, "eval_f1_for_task020_mctaco_answerability_classification": 50.0, "eval_f1_for_task033_winogrande_coreference_resolution": 42.0, "eval_f1_for_task034_winogrande_question_rewriting": 72.5511, "eval_f1_for_task035_winogrande_question_rewriting": 83.3835, "eval_f1_for_task036_qasc_keyword_tagging": 70.7167, "eval_f1_for_task039_qasc_overlap_extraction": 24.3333, "eval_f1_for_task050_multirc_answerability_classification": 50.0, "eval_f1_for_task102_commongen_data_to_text": 56.0761, "eval_f1_for_task1152_bard_word_analogy": 21.0, "eval_f1_for_task1153_bard_word_analogy": 25.0, "eval_f1_for_task1154_bard_word_analogy": 16.0, "eval_f1_for_task1155_bard_word_analogy": 50.0, "eval_f1_for_task1156_bard_word_analogy": 46.0, "eval_f1_for_task1157_bard_word_analogy": 55.0, "eval_f1_for_task1158_bard_word_analogy": 23.0, "eval_f1_for_task1159_bard_word_analogy": 17.0, "eval_f1_for_task1161_coda_19_title_generation": 28.7181, "eval_f1_for_task1195_disfl_qa_question_rewriting": 78.1539, "eval_f1_for_task121_atomic_question_rewriting": 49.0983, "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 10.4245, "eval_f1_for_task1344_rte_textual_entailment": 50.0, "eval_f1_for_task1345_qqp_question_rewriting": 37.5067, "eval_f1_for_task1356_xlsum_title_generation": 14.2003, "eval_f1_for_task1358_xlsum_title_generation": 35.9439, "eval_f1_for_task1385_anli_textual_entailment": 34.0, "eval_f1_for_task1386_anli_textual_entailment": 34.0, "eval_f1_for_task1387_anli_textual_entailment": 33.0, "eval_f1_for_task1388_cb_textual_entailment": 20.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 57.0, "eval_f1_for_task1407_dart_data_to_text": 40.7465, "eval_f1_for_task1409_dart_data_to_text": 53.2023, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.3148, "eval_f1_for_task1439_doqa_answerability_classification": 49.0, "eval_f1_for_task1442_doqa_answerability_classification": 51.0, "eval_f1_for_task1516_imppres_textual_entailment": 33.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 35.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 47.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1540_peer_read_title_generation": 35.7754, "eval_f1_for_task1554_scitail_textual_entailment": 50.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.5666, "eval_f1_for_task1562_zest_question_rewriting": 50.1974, "eval_f1_for_task1586_scifact_title_generation": 31.1942, "eval_f1_for_task1598_nyc_data_to_text": 47.8929, "eval_f1_for_task1612_sick_textual_entailment": 33.0, "eval_f1_for_task1615_sick_textual_entailment": 33.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.9193, "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_f1_for_task1631_open_pi_data_to_text": 94.9709, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_f1_for_task1659_billsum_title_generation": 37.7866, "eval_f1_for_task1664_wino_bias_coreference_resolution": 69.9524, "eval_f1_for_task1728_web_nlg_data_to_text": 57.9505, "eval_f1_for_task190_snli_textual_entailment": 0.0, "eval_f1_for_task199_multinli_textual_entailment": 50.0, "eval_f1_for_task200_multinli_textual_entailment": 34.0, "eval_f1_for_task201_multinli_textual_entailment": 34.0, "eval_f1_for_task202_multinli_textual_entailment": 33.0, "eval_f1_for_task219_rocstories_title_generation": 19.6216, "eval_f1_for_task220_rocstories_title_generation": 75.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_f1_for_task232_iirc_answerability_classification": 49.0, "eval_f1_for_task233_iirc_answerability_classification": 51.0, "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 54.2048, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 44.661, "eval_f1_for_task288_gigaword_title_generation": 32.4417, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 9.3333, "eval_f1_for_task329_gap_coreference_resolution": 33.0, "eval_f1_for_task330_gap_coreference_resolution": 67.0905, "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task393_cod3s_cause_effect_classification": 32.5551, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 32.6667, "eval_f1_for_task402_grailqa_question_rewriting": 82.319, "eval_f1_for_task418_persent_title_generation": 25.4611, "eval_f1_for_task442_com_qa_question_rewriting": 68.6107, "eval_f1_for_task500_scruples_title_generation": 18.7237, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 41.9776, "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, "eval_f1_for_task569_recipe_nlg_title_generation": 43.7596, "eval_f1_for_task602_wikitext_title_generation": 13.2755, "eval_f1_for_task613_liar_keyword_tagging": 18.6667, "eval_f1_for_task614_glucose_cause_effect_classification": 40.7774, "eval_f1_for_task619_ohsumed_title_generation": 37.9964, "eval_f1_for_task620_ohsumed_keyword_tagging": 46.0, "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, "eval_f1_for_task640_e_snli_textual_entailment": 30.0, "eval_f1_for_task641_e_snli_textual_entailment": 33.0, "eval_f1_for_task642_e_snli_textual_entailment": 48.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 91.1286, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 28.7556, "eval_f1_for_task670_ambigqa_question_rewriting": 72.5278, "eval_f1_for_task671_ambigqa_question_rewriting": 58.4596, "eval_f1_for_task677_ollie_data_to_text": 35.334, "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, "eval_f1_for_task743_eurlex_title_generation": 29.0484, "eval_f1_for_task760_msr_sqa_data_to_text": 7.1889, "eval_f1_for_task769_qed_title_generation": 82.7863, "eval_f1_for_task827_copa_cause_effect_classification": 50.0, "eval_f1_for_task828_copa_cause_effect_classification": 50.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 29.0, "eval_f1_for_task890_gwsd_textual_entailment": 34.0, "eval_f1_for_task891_gap_coreference_resolution": 58.5, "eval_f1_for_task892_gap_coreference_resolution": 49.0, "eval_f1_for_task893_gap_coreference_resolution": 37.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 70.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 58.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 54.0, "eval_f1_for_task957_e2e_data_to_text": 56.7074, "eval_f1_for_task970_sherliic_textual_entailment": 45.0, "eval_f1_for_textual_entailment": 39.2917, "eval_f1_for_title_generation": 34.3055, "eval_f1_for_word_analogy": 31.625, "eval_gen_len": 10.3191, "eval_global_step": 50, "eval_loss": 1.0946542024612427, "eval_rouge1": 48.0335, "eval_rouge1_for_answerability_classification": 52.4103, "eval_rouge1_for_cause_effect_classification": 56.8007, "eval_rouge1_for_coreference_resolution": 46.1912, "eval_rouge1_for_data_to_text": 56.8368, "eval_rouge1_for_dialogue_act_recognition": 53.7551, "eval_rouge1_for_grammar_error_correction": 61.924, "eval_rouge1_for_keyword_tagging": 60.029, "eval_rouge1_for_overlap_extraction": 38.4496, "eval_rouge1_for_question_rewriting": 68.2683, "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 46.0, "eval_rouge1_for_task034_winogrande_question_rewriting": 72.5731, "eval_rouge1_for_task035_winogrande_question_rewriting": 84.4531, "eval_rouge1_for_task036_qasc_keyword_tagging": 77.8833, "eval_rouge1_for_task039_qasc_overlap_extraction": 31.3333, "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, "eval_rouge1_for_task102_commongen_data_to_text": 69.8629, "eval_rouge1_for_task1152_bard_word_analogy": 21.0, "eval_rouge1_for_task1153_bard_word_analogy": 25.0, "eval_rouge1_for_task1154_bard_word_analogy": 16.0, "eval_rouge1_for_task1155_bard_word_analogy": 50.0, "eval_rouge1_for_task1156_bard_word_analogy": 46.0, "eval_rouge1_for_task1157_bard_word_analogy": 55.0, "eval_rouge1_for_task1158_bard_word_analogy": 23.0, "eval_rouge1_for_task1159_bard_word_analogy": 17.0, "eval_rouge1_for_task1161_coda_19_title_generation": 32.7158, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 78.9537, "eval_rouge1_for_task121_atomic_question_rewriting": 51.3883, "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 10.9195, "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 40.8293, "eval_rouge1_for_task1356_xlsum_title_generation": 17.864, "eval_rouge1_for_task1358_xlsum_title_generation": 42.0183, "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 62.2857, "eval_rouge1_for_task1407_dart_data_to_text": 41.8804, "eval_rouge1_for_task1409_dart_data_to_text": 54.0022, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.9918, "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 51.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 43.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 47.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1540_peer_read_title_generation": 38.6926, "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.8562, "eval_rouge1_for_task1562_zest_question_rewriting": 53.3238, "eval_rouge1_for_task1586_scifact_title_generation": 35.0911, "eval_rouge1_for_task1598_nyc_data_to_text": 50.5792, "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.6169, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 95.2173, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_rouge1_for_task1659_billsum_title_generation": 39.2031, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 69.9524, "eval_rouge1_for_task1728_web_nlg_data_to_text": 59.5793, "eval_rouge1_for_task190_snli_textual_entailment": 0.0, "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, "eval_rouge1_for_task219_rocstories_title_generation": 27.1041, "eval_rouge1_for_task220_rocstories_title_generation": 75.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_rouge1_for_task232_iirc_answerability_classification": 49.0, "eval_rouge1_for_task233_iirc_answerability_classification": 51.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 55.05, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 45.5658, "eval_rouge1_for_task288_gigaword_title_generation": 35.0351, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 9.3333, "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, "eval_rouge1_for_task330_gap_coreference_resolution": 67.019, "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.3793, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 43.3333, "eval_rouge1_for_task402_grailqa_question_rewriting": 84.8019, "eval_rouge1_for_task418_persent_title_generation": 28.752, "eval_rouge1_for_task442_com_qa_question_rewriting": 71.8318, "eval_rouge1_for_task500_scruples_title_generation": 20.4716, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 42.7422, "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 44.9778, "eval_rouge1_for_task602_wikitext_title_generation": 13.8177, "eval_rouge1_for_task613_liar_keyword_tagging": 31.8333, "eval_rouge1_for_task614_glucose_cause_effect_classification": 47.5587, "eval_rouge1_for_task619_ohsumed_title_generation": 41.0394, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 49.3, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 30.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 48.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 91.1286, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 28.6556, "eval_rouge1_for_task670_ambigqa_question_rewriting": 73.8098, "eval_rouge1_for_task671_ambigqa_question_rewriting": 59.3699, "eval_rouge1_for_task677_ollie_data_to_text": 37.9904, "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, "eval_rouge1_for_task743_eurlex_title_generation": 30.5236, "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.6401, "eval_rouge1_for_task769_qed_title_generation": 83.1529, "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 49.0, "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, "eval_rouge1_for_task891_gap_coreference_resolution": 58.3333, "eval_rouge1_for_task892_gap_coreference_resolution": 49.0, "eval_rouge1_for_task893_gap_coreference_resolution": 37.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 70.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 58.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 54.0, "eval_rouge1_for_task957_e2e_data_to_text": 58.374, "eval_rouge1_for_task970_sherliic_textual_entailment": 45.0, "eval_rouge1_for_textual_entailment": 41.1528, "eval_rouge1_for_title_generation": 36.8223, "eval_rouge1_for_word_analogy": 31.625, "eval_rougeL": 46.5269, "eval_rougeL_for_answerability_classification": 52.4103, "eval_rougeL_for_cause_effect_classification": 55.9625, "eval_rougeL_for_coreference_resolution": 46.1912, "eval_rougeL_for_data_to_text": 48.4114, "eval_rougeL_for_dialogue_act_recognition": 53.7551, "eval_rougeL_for_grammar_error_correction": 61.2403, "eval_rougeL_for_keyword_tagging": 59.5624, "eval_rougeL_for_overlap_extraction": 37.8964, "eval_rougeL_for_question_rewriting": 64.1442, "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 46.0, "eval_rougeL_for_task034_winogrande_question_rewriting": 72.4352, "eval_rougeL_for_task035_winogrande_question_rewriting": 82.9875, "eval_rougeL_for_task036_qasc_keyword_tagging": 77.55, "eval_rougeL_for_task039_qasc_overlap_extraction": 31.3333, "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, "eval_rougeL_for_task102_commongen_data_to_text": 59.682, "eval_rougeL_for_task1152_bard_word_analogy": 21.0, "eval_rougeL_for_task1153_bard_word_analogy": 25.0, "eval_rougeL_for_task1154_bard_word_analogy": 16.0, "eval_rougeL_for_task1155_bard_word_analogy": 50.0, "eval_rougeL_for_task1156_bard_word_analogy": 46.0, "eval_rougeL_for_task1157_bard_word_analogy": 55.0, "eval_rougeL_for_task1158_bard_word_analogy": 23.0, "eval_rougeL_for_task1159_bard_word_analogy": 17.0, "eval_rougeL_for_task1161_coda_19_title_generation": 25.4434, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 77.0012, "eval_rougeL_for_task121_atomic_question_rewriting": 46.9653, "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 10.3696, "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 37.1878, "eval_rougeL_for_task1356_xlsum_title_generation": 14.9765, "eval_rougeL_for_task1358_xlsum_title_generation": 35.0009, "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 62.2857, "eval_rougeL_for_task1407_dart_data_to_text": 33.4385, "eval_rougeL_for_task1409_dart_data_to_text": 44.023, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.4892, "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 51.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 43.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 47.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1540_peer_read_title_generation": 36.0262, "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.9913, "eval_rougeL_for_task1562_zest_question_rewriting": 47.4892, "eval_rougeL_for_task1586_scifact_title_generation": 27.9526, "eval_rougeL_for_task1598_nyc_data_to_text": 37.8924, "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.8567, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 95.0071, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_rougeL_for_task1659_billsum_title_generation": 33.7868, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 69.9524, "eval_rougeL_for_task1728_web_nlg_data_to_text": 52.9496, "eval_rougeL_for_task190_snli_textual_entailment": 0.0, "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, "eval_rougeL_for_task219_rocstories_title_generation": 27.1041, "eval_rougeL_for_task220_rocstories_title_generation": 75.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_rougeL_for_task232_iirc_answerability_classification": 49.0, "eval_rougeL_for_task233_iirc_answerability_classification": 51.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 55.05, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 44.4595, "eval_rougeL_for_task288_gigaword_title_generation": 30.3075, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 9.3333, "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, "eval_rougeL_for_task330_gap_coreference_resolution": 67.019, "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.766, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 43.3333, "eval_rougeL_for_task402_grailqa_question_rewriting": 68.5633, "eval_rougeL_for_task418_persent_title_generation": 24.6613, "eval_rougeL_for_task442_com_qa_question_rewriting": 66.3295, "eval_rougeL_for_task500_scruples_title_generation": 18.7792, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 41.3418, "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 43.4078, "eval_rougeL_for_task602_wikitext_title_generation": 13.8177, "eval_rougeL_for_task613_liar_keyword_tagging": 31.8333, "eval_rougeL_for_task614_glucose_cause_effect_classification": 43.3048, "eval_rougeL_for_task619_ohsumed_title_generation": 36.784, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 47.7, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 30.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 48.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 90.7286, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 28.6556, "eval_rougeL_for_task670_ambigqa_question_rewriting": 71.5592, "eval_rougeL_for_task671_ambigqa_question_rewriting": 57.2108, "eval_rougeL_for_task677_ollie_data_to_text": 30.4476, "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, "eval_rougeL_for_task743_eurlex_title_generation": 27.4044, "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.5213, "eval_rougeL_for_task769_qed_title_generation": 83.1529, "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 49.0, "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, "eval_rougeL_for_task891_gap_coreference_resolution": 58.3333, "eval_rougeL_for_task892_gap_coreference_resolution": 49.0, "eval_rougeL_for_task893_gap_coreference_resolution": 37.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 70.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 58.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 54.0, "eval_rougeL_for_task957_e2e_data_to_text": 44.7428, "eval_rougeL_for_task970_sherliic_textual_entailment": 45.0, "eval_rougeL_for_textual_entailment": 41.1528, "eval_rougeL_for_title_generation": 33.8064, "eval_rougeL_for_word_analogy": 31.625, "eval_runtime": 1035.4211, "eval_samples_per_second": 11.503, "eval_steps_per_second": 0.72, "step": 50 }, { "epoch": 0.02, "learning_rate": 5e-05, "loss": 1.2952, "step": 100 }, { "epoch": 0.02, "eval_exact_match": 27.8002, "eval_exact_match_for_answerability_classification": 42.3846, "eval_exact_match_for_cause_effect_classification": 35.7143, "eval_exact_match_for_coreference_resolution": 36.5714, "eval_exact_match_for_data_to_text": 6.7797, "eval_exact_match_for_dialogue_act_recognition": 46.7143, "eval_exact_match_for_grammar_error_correction": 7.5, "eval_exact_match_for_keyword_tagging": 38.8, "eval_exact_match_for_overlap_extraction": 11.5, "eval_exact_match_for_question_rewriting": 1.7273, "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 43.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 38.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, "eval_exact_match_for_task050_multirc_answerability_classification": 49.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 20.0, "eval_exact_match_for_task1153_bard_word_analogy": 21.0, "eval_exact_match_for_task1154_bard_word_analogy": 17.0, "eval_exact_match_for_task1155_bard_word_analogy": 63.0, "eval_exact_match_for_task1156_bard_word_analogy": 36.0, "eval_exact_match_for_task1157_bard_word_analogy": 51.0, "eval_exact_match_for_task1158_bard_word_analogy": 18.0, "eval_exact_match_for_task1159_bard_word_analogy": 17.0, "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 6.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 40.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 4.0, "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, "eval_exact_match_for_task1387_anli_textual_entailment": 35.0, "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 54.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 0.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 50.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 50.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 35.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 38.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1540_peer_read_title_generation": 4.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, "eval_exact_match_for_task1562_zest_question_rewriting": 2.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, "eval_exact_match_for_task1615_sick_textual_entailment": 40.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 51.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 51.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_exact_match_for_task1659_billsum_title_generation": 3.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 27.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 5.0, "eval_exact_match_for_task190_snli_textual_entailment": 49.0, "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, "eval_exact_match_for_task219_rocstories_title_generation": 12.0, "eval_exact_match_for_task220_rocstories_title_generation": 50.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_exact_match_for_task232_iirc_answerability_classification": 2.0, "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 46.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 51.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 5.0, "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, "eval_exact_match_for_task330_gap_coreference_resolution": 61.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 49.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 16.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, "eval_exact_match_for_task500_scruples_title_generation": 1.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 6.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 12.0, "eval_exact_match_for_task602_wikitext_title_generation": 5.9524, "eval_exact_match_for_task613_liar_keyword_tagging": 12.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 13.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 32.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 81.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 0.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 7.0, "eval_exact_match_for_task743_eurlex_title_generation": 2.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 71.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 44.0, "eval_exact_match_for_task891_gap_coreference_resolution": 43.0, "eval_exact_match_for_task892_gap_coreference_resolution": 50.0, "eval_exact_match_for_task893_gap_coreference_resolution": 48.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, "eval_exact_match_for_textual_entailment": 39.5, "eval_exact_match_for_title_generation": 9.6973, "eval_exact_match_for_word_analogy": 30.375, "eval_f1": 44.6228, "eval_f1_for_answerability_classification": 44.8974, "eval_f1_for_cause_effect_classification": 54.8739, "eval_f1_for_coreference_resolution": 42.1175, "eval_f1_for_data_to_text": 54.0899, "eval_f1_for_dialogue_act_recognition": 50.3571, "eval_f1_for_grammar_error_correction": 56.8646, "eval_f1_for_keyword_tagging": 50.5376, "eval_f1_for_overlap_extraction": 34.0076, "eval_f1_for_question_rewriting": 68.4843, "eval_f1_for_task020_mctaco_answerability_classification": 50.0, "eval_f1_for_task033_winogrande_coreference_resolution": 44.6667, "eval_f1_for_task034_winogrande_question_rewriting": 73.8096, "eval_f1_for_task035_winogrande_question_rewriting": 86.1814, "eval_f1_for_task036_qasc_keyword_tagging": 66.031, "eval_f1_for_task039_qasc_overlap_extraction": 28.3333, "eval_f1_for_task050_multirc_answerability_classification": 49.0, "eval_f1_for_task102_commongen_data_to_text": 55.65, "eval_f1_for_task1152_bard_word_analogy": 20.0, "eval_f1_for_task1153_bard_word_analogy": 21.0, "eval_f1_for_task1154_bard_word_analogy": 17.0, "eval_f1_for_task1155_bard_word_analogy": 63.0, "eval_f1_for_task1156_bard_word_analogy": 36.6667, "eval_f1_for_task1157_bard_word_analogy": 51.0, "eval_f1_for_task1158_bard_word_analogy": 18.0, "eval_f1_for_task1159_bard_word_analogy": 17.0, "eval_f1_for_task1161_coda_19_title_generation": 26.0095, "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.9955, "eval_f1_for_task121_atomic_question_rewriting": 50.9824, "eval_f1_for_task133_winowhy_coreference_resolution": 40.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.0786, "eval_f1_for_task1344_rte_textual_entailment": 50.0, "eval_f1_for_task1345_qqp_question_rewriting": 39.2366, "eval_f1_for_task1356_xlsum_title_generation": 13.5849, "eval_f1_for_task1358_xlsum_title_generation": 35.3944, "eval_f1_for_task1385_anli_textual_entailment": 34.0, "eval_f1_for_task1386_anli_textual_entailment": 34.0, "eval_f1_for_task1387_anli_textual_entailment": 35.0, "eval_f1_for_task1388_cb_textual_entailment": 20.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 54.0, "eval_f1_for_task1407_dart_data_to_text": 42.8334, "eval_f1_for_task1409_dart_data_to_text": 51.4929, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 28.979, "eval_f1_for_task1439_doqa_answerability_classification": 50.0, "eval_f1_for_task1442_doqa_answerability_classification": 50.0, "eval_f1_for_task1516_imppres_textual_entailment": 35.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 38.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1540_peer_read_title_generation": 35.4504, "eval_f1_for_task1554_scitail_textual_entailment": 50.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.7502, "eval_f1_for_task1562_zest_question_rewriting": 51.8576, "eval_f1_for_task1586_scifact_title_generation": 28.3939, "eval_f1_for_task1598_nyc_data_to_text": 52.4526, "eval_f1_for_task1612_sick_textual_entailment": 33.0, "eval_f1_for_task1615_sick_textual_entailment": 40.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.5016, "eval_f1_for_task1624_disfl_qa_answerability_classification": 51.0, "eval_f1_for_task1631_open_pi_data_to_text": 93.0537, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_f1_for_task1659_billsum_title_generation": 37.1482, "eval_f1_for_task1664_wino_bias_coreference_resolution": 71.4159, "eval_f1_for_task1728_web_nlg_data_to_text": 58.1463, "eval_f1_for_task190_snli_textual_entailment": 49.0, "eval_f1_for_task199_multinli_textual_entailment": 50.0, "eval_f1_for_task200_multinli_textual_entailment": 34.0, "eval_f1_for_task201_multinli_textual_entailment": 34.0, "eval_f1_for_task202_multinli_textual_entailment": 33.0, "eval_f1_for_task219_rocstories_title_generation": 24.9079, "eval_f1_for_task220_rocstories_title_generation": 50.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_f1_for_task232_iirc_answerability_classification": 2.0, "eval_f1_for_task233_iirc_answerability_classification": 0.0, "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 58.3048, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 39.6819, "eval_f1_for_task288_gigaword_title_generation": 30.1867, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.6667, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 5.0, "eval_f1_for_task329_gap_coreference_resolution": 33.0, "eval_f1_for_task330_gap_coreference_resolution": 66.2524, "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 74.5, "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task393_cod3s_cause_effect_classification": 29.9156, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 17.3333, "eval_f1_for_task402_grailqa_question_rewriting": 82.5101, "eval_f1_for_task418_persent_title_generation": 23.4526, "eval_f1_for_task442_com_qa_question_rewriting": 71.2501, "eval_f1_for_task500_scruples_title_generation": 14.3921, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 40.3334, "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, "eval_f1_for_task569_recipe_nlg_title_generation": 45.3202, "eval_f1_for_task602_wikitext_title_generation": 16.1325, "eval_f1_for_task613_liar_keyword_tagging": 13.3333, "eval_f1_for_task614_glucose_cause_effect_classification": 37.535, "eval_f1_for_task619_ohsumed_title_generation": 40.1032, "eval_f1_for_task620_ohsumed_keyword_tagging": 33.8, "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, "eval_f1_for_task640_e_snli_textual_entailment": 32.0, "eval_f1_for_task641_e_snli_textual_entailment": 34.0, "eval_f1_for_task642_e_snli_textual_entailment": 50.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 89.5238, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 1.7524, "eval_f1_for_task670_ambigqa_question_rewriting": 75.4903, "eval_f1_for_task671_ambigqa_question_rewriting": 62.5126, "eval_f1_for_task677_ollie_data_to_text": 34.8555, "eval_f1_for_task738_perspectrum_textual_entailment": 7.0, "eval_f1_for_task743_eurlex_title_generation": 27.9732, "eval_f1_for_task760_msr_sqa_data_to_text": 5.6103, "eval_f1_for_task769_qed_title_generation": 78.1585, "eval_f1_for_task827_copa_cause_effect_classification": 50.0, "eval_f1_for_task828_copa_cause_effect_classification": 50.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.0, "eval_f1_for_task890_gwsd_textual_entailment": 44.0, "eval_f1_for_task891_gap_coreference_resolution": 53.919, "eval_f1_for_task892_gap_coreference_resolution": 50.0, "eval_f1_for_task893_gap_coreference_resolution": 48.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_f1_for_task957_e2e_data_to_text": 56.8398, "eval_f1_for_task970_sherliic_textual_entailment": 50.0, "eval_f1_for_textual_entailment": 39.5, "eval_f1_for_title_generation": 32.4237, "eval_f1_for_word_analogy": 30.4583, "eval_gen_len": 10.7377, "eval_global_step": 100, "eval_loss": 1.0964491367340088, "eval_rouge1": 46.6235, "eval_rouge1_for_answerability_classification": 44.8974, "eval_rouge1_for_cause_effect_classification": 55.9809, "eval_rouge1_for_coreference_resolution": 42.8074, "eval_rouge1_for_data_to_text": 57.3528, "eval_rouge1_for_dialogue_act_recognition": 51.6313, "eval_rouge1_for_grammar_error_correction": 61.8388, "eval_rouge1_for_keyword_tagging": 55.57, "eval_rouge1_for_overlap_extraction": 36.388, "eval_rouge1_for_question_rewriting": 70.1195, "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 47.1667, "eval_rouge1_for_task034_winogrande_question_rewriting": 73.8198, "eval_rouge1_for_task035_winogrande_question_rewriting": 86.9038, "eval_rouge1_for_task036_qasc_keyword_tagging": 71.25, "eval_rouge1_for_task039_qasc_overlap_extraction": 32.3333, "eval_rouge1_for_task050_multirc_answerability_classification": 49.0, "eval_rouge1_for_task102_commongen_data_to_text": 69.8998, "eval_rouge1_for_task1152_bard_word_analogy": 21.0, "eval_rouge1_for_task1153_bard_word_analogy": 21.0, "eval_rouge1_for_task1154_bard_word_analogy": 17.0, "eval_rouge1_for_task1155_bard_word_analogy": 63.0, "eval_rouge1_for_task1156_bard_word_analogy": 36.6667, "eval_rouge1_for_task1157_bard_word_analogy": 51.0, "eval_rouge1_for_task1158_bard_word_analogy": 18.0, "eval_rouge1_for_task1159_bard_word_analogy": 17.0, "eval_rouge1_for_task1161_coda_19_title_generation": 29.6399, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.2653, "eval_rouge1_for_task121_atomic_question_rewriting": 53.2166, "eval_rouge1_for_task133_winowhy_coreference_resolution": 40.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.5257, "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 42.7105, "eval_rouge1_for_task1356_xlsum_title_generation": 16.6282, "eval_rouge1_for_task1358_xlsum_title_generation": 40.3165, "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, "eval_rouge1_for_task1387_anli_textual_entailment": 35.0, "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 55.5857, "eval_rouge1_for_task1407_dart_data_to_text": 44.7196, "eval_rouge1_for_task1409_dart_data_to_text": 52.8526, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.7038, "eval_rouge1_for_task1439_doqa_answerability_classification": 50.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 50.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 35.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 38.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1540_peer_read_title_generation": 38.0099, "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.9738, "eval_rouge1_for_task1562_zest_question_rewriting": 54.9184, "eval_rouge1_for_task1586_scifact_title_generation": 31.8055, "eval_rouge1_for_task1598_nyc_data_to_text": 55.1983, "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, "eval_rouge1_for_task1615_sick_textual_entailment": 80.0, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.8334, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 51.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 93.2754, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_rouge1_for_task1659_billsum_title_generation": 38.8318, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 71.4159, "eval_rouge1_for_task1728_web_nlg_data_to_text": 59.7108, "eval_rouge1_for_task190_snli_textual_entailment": 49.0, "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, "eval_rouge1_for_task219_rocstories_title_generation": 29.8651, "eval_rouge1_for_task220_rocstories_title_generation": 50.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_rouge1_for_task232_iirc_answerability_classification": 2.0, "eval_rouge1_for_task233_iirc_answerability_classification": 0.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 59.15, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 40.4427, "eval_rouge1_for_task288_gigaword_title_generation": 32.3759, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.6667, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 5.0, "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, "eval_rouge1_for_task330_gap_coreference_resolution": 66.7524, "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 74.5, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.393, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 23.3333, "eval_rouge1_for_task402_grailqa_question_rewriting": 85.0657, "eval_rouge1_for_task418_persent_title_generation": 26.0799, "eval_rouge1_for_task442_com_qa_question_rewriting": 74.6724, "eval_rouge1_for_task500_scruples_title_generation": 15.6704, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 40.9497, "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 46.5651, "eval_rouge1_for_task602_wikitext_title_generation": 16.6334, "eval_rouge1_for_task613_liar_keyword_tagging": 26.6667, "eval_rouge1_for_task614_glucose_cause_effect_classification": 44.8068, "eval_rouge1_for_task619_ohsumed_title_generation": 43.8459, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 39.4333, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 32.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 90.5, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 1.7333, "eval_rouge1_for_task670_ambigqa_question_rewriting": 76.2163, "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.6918, "eval_rouge1_for_task677_ollie_data_to_text": 37.9316, "eval_rouge1_for_task738_perspectrum_textual_entailment": 55.0, "eval_rouge1_for_task743_eurlex_title_generation": 29.5426, "eval_rouge1_for_task760_msr_sqa_data_to_text": 5.8599, "eval_rouge1_for_task769_qed_title_generation": 77.9585, "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 41.3333, "eval_rouge1_for_task890_gwsd_textual_entailment": 44.0, "eval_rouge1_for_task891_gap_coreference_resolution": 53.7524, "eval_rouge1_for_task892_gap_coreference_resolution": 50.0, "eval_rouge1_for_task893_gap_coreference_resolution": 48.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rouge1_for_task957_e2e_data_to_text": 58.6221, "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, "eval_rouge1_for_textual_entailment": 43.1667, "eval_rouge1_for_title_generation": 34.5618, "eval_rouge1_for_word_analogy": 30.5833, "eval_rougeL": 45.1592, "eval_rougeL_for_answerability_classification": 44.8974, "eval_rougeL_for_cause_effect_classification": 55.0944, "eval_rougeL_for_coreference_resolution": 42.8074, "eval_rougeL_for_data_to_text": 48.8064, "eval_rougeL_for_dialogue_act_recognition": 51.6313, "eval_rougeL_for_grammar_error_correction": 61.0773, "eval_rougeL_for_keyword_tagging": 55.1167, "eval_rougeL_for_overlap_extraction": 35.9003, "eval_rougeL_for_question_rewriting": 66.2438, "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 47.1667, "eval_rougeL_for_task034_winogrande_question_rewriting": 73.8198, "eval_rougeL_for_task035_winogrande_question_rewriting": 85.8189, "eval_rougeL_for_task036_qasc_keyword_tagging": 70.6833, "eval_rougeL_for_task039_qasc_overlap_extraction": 32.3333, "eval_rougeL_for_task050_multirc_answerability_classification": 49.0, "eval_rougeL_for_task102_commongen_data_to_text": 57.4588, "eval_rougeL_for_task1152_bard_word_analogy": 21.0, "eval_rougeL_for_task1153_bard_word_analogy": 21.0, "eval_rougeL_for_task1154_bard_word_analogy": 17.0, "eval_rougeL_for_task1155_bard_word_analogy": 63.0, "eval_rougeL_for_task1156_bard_word_analogy": 36.6667, "eval_rougeL_for_task1157_bard_word_analogy": 51.0, "eval_rougeL_for_task1158_bard_word_analogy": 18.0, "eval_rougeL_for_task1159_bard_word_analogy": 17.0, "eval_rougeL_for_task1161_coda_19_title_generation": 23.5262, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.9782, "eval_rougeL_for_task121_atomic_question_rewriting": 48.7308, "eval_rougeL_for_task133_winowhy_coreference_resolution": 40.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.9758, "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 39.4191, "eval_rougeL_for_task1356_xlsum_title_generation": 13.624, "eval_rougeL_for_task1358_xlsum_title_generation": 33.5237, "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, "eval_rougeL_for_task1387_anli_textual_entailment": 35.0, "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 55.5857, "eval_rougeL_for_task1407_dart_data_to_text": 35.374, "eval_rougeL_for_task1409_dart_data_to_text": 44.2283, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.2046, "eval_rougeL_for_task1439_doqa_answerability_classification": 50.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 50.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 35.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 38.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1540_peer_read_title_generation": 35.4738, "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.95, "eval_rougeL_for_task1562_zest_question_rewriting": 49.1408, "eval_rougeL_for_task1586_scifact_title_generation": 25.3767, "eval_rougeL_for_task1598_nyc_data_to_text": 43.4539, "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, "eval_rougeL_for_task1615_sick_textual_entailment": 80.0, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.247, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 51.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 92.9897, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_rougeL_for_task1659_billsum_title_generation": 33.672, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 71.4159, "eval_rougeL_for_task1728_web_nlg_data_to_text": 52.2437, "eval_rougeL_for_task190_snli_textual_entailment": 49.0, "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, "eval_rougeL_for_task219_rocstories_title_generation": 29.8651, "eval_rougeL_for_task220_rocstories_title_generation": 50.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_rougeL_for_task232_iirc_answerability_classification": 2.0, "eval_rougeL_for_task233_iirc_answerability_classification": 0.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 59.15, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 39.4673, "eval_rougeL_for_task288_gigaword_title_generation": 28.252, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.6667, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 5.0, "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, "eval_rougeL_for_task330_gap_coreference_resolution": 66.7524, "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 74.5, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.8677, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 23.3333, "eval_rougeL_for_task402_grailqa_question_rewriting": 68.1395, "eval_rougeL_for_task418_persent_title_generation": 22.5188, "eval_rougeL_for_task442_com_qa_question_rewriting": 70.8442, "eval_rougeL_for_task500_scruples_title_generation": 14.8141, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 40.3626, "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 44.3324, "eval_rougeL_for_task602_wikitext_title_generation": 16.6334, "eval_rougeL_for_task613_liar_keyword_tagging": 26.6667, "eval_rougeL_for_task614_glucose_cause_effect_classification": 40.1262, "eval_rougeL_for_task619_ohsumed_title_generation": 38.4738, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 38.1333, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 32.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 90.1, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 1.7333, "eval_rougeL_for_task670_ambigqa_question_rewriting": 74.2873, "eval_rougeL_for_task671_ambigqa_question_rewriting": 61.2564, "eval_rougeL_for_task677_ollie_data_to_text": 30.4698, "eval_rougeL_for_task738_perspectrum_textual_entailment": 55.0, "eval_rougeL_for_task743_eurlex_title_generation": 26.9057, "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.0347, "eval_rougeL_for_task769_qed_title_generation": 77.7085, "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 41.3333, "eval_rougeL_for_task890_gwsd_textual_entailment": 44.0, "eval_rougeL_for_task891_gap_coreference_resolution": 53.7524, "eval_rougeL_for_task892_gap_coreference_resolution": 50.0, "eval_rougeL_for_task893_gap_coreference_resolution": 48.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rougeL_for_task957_e2e_data_to_text": 45.614, "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, "eval_rougeL_for_textual_entailment": 43.1667, "eval_rougeL_for_title_generation": 31.7476, "eval_rougeL_for_word_analogy": 30.5833, "eval_runtime": 1083.9815, "eval_samples_per_second": 10.987, "eval_steps_per_second": 0.687, "step": 100 }, { "epoch": 0.04, "learning_rate": 5e-05, "loss": 1.2831, "step": 200 }, { "epoch": 0.04, "eval_exact_match": 30.6465, "eval_exact_match_for_answerability_classification": 50.3846, "eval_exact_match_for_cause_effect_classification": 35.8571, "eval_exact_match_for_coreference_resolution": 41.0714, "eval_exact_match_for_data_to_text": 8.2324, "eval_exact_match_for_dialogue_act_recognition": 45.7143, "eval_exact_match_for_grammar_error_correction": 6.0, "eval_exact_match_for_keyword_tagging": 40.4, "eval_exact_match_for_overlap_extraction": 10.5, "eval_exact_match_for_question_rewriting": 4.0909, "eval_exact_match_for_task020_mctaco_answerability_classification": 51.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 51.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 33.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 21.0, "eval_exact_match_for_task050_multirc_answerability_classification": 54.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 23.0, "eval_exact_match_for_task1153_bard_word_analogy": 26.0, "eval_exact_match_for_task1154_bard_word_analogy": 19.0, "eval_exact_match_for_task1155_bard_word_analogy": 69.0, "eval_exact_match_for_task1156_bard_word_analogy": 43.0, "eval_exact_match_for_task1157_bard_word_analogy": 66.0, "eval_exact_match_for_task1158_bard_word_analogy": 39.0, "eval_exact_match_for_task1159_bard_word_analogy": 30.0, "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 22.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 24.0, "eval_exact_match_for_task1386_anli_textual_entailment": 32.0, "eval_exact_match_for_task1387_anli_textual_entailment": 41.0, "eval_exact_match_for_task1388_cb_textual_entailment": 45.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 70.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 1.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 43.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 51.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 35.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 52.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 27.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 47.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 48.0, "eval_exact_match_for_task1540_peer_read_title_generation": 3.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 12.0, "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, "eval_exact_match_for_task1615_sick_textual_entailment": 46.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 17.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 51.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 60.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 51.0, "eval_exact_match_for_task1659_billsum_title_generation": 3.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 35.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, "eval_exact_match_for_task190_snli_textual_entailment": 19.0, "eval_exact_match_for_task199_multinli_textual_entailment": 49.0, "eval_exact_match_for_task200_multinli_textual_entailment": 52.0, "eval_exact_match_for_task201_multinli_textual_entailment": 35.0, "eval_exact_match_for_task202_multinli_textual_entailment": 24.0, "eval_exact_match_for_task219_rocstories_title_generation": 11.0, "eval_exact_match_for_task220_rocstories_title_generation": 54.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, "eval_exact_match_for_task232_iirc_answerability_classification": 58.0, "eval_exact_match_for_task233_iirc_answerability_classification": 37.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 60.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 54.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 13.0, "eval_exact_match_for_task329_gap_coreference_resolution": 35.0, "eval_exact_match_for_task330_gap_coreference_resolution": 64.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 43.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 51.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 48.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 29.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, "eval_exact_match_for_task500_scruples_title_generation": 0.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 6.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 10.0, "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, "eval_exact_match_for_task613_liar_keyword_tagging": 19.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 19.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 51.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 32.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 43.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 80.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 0.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 52.0, "eval_exact_match_for_task743_eurlex_title_generation": 0.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 60.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 52.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 31.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 35.0, "eval_exact_match_for_task891_gap_coreference_resolution": 49.0, "eval_exact_match_for_task892_gap_coreference_resolution": 44.0, "eval_exact_match_for_task893_gap_coreference_resolution": 51.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 54.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 67.0, "eval_exact_match_for_textual_entailment": 42.9167, "eval_exact_match_for_title_generation": 8.7444, "eval_exact_match_for_word_analogy": 39.375, "eval_f1": 46.9066, "eval_f1_for_answerability_classification": 52.9487, "eval_f1_for_cause_effect_classification": 55.3795, "eval_f1_for_coreference_resolution": 47.6416, "eval_f1_for_data_to_text": 53.7237, "eval_f1_for_dialogue_act_recognition": 49.7857, "eval_f1_for_grammar_error_correction": 54.7354, "eval_f1_for_keyword_tagging": 52.5141, "eval_f1_for_overlap_extraction": 30.681, "eval_f1_for_question_rewriting": 62.979, "eval_f1_for_task020_mctaco_answerability_classification": 51.0, "eval_f1_for_task033_winogrande_coreference_resolution": 51.0, "eval_f1_for_task034_winogrande_question_rewriting": 30.8391, "eval_f1_for_task035_winogrande_question_rewriting": 83.4332, "eval_f1_for_task036_qasc_keyword_tagging": 64.4073, "eval_f1_for_task039_qasc_overlap_extraction": 26.3333, "eval_f1_for_task050_multirc_answerability_classification": 54.0, "eval_f1_for_task102_commongen_data_to_text": 53.4101, "eval_f1_for_task1152_bard_word_analogy": 23.0, "eval_f1_for_task1153_bard_word_analogy": 26.0, "eval_f1_for_task1154_bard_word_analogy": 19.0, "eval_f1_for_task1155_bard_word_analogy": 69.0, "eval_f1_for_task1156_bard_word_analogy": 43.0, "eval_f1_for_task1157_bard_word_analogy": 66.0, "eval_f1_for_task1158_bard_word_analogy": 39.0, "eval_f1_for_task1159_bard_word_analogy": 30.0, "eval_f1_for_task1161_coda_19_title_generation": 28.0348, "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.4147, "eval_f1_for_task121_atomic_question_rewriting": 51.7155, "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.3404, "eval_f1_for_task1344_rte_textual_entailment": 50.0, "eval_f1_for_task1345_qqp_question_rewriting": 38.3548, "eval_f1_for_task1356_xlsum_title_generation": 15.5346, "eval_f1_for_task1358_xlsum_title_generation": 32.943, "eval_f1_for_task1385_anli_textual_entailment": 24.0, "eval_f1_for_task1386_anli_textual_entailment": 32.0, "eval_f1_for_task1387_anli_textual_entailment": 41.0, "eval_f1_for_task1388_cb_textual_entailment": 45.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 70.0, "eval_f1_for_task1407_dart_data_to_text": 43.2767, "eval_f1_for_task1409_dart_data_to_text": 52.1074, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.2503, "eval_f1_for_task1439_doqa_answerability_classification": 43.0, "eval_f1_for_task1442_doqa_answerability_classification": 51.0, "eval_f1_for_task1516_imppres_textual_entailment": 35.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 52.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 27.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 47.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, "eval_f1_for_task1540_peer_read_title_generation": 32.7426, "eval_f1_for_task1554_scitail_textual_entailment": 50.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 80.2205, "eval_f1_for_task1562_zest_question_rewriting": 50.2497, "eval_f1_for_task1586_scifact_title_generation": 29.8392, "eval_f1_for_task1598_nyc_data_to_text": 50.5065, "eval_f1_for_task1612_sick_textual_entailment": 33.0, "eval_f1_for_task1615_sick_textual_entailment": 46.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 81.6669, "eval_f1_for_task1624_disfl_qa_answerability_classification": 51.0, "eval_f1_for_task1631_open_pi_data_to_text": 94.9647, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 51.0, "eval_f1_for_task1659_billsum_title_generation": 36.5227, "eval_f1_for_task1664_wino_bias_coreference_resolution": 73.5714, "eval_f1_for_task1728_web_nlg_data_to_text": 55.7374, "eval_f1_for_task190_snli_textual_entailment": 19.0, "eval_f1_for_task199_multinli_textual_entailment": 49.0, "eval_f1_for_task200_multinli_textual_entailment": 52.0, "eval_f1_for_task201_multinli_textual_entailment": 35.0, "eval_f1_for_task202_multinli_textual_entailment": 24.0, "eval_f1_for_task219_rocstories_title_generation": 29.5159, "eval_f1_for_task220_rocstories_title_generation": 54.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, "eval_f1_for_task232_iirc_answerability_classification": 58.0, "eval_f1_for_task233_iirc_answerability_classification": 37.0, "eval_f1_for_task242_tweetqa_answerability_classification": 60.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 64.9333, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 35.0287, "eval_f1_for_task288_gigaword_title_generation": 26.7279, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 34.3333, "eval_f1_for_task329_gap_coreference_resolution": 35.0, "eval_f1_for_task330_gap_coreference_resolution": 69.8778, "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 71.5, "eval_f1_for_task391_cod3s_cause_effect_classification": 83.6667, "eval_f1_for_task392_cod3s_cause_effect_classification": 82.6667, "eval_f1_for_task393_cod3s_cause_effect_classification": 32.4216, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 31.6667, "eval_f1_for_task402_grailqa_question_rewriting": 80.4987, "eval_f1_for_task418_persent_title_generation": 21.0078, "eval_f1_for_task442_com_qa_question_rewriting": 68.4025, "eval_f1_for_task500_scruples_title_generation": 18.2181, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 44.6618, "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, "eval_f1_for_task569_recipe_nlg_title_generation": 42.9971, "eval_f1_for_task602_wikitext_title_generation": 12.7204, "eval_f1_for_task613_liar_keyword_tagging": 22.3333, "eval_f1_for_task614_glucose_cause_effect_classification": 36.9018, "eval_f1_for_task619_ohsumed_title_generation": 40.4833, "eval_f1_for_task620_ohsumed_keyword_tagging": 38.7396, "eval_f1_for_task623_ohsumed_keyword_tagging": 51.0, "eval_f1_for_task640_e_snli_textual_entailment": 31.0, "eval_f1_for_task641_e_snli_textual_entailment": 32.0, "eval_f1_for_task642_e_snli_textual_entailment": 43.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 86.0905, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 0.0, "eval_f1_for_task670_ambigqa_question_rewriting": 73.8566, "eval_f1_for_task671_ambigqa_question_rewriting": 54.3376, "eval_f1_for_task677_ollie_data_to_text": 35.2752, "eval_f1_for_task738_perspectrum_textual_entailment": 52.0, "eval_f1_for_task743_eurlex_title_generation": 29.6274, "eval_f1_for_task760_msr_sqa_data_to_text": 2.2315, "eval_f1_for_task769_qed_title_generation": 73.2963, "eval_f1_for_task827_copa_cause_effect_classification": 50.0, "eval_f1_for_task828_copa_cause_effect_classification": 52.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 31.0, "eval_f1_for_task890_gwsd_textual_entailment": 35.0, "eval_f1_for_task891_gap_coreference_resolution": 61.6, "eval_f1_for_task892_gap_coreference_resolution": 44.0, "eval_f1_for_task893_gap_coreference_resolution": 51.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 54.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, "eval_f1_for_task957_e2e_data_to_text": 57.8993, "eval_f1_for_task970_sherliic_textual_entailment": 67.0, "eval_f1_for_textual_entailment": 42.9167, "eval_f1_for_title_generation": 32.5212, "eval_f1_for_word_analogy": 39.375, "eval_gen_len": 10.2397, "eval_global_step": 200, "eval_loss": 1.0702121257781982, "eval_rouge1": 48.6396, "eval_rouge1_for_answerability_classification": 52.9487, "eval_rouge1_for_cause_effect_classification": 56.4862, "eval_rouge1_for_coreference_resolution": 48.466, "eval_rouge1_for_data_to_text": 56.5999, "eval_rouge1_for_dialogue_act_recognition": 53.3286, "eval_rouge1_for_grammar_error_correction": 59.8279, "eval_rouge1_for_keyword_tagging": 57.4537, "eval_rouge1_for_overlap_extraction": 33.6289, "eval_rouge1_for_question_rewriting": 64.7618, "eval_rouge1_for_task020_mctaco_answerability_classification": 51.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 51.0, "eval_rouge1_for_task034_winogrande_question_rewriting": 30.8973, "eval_rouge1_for_task035_winogrande_question_rewriting": 84.0663, "eval_rouge1_for_task036_qasc_keyword_tagging": 69.7861, "eval_rouge1_for_task039_qasc_overlap_extraction": 31.3333, "eval_rouge1_for_task050_multirc_answerability_classification": 54.0, "eval_rouge1_for_task102_commongen_data_to_text": 66.0293, "eval_rouge1_for_task1152_bard_word_analogy": 23.0, "eval_rouge1_for_task1153_bard_word_analogy": 26.0, "eval_rouge1_for_task1154_bard_word_analogy": 19.0, "eval_rouge1_for_task1155_bard_word_analogy": 69.0, "eval_rouge1_for_task1156_bard_word_analogy": 43.0, "eval_rouge1_for_task1157_bard_word_analogy": 66.0, "eval_rouge1_for_task1158_bard_word_analogy": 39.0, "eval_rouge1_for_task1159_bard_word_analogy": 30.0, "eval_rouge1_for_task1161_coda_19_title_generation": 31.4257, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.0086, "eval_rouge1_for_task121_atomic_question_rewriting": 53.9791, "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 13.7797, "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 41.1923, "eval_rouge1_for_task1356_xlsum_title_generation": 18.1996, "eval_rouge1_for_task1358_xlsum_title_generation": 37.654, "eval_rouge1_for_task1385_anli_textual_entailment": 24.0, "eval_rouge1_for_task1386_anli_textual_entailment": 32.0, "eval_rouge1_for_task1387_anli_textual_entailment": 41.0, "eval_rouge1_for_task1388_cb_textual_entailment": 45.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 74.8, "eval_rouge1_for_task1407_dart_data_to_text": 44.4255, "eval_rouge1_for_task1409_dart_data_to_text": 53.5381, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.4676, "eval_rouge1_for_task1439_doqa_answerability_classification": 43.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 51.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 35.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 52.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 35.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 47.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, "eval_rouge1_for_task1540_peer_read_title_generation": 35.0534, "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 83.1882, "eval_rouge1_for_task1562_zest_question_rewriting": 54.6914, "eval_rouge1_for_task1586_scifact_title_generation": 33.3433, "eval_rouge1_for_task1598_nyc_data_to_text": 52.948, "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, "eval_rouge1_for_task1615_sick_textual_entailment": 82.0, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 82.0529, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 51.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 95.125, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 51.0, "eval_rouge1_for_task1659_billsum_title_generation": 38.5171, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 73.5714, "eval_rouge1_for_task1728_web_nlg_data_to_text": 57.5233, "eval_rouge1_for_task190_snli_textual_entailment": 19.0, "eval_rouge1_for_task199_multinli_textual_entailment": 49.0, "eval_rouge1_for_task200_multinli_textual_entailment": 52.0, "eval_rouge1_for_task201_multinli_textual_entailment": 35.0, "eval_rouge1_for_task202_multinli_textual_entailment": 24.0, "eval_rouge1_for_task219_rocstories_title_generation": 34.562, "eval_rouge1_for_task220_rocstories_title_generation": 54.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, "eval_rouge1_for_task232_iirc_answerability_classification": 58.0, "eval_rouge1_for_task233_iirc_answerability_classification": 37.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 60.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 65.5524, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 35.9246, "eval_rouge1_for_task288_gigaword_title_generation": 29.0718, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 35.8, "eval_rouge1_for_task329_gap_coreference_resolution": 35.0, "eval_rouge1_for_task330_gap_coreference_resolution": 69.8333, "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 71.5, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.6667, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 82.6667, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.1363, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 41.3333, "eval_rouge1_for_task402_grailqa_question_rewriting": 83.4559, "eval_rouge1_for_task418_persent_title_generation": 23.7489, "eval_rouge1_for_task442_com_qa_question_rewriting": 71.6548, "eval_rouge1_for_task500_scruples_title_generation": 20.025, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 45.3612, "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 43.424, "eval_rouge1_for_task602_wikitext_title_generation": 13.7451, "eval_rouge1_for_task613_liar_keyword_tagging": 34.9667, "eval_rouge1_for_task614_glucose_cause_effect_classification": 43.9335, "eval_rouge1_for_task619_ohsumed_title_generation": 43.2663, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 44.6396, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 51.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 32.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 43.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 86.8762, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 0.0, "eval_rouge1_for_task670_ambigqa_question_rewriting": 75.038, "eval_rouge1_for_task671_ambigqa_question_rewriting": 55.343, "eval_rouge1_for_task677_ollie_data_to_text": 37.5787, "eval_rouge1_for_task738_perspectrum_textual_entailment": 57.0, "eval_rouge1_for_task743_eurlex_title_generation": 30.8368, "eval_rouge1_for_task760_msr_sqa_data_to_text": 2.2913, "eval_rouge1_for_task769_qed_title_generation": 73.5271, "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 52.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 43.0, "eval_rouge1_for_task890_gwsd_textual_entailment": 35.0, "eval_rouge1_for_task891_gap_coreference_resolution": 61.4333, "eval_rouge1_for_task892_gap_coreference_resolution": 44.0, "eval_rouge1_for_task893_gap_coreference_resolution": 51.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 54.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, "eval_rouge1_for_task957_e2e_data_to_text": 59.7514, "eval_rouge1_for_task970_sherliic_textual_entailment": 67.0, "eval_rouge1_for_textual_entailment": 44.625, "eval_rouge1_for_title_generation": 34.6044, "eval_rouge1_for_word_analogy": 39.375, "eval_rougeL": 47.1494, "eval_rougeL_for_answerability_classification": 52.9487, "eval_rougeL_for_cause_effect_classification": 55.6277, "eval_rougeL_for_coreference_resolution": 48.466, "eval_rougeL_for_data_to_text": 48.5963, "eval_rougeL_for_dialogue_act_recognition": 53.3286, "eval_rougeL_for_grammar_error_correction": 58.8474, "eval_rougeL_for_keyword_tagging": 56.9404, "eval_rougeL_for_overlap_extraction": 32.9263, "eval_rougeL_for_question_rewriting": 60.6373, "eval_rougeL_for_task020_mctaco_answerability_classification": 51.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 51.0, "eval_rougeL_for_task034_winogrande_question_rewriting": 30.8973, "eval_rougeL_for_task035_winogrande_question_rewriting": 82.9356, "eval_rougeL_for_task036_qasc_keyword_tagging": 68.2195, "eval_rougeL_for_task039_qasc_overlap_extraction": 31.3333, "eval_rougeL_for_task050_multirc_answerability_classification": 54.0, "eval_rougeL_for_task102_commongen_data_to_text": 55.0518, "eval_rougeL_for_task1152_bard_word_analogy": 23.0, "eval_rougeL_for_task1153_bard_word_analogy": 26.0, "eval_rougeL_for_task1154_bard_word_analogy": 19.0, "eval_rougeL_for_task1155_bard_word_analogy": 69.0, "eval_rougeL_for_task1156_bard_word_analogy": 43.0, "eval_rougeL_for_task1157_bard_word_analogy": 66.0, "eval_rougeL_for_task1158_bard_word_analogy": 39.0, "eval_rougeL_for_task1159_bard_word_analogy": 30.0, "eval_rougeL_for_task1161_coda_19_title_generation": 24.7479, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.1404, "eval_rougeL_for_task121_atomic_question_rewriting": 49.1722, "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.9741, "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 37.9649, "eval_rougeL_for_task1356_xlsum_title_generation": 15.5253, "eval_rougeL_for_task1358_xlsum_title_generation": 30.9437, "eval_rougeL_for_task1385_anli_textual_entailment": 24.0, "eval_rougeL_for_task1386_anli_textual_entailment": 32.0, "eval_rougeL_for_task1387_anli_textual_entailment": 41.0, "eval_rougeL_for_task1388_cb_textual_entailment": 45.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 74.8, "eval_rougeL_for_task1407_dart_data_to_text": 36.258, "eval_rougeL_for_task1409_dart_data_to_text": 44.9233, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.6587, "eval_rougeL_for_task1439_doqa_answerability_classification": 43.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 51.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 35.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 52.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 35.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 47.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 48.0, "eval_rougeL_for_task1540_peer_read_title_generation": 31.5264, "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 82.036, "eval_rougeL_for_task1562_zest_question_rewriting": 48.7225, "eval_rougeL_for_task1586_scifact_title_generation": 26.7789, "eval_rougeL_for_task1598_nyc_data_to_text": 42.5241, "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, "eval_rougeL_for_task1615_sick_textual_entailment": 82.0, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 80.1764, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 51.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 95.0341, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 51.0, "eval_rougeL_for_task1659_billsum_title_generation": 32.7873, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 73.5714, "eval_rougeL_for_task1728_web_nlg_data_to_text": 51.144, "eval_rougeL_for_task190_snli_textual_entailment": 19.0, "eval_rougeL_for_task199_multinli_textual_entailment": 49.0, "eval_rougeL_for_task200_multinli_textual_entailment": 52.0, "eval_rougeL_for_task201_multinli_textual_entailment": 35.0, "eval_rougeL_for_task202_multinli_textual_entailment": 24.0, "eval_rougeL_for_task219_rocstories_title_generation": 33.8286, "eval_rougeL_for_task220_rocstories_title_generation": 54.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, "eval_rougeL_for_task232_iirc_answerability_classification": 58.0, "eval_rougeL_for_task233_iirc_answerability_classification": 37.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 60.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 65.5524, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 34.5193, "eval_rougeL_for_task288_gigaword_title_generation": 24.7312, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 35.8, "eval_rougeL_for_task329_gap_coreference_resolution": 35.0, "eval_rougeL_for_task330_gap_coreference_resolution": 69.8333, "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 71.5, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.6667, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 82.6667, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.7852, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 41.3333, "eval_rougeL_for_task402_grailqa_question_rewriting": 67.3873, "eval_rougeL_for_task418_persent_title_generation": 20.3789, "eval_rougeL_for_task442_com_qa_question_rewriting": 65.3368, "eval_rougeL_for_task500_scruples_title_generation": 18.5266, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 44.9589, "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 42.3367, "eval_rougeL_for_task602_wikitext_title_generation": 13.7451, "eval_rougeL_for_task613_liar_keyword_tagging": 34.9667, "eval_rougeL_for_task614_glucose_cause_effect_classification": 39.2753, "eval_rougeL_for_task619_ohsumed_title_generation": 37.2661, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 43.6396, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 51.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 32.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 43.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 86.8762, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 0.0, "eval_rougeL_for_task670_ambigqa_question_rewriting": 73.0916, "eval_rougeL_for_task671_ambigqa_question_rewriting": 53.1859, "eval_rougeL_for_task677_ollie_data_to_text": 30.2724, "eval_rougeL_for_task738_perspectrum_textual_entailment": 57.0, "eval_rougeL_for_task743_eurlex_title_generation": 27.1215, "eval_rougeL_for_task760_msr_sqa_data_to_text": 2.2153, "eval_rougeL_for_task769_qed_title_generation": 73.3049, "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 52.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 43.0, "eval_rougeL_for_task890_gwsd_textual_entailment": 35.0, "eval_rougeL_for_task891_gap_coreference_resolution": 61.4333, "eval_rougeL_for_task892_gap_coreference_resolution": 44.0, "eval_rougeL_for_task893_gap_coreference_resolution": 51.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 54.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 58.0, "eval_rougeL_for_task957_e2e_data_to_text": 45.6216, "eval_rougeL_for_task970_sherliic_textual_entailment": 67.0, "eval_rougeL_for_textual_entailment": 44.625, "eval_rougeL_for_title_generation": 31.5742, "eval_rougeL_for_word_analogy": 39.375, "eval_runtime": 1041.1431, "eval_samples_per_second": 11.439, "eval_steps_per_second": 0.716, "step": 200 }, { "epoch": 0.11, "learning_rate": 5e-05, "loss": 1.2014, "step": 500 }, { "epoch": 0.11, "eval_exact_match": 30.3862, "eval_exact_match_for_answerability_classification": 50.0769, "eval_exact_match_for_cause_effect_classification": 38.2857, "eval_exact_match_for_coreference_resolution": 40.5714, "eval_exact_match_for_data_to_text": 6.9007, "eval_exact_match_for_dialogue_act_recognition": 47.1429, "eval_exact_match_for_grammar_error_correction": 8.5, "eval_exact_match_for_keyword_tagging": 39.0, "eval_exact_match_for_overlap_extraction": 9.0, "eval_exact_match_for_question_rewriting": 2.3636, "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 45.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 1.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 1.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 22.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 18.0, "eval_exact_match_for_task050_multirc_answerability_classification": 52.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 22.0, "eval_exact_match_for_task1153_bard_word_analogy": 28.0, "eval_exact_match_for_task1154_bard_word_analogy": 24.0, "eval_exact_match_for_task1155_bard_word_analogy": 67.0, "eval_exact_match_for_task1156_bard_word_analogy": 41.0, "eval_exact_match_for_task1157_bard_word_analogy": 54.0, "eval_exact_match_for_task1158_bard_word_analogy": 42.0, "eval_exact_match_for_task1159_bard_word_analogy": 34.0, "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 11.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, "eval_exact_match_for_task1388_cb_textual_entailment": 21.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 46.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 55.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 67.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 1.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 50.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 51.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 34.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 55.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 32.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1540_peer_read_title_generation": 3.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 51.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 17.0, "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, "eval_exact_match_for_task1615_sick_textual_entailment": 35.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 7.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 51.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_exact_match_for_task1659_billsum_title_generation": 3.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 32.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 5.0, "eval_exact_match_for_task190_snli_textual_entailment": 35.0, "eval_exact_match_for_task199_multinli_textual_entailment": 45.0, "eval_exact_match_for_task200_multinli_textual_entailment": 40.0, "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, "eval_exact_match_for_task202_multinli_textual_entailment": 24.0, "eval_exact_match_for_task219_rocstories_title_generation": 10.0, "eval_exact_match_for_task220_rocstories_title_generation": 50.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 54.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 7.0, "eval_exact_match_for_task329_gap_coreference_resolution": 48.0, "eval_exact_match_for_task330_gap_coreference_resolution": 63.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 53.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 57.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 53.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 31.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 2.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, "eval_exact_match_for_task500_scruples_title_generation": 1.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 5.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 13.0, "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 22.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 45.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 85.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 9.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, "eval_exact_match_for_task743_eurlex_title_generation": 1.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 76.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 53.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 24.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 45.0, "eval_exact_match_for_task891_gap_coreference_resolution": 54.0, "eval_exact_match_for_task892_gap_coreference_resolution": 32.0, "eval_exact_match_for_task893_gap_coreference_resolution": 47.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 68.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 51.0, "eval_exact_match_for_textual_entailment": 41.875, "eval_exact_match_for_title_generation": 9.6413, "eval_exact_match_for_word_analogy": 39.0, "eval_f1": 47.1842, "eval_f1_for_answerability_classification": 52.641, "eval_f1_for_cause_effect_classification": 55.2784, "eval_f1_for_coreference_resolution": 46.0074, "eval_f1_for_data_to_text": 53.0392, "eval_f1_for_dialogue_act_recognition": 50.5, "eval_f1_for_grammar_error_correction": 57.3021, "eval_f1_for_keyword_tagging": 53.5156, "eval_f1_for_overlap_extraction": 33.5263, "eval_f1_for_question_rewriting": 65.1695, "eval_f1_for_task020_mctaco_answerability_classification": 50.0, "eval_f1_for_task033_winogrande_coreference_resolution": 48.0, "eval_f1_for_task034_winogrande_question_rewriting": 46.3788, "eval_f1_for_task035_winogrande_question_rewriting": 82.9596, "eval_f1_for_task036_qasc_keyword_tagging": 63.7492, "eval_f1_for_task039_qasc_overlap_extraction": 25.8333, "eval_f1_for_task050_multirc_answerability_classification": 52.0, "eval_f1_for_task102_commongen_data_to_text": 54.4506, "eval_f1_for_task1152_bard_word_analogy": 22.0, "eval_f1_for_task1153_bard_word_analogy": 28.6667, "eval_f1_for_task1154_bard_word_analogy": 24.0, "eval_f1_for_task1155_bard_word_analogy": 67.0, "eval_f1_for_task1156_bard_word_analogy": 42.3333, "eval_f1_for_task1157_bard_word_analogy": 54.0, "eval_f1_for_task1158_bard_word_analogy": 42.0, "eval_f1_for_task1159_bard_word_analogy": 34.6667, "eval_f1_for_task1161_coda_19_title_generation": 34.6758, "eval_f1_for_task1195_disfl_qa_question_rewriting": 77.6967, "eval_f1_for_task121_atomic_question_rewriting": 51.0686, "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.8882, "eval_f1_for_task1344_rte_textual_entailment": 50.0, "eval_f1_for_task1345_qqp_question_rewriting": 38.3111, "eval_f1_for_task1356_xlsum_title_generation": 19.4232, "eval_f1_for_task1358_xlsum_title_generation": 36.105, "eval_f1_for_task1385_anli_textual_entailment": 34.0, "eval_f1_for_task1386_anli_textual_entailment": 34.0, "eval_f1_for_task1387_anli_textual_entailment": 33.0, "eval_f1_for_task1388_cb_textual_entailment": 21.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 46.0, "eval_f1_for_task1393_copa_cause_effect_classification": 55.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 67.0, "eval_f1_for_task1407_dart_data_to_text": 36.8875, "eval_f1_for_task1409_dart_data_to_text": 49.7025, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.8243, "eval_f1_for_task1439_doqa_answerability_classification": 50.0, "eval_f1_for_task1442_doqa_answerability_classification": 51.0, "eval_f1_for_task1516_imppres_textual_entailment": 34.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 55.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 32.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1540_peer_read_title_generation": 38.8587, "eval_f1_for_task1554_scitail_textual_entailment": 51.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.7799, "eval_f1_for_task1562_zest_question_rewriting": 52.9146, "eval_f1_for_task1586_scifact_title_generation": 36.4892, "eval_f1_for_task1598_nyc_data_to_text": 50.7354, "eval_f1_for_task1612_sick_textual_entailment": 33.0, "eval_f1_for_task1615_sick_textual_entailment": 35.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 80.2265, "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_f1_for_task1631_open_pi_data_to_text": 93.2867, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_f1_for_task1659_billsum_title_generation": 37.4581, "eval_f1_for_task1664_wino_bias_coreference_resolution": 67.5082, "eval_f1_for_task1728_web_nlg_data_to_text": 57.3273, "eval_f1_for_task190_snli_textual_entailment": 35.0, "eval_f1_for_task199_multinli_textual_entailment": 45.0, "eval_f1_for_task200_multinli_textual_entailment": 40.0, "eval_f1_for_task201_multinli_textual_entailment": 33.0, "eval_f1_for_task202_multinli_textual_entailment": 24.0, "eval_f1_for_task219_rocstories_title_generation": 24.297, "eval_f1_for_task220_rocstories_title_generation": 50.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, "eval_f1_for_task232_iirc_answerability_classification": 50.0, "eval_f1_for_task233_iirc_answerability_classification": 49.0, "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 65.8714, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 41.2193, "eval_f1_for_task288_gigaword_title_generation": 32.0841, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 12.7667, "eval_f1_for_task329_gap_coreference_resolution": 48.0, "eval_f1_for_task330_gap_coreference_resolution": 68.8048, "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 76.5, "eval_f1_for_task391_cod3s_cause_effect_classification": 85.6667, "eval_f1_for_task392_cod3s_cause_effect_classification": 84.3333, "eval_f1_for_task393_cod3s_cause_effect_classification": 27.7289, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 34.1667, "eval_f1_for_task402_grailqa_question_rewriting": 81.6426, "eval_f1_for_task418_persent_title_generation": 25.7146, "eval_f1_for_task442_com_qa_question_rewriting": 70.3664, "eval_f1_for_task500_scruples_title_generation": 18.6027, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 43.947, "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, "eval_f1_for_task569_recipe_nlg_title_generation": 42.9346, "eval_f1_for_task602_wikitext_title_generation": 14.196, "eval_f1_for_task613_liar_keyword_tagging": 19.3333, "eval_f1_for_task614_glucose_cause_effect_classification": 31.2198, "eval_f1_for_task619_ohsumed_title_generation": 44.5412, "eval_f1_for_task620_ohsumed_keyword_tagging": 41.8667, "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, "eval_f1_for_task640_e_snli_textual_entailment": 31.0, "eval_f1_for_task641_e_snli_textual_entailment": 34.0, "eval_f1_for_task642_e_snli_textual_entailment": 45.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 92.6286, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 11.0, "eval_f1_for_task670_ambigqa_question_rewriting": 73.532, "eval_f1_for_task671_ambigqa_question_rewriting": 61.7676, "eval_f1_for_task677_ollie_data_to_text": 36.3201, "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, "eval_f1_for_task743_eurlex_title_generation": 32.6639, "eval_f1_for_task760_msr_sqa_data_to_text": 1.4842, "eval_f1_for_task769_qed_title_generation": 83.272, "eval_f1_for_task827_copa_cause_effect_classification": 50.0, "eval_f1_for_task828_copa_cause_effect_classification": 53.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 24.0, "eval_f1_for_task890_gwsd_textual_entailment": 45.0, "eval_f1_for_task891_gap_coreference_resolution": 62.9857, "eval_f1_for_task892_gap_coreference_resolution": 32.0, "eval_f1_for_task893_gap_coreference_resolution": 47.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 68.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, "eval_f1_for_task957_e2e_data_to_text": 59.0075, "eval_f1_for_task970_sherliic_textual_entailment": 51.0, "eval_f1_for_textual_entailment": 41.875, "eval_f1_for_title_generation": 35.139, "eval_f1_for_word_analogy": 39.3333, "eval_gen_len": 9.2726, "eval_global_step": 500, "eval_loss": 1.0761061906814575, "eval_rouge1": 49.1005, "eval_rouge1_for_answerability_classification": 52.641, "eval_rouge1_for_cause_effect_classification": 56.224, "eval_rouge1_for_coreference_resolution": 46.6246, "eval_rouge1_for_data_to_text": 55.9136, "eval_rouge1_for_dialogue_act_recognition": 53.8408, "eval_rouge1_for_grammar_error_correction": 62.2561, "eval_rouge1_for_keyword_tagging": 58.361, "eval_rouge1_for_overlap_extraction": 35.0683, "eval_rouge1_for_question_rewriting": 66.8362, "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 47.9, "eval_rouge1_for_task034_winogrande_question_rewriting": 46.3492, "eval_rouge1_for_task035_winogrande_question_rewriting": 83.5678, "eval_rouge1_for_task036_qasc_keyword_tagging": 69.3048, "eval_rouge1_for_task039_qasc_overlap_extraction": 27.6667, "eval_rouge1_for_task050_multirc_answerability_classification": 52.0, "eval_rouge1_for_task102_commongen_data_to_text": 68.921, "eval_rouge1_for_task1152_bard_word_analogy": 22.0, "eval_rouge1_for_task1153_bard_word_analogy": 28.6667, "eval_rouge1_for_task1154_bard_word_analogy": 24.0, "eval_rouge1_for_task1155_bard_word_analogy": 67.0, "eval_rouge1_for_task1156_bard_word_analogy": 42.3333, "eval_rouge1_for_task1157_bard_word_analogy": 54.0, "eval_rouge1_for_task1158_bard_word_analogy": 42.0, "eval_rouge1_for_task1159_bard_word_analogy": 35.6667, "eval_rouge1_for_task1161_coda_19_title_generation": 38.577, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 78.2652, "eval_rouge1_for_task121_atomic_question_rewriting": 53.5211, "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.534, "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 41.2861, "eval_rouge1_for_task1356_xlsum_title_generation": 22.1956, "eval_rouge1_for_task1358_xlsum_title_generation": 40.616, "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, "eval_rouge1_for_task1388_cb_textual_entailment": 21.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 46.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 55.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 73.3857, "eval_rouge1_for_task1407_dart_data_to_text": 37.8502, "eval_rouge1_for_task1409_dart_data_to_text": 50.4409, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.5394, "eval_rouge1_for_task1439_doqa_answerability_classification": 50.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 51.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 34.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 55.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 37.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1540_peer_read_title_generation": 42.2777, "eval_rouge1_for_task1554_scitail_textual_entailment": 51.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.9729, "eval_rouge1_for_task1562_zest_question_rewriting": 55.1003, "eval_rouge1_for_task1586_scifact_title_generation": 40.2575, "eval_rouge1_for_task1598_nyc_data_to_text": 52.0557, "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, "eval_rouge1_for_task1615_sick_textual_entailment": 78.3333, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.5243, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 93.4244, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_rouge1_for_task1659_billsum_title_generation": 39.6831, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 67.5082, "eval_rouge1_for_task1728_web_nlg_data_to_text": 58.7805, "eval_rouge1_for_task190_snli_textual_entailment": 35.0, "eval_rouge1_for_task199_multinli_textual_entailment": 45.0, "eval_rouge1_for_task200_multinli_textual_entailment": 40.0, "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, "eval_rouge1_for_task202_multinli_textual_entailment": 24.0, "eval_rouge1_for_task219_rocstories_title_generation": 28.8414, "eval_rouge1_for_task220_rocstories_title_generation": 50.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 66.7167, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 42.4699, "eval_rouge1_for_task288_gigaword_title_generation": 34.6334, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 12.9, "eval_rouge1_for_task329_gap_coreference_resolution": 48.0, "eval_rouge1_for_task330_gap_coreference_resolution": 68.7333, "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 76.5, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 85.6667, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.3333, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 28.6385, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 40.8333, "eval_rouge1_for_task402_grailqa_question_rewriting": 84.2289, "eval_rouge1_for_task418_persent_title_generation": 29.4352, "eval_rouge1_for_task442_com_qa_question_rewriting": 73.5598, "eval_rouge1_for_task500_scruples_title_generation": 20.4533, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 44.5521, "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 44.2995, "eval_rouge1_for_task602_wikitext_title_generation": 15.3379, "eval_rouge1_for_task613_liar_keyword_tagging": 31.8, "eval_rouge1_for_task614_glucose_cause_effect_classification": 36.9296, "eval_rouge1_for_task619_ohsumed_title_generation": 48.2393, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 47.2857, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 45.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 93.4143, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 11.8333, "eval_rouge1_for_task670_ambigqa_question_rewriting": 75.5711, "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.2241, "eval_rouge1_for_task677_ollie_data_to_text": 39.021, "eval_rouge1_for_task738_perspectrum_textual_entailment": 74.0, "eval_rouge1_for_task743_eurlex_title_generation": 34.1613, "eval_rouge1_for_task760_msr_sqa_data_to_text": 1.6797, "eval_rouge1_for_task769_qed_title_generation": 83.2053, "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 53.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.0, "eval_rouge1_for_task890_gwsd_textual_entailment": 45.0, "eval_rouge1_for_task891_gap_coreference_resolution": 63.319, "eval_rouge1_for_task892_gap_coreference_resolution": 32.0, "eval_rouge1_for_task893_gap_coreference_resolution": 47.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 68.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, "eval_rouge1_for_task957_e2e_data_to_text": 60.916, "eval_rouge1_for_task970_sherliic_textual_entailment": 51.0, "eval_rouge1_for_textual_entailment": 44.6806, "eval_rouge1_for_title_generation": 37.4913, "eval_rouge1_for_word_analogy": 39.4583, "eval_rougeL": 47.5758, "eval_rougeL_for_answerability_classification": 52.641, "eval_rougeL_for_cause_effect_classification": 55.8309, "eval_rougeL_for_coreference_resolution": 46.6246, "eval_rougeL_for_data_to_text": 47.755, "eval_rougeL_for_dialogue_act_recognition": 53.8408, "eval_rougeL_for_grammar_error_correction": 61.5046, "eval_rougeL_for_keyword_tagging": 57.6302, "eval_rougeL_for_overlap_extraction": 34.358, "eval_rougeL_for_question_rewriting": 62.5878, "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 47.9, "eval_rougeL_for_task034_winogrande_question_rewriting": 45.8954, "eval_rougeL_for_task035_winogrande_question_rewriting": 82.5342, "eval_rougeL_for_task036_qasc_keyword_tagging": 67.4508, "eval_rougeL_for_task039_qasc_overlap_extraction": 27.6667, "eval_rougeL_for_task050_multirc_answerability_classification": 52.0, "eval_rougeL_for_task102_commongen_data_to_text": 58.0729, "eval_rougeL_for_task1152_bard_word_analogy": 22.0, "eval_rougeL_for_task1153_bard_word_analogy": 28.6667, "eval_rougeL_for_task1154_bard_word_analogy": 24.0, "eval_rougeL_for_task1155_bard_word_analogy": 67.0, "eval_rougeL_for_task1156_bard_word_analogy": 42.3333, "eval_rougeL_for_task1157_bard_word_analogy": 54.0, "eval_rougeL_for_task1158_bard_word_analogy": 42.0, "eval_rougeL_for_task1159_bard_word_analogy": 35.6667, "eval_rougeL_for_task1161_coda_19_title_generation": 31.3051, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 76.6124, "eval_rougeL_for_task121_atomic_question_rewriting": 48.5986, "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.7934, "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 38.4309, "eval_rougeL_for_task1356_xlsum_title_generation": 18.3915, "eval_rougeL_for_task1358_xlsum_title_generation": 32.9547, "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, "eval_rougeL_for_task1388_cb_textual_entailment": 21.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 46.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 55.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 73.3857, "eval_rougeL_for_task1407_dart_data_to_text": 31.8496, "eval_rougeL_for_task1409_dart_data_to_text": 41.9212, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.9012, "eval_rougeL_for_task1439_doqa_answerability_classification": 50.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 51.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 34.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 55.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 37.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1540_peer_read_title_generation": 37.5284, "eval_rougeL_for_task1554_scitail_textual_entailment": 51.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 87.108, "eval_rougeL_for_task1562_zest_question_rewriting": 47.1551, "eval_rougeL_for_task1586_scifact_title_generation": 34.0356, "eval_rougeL_for_task1598_nyc_data_to_text": 40.3924, "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, "eval_rougeL_for_task1615_sick_textual_entailment": 78.3333, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 79.3016, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 92.3204, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, "eval_rougeL_for_task1659_billsum_title_generation": 34.161, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 67.5082, "eval_rougeL_for_task1728_web_nlg_data_to_text": 51.7518, "eval_rougeL_for_task190_snli_textual_entailment": 35.0, "eval_rougeL_for_task199_multinli_textual_entailment": 45.0, "eval_rougeL_for_task200_multinli_textual_entailment": 40.0, "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, "eval_rougeL_for_task202_multinli_textual_entailment": 24.0, "eval_rougeL_for_task219_rocstories_title_generation": 28.8414, "eval_rougeL_for_task220_rocstories_title_generation": 50.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 66.7167, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 41.0493, "eval_rougeL_for_task288_gigaword_title_generation": 30.2246, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 12.9, "eval_rougeL_for_task329_gap_coreference_resolution": 48.0, "eval_rougeL_for_task330_gap_coreference_resolution": 68.7333, "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 76.5, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 85.6667, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.3333, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 27.9882, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 40.8333, "eval_rougeL_for_task402_grailqa_question_rewriting": 67.2663, "eval_rougeL_for_task418_persent_title_generation": 26.1959, "eval_rougeL_for_task442_com_qa_question_rewriting": 68.4157, "eval_rougeL_for_task500_scruples_title_generation": 19.2319, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 43.6929, "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 42.7979, "eval_rougeL_for_task602_wikitext_title_generation": 15.3379, "eval_rougeL_for_task613_liar_keyword_tagging": 31.8, "eval_rougeL_for_task614_glucose_cause_effect_classification": 34.8283, "eval_rougeL_for_task619_ohsumed_title_generation": 40.6641, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 45.4857, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 45.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 93.4143, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 11.8333, "eval_rougeL_for_task670_ambigqa_question_rewriting": 73.4622, "eval_rougeL_for_task671_ambigqa_question_rewriting": 60.7939, "eval_rougeL_for_task677_ollie_data_to_text": 31.6707, "eval_rougeL_for_task738_perspectrum_textual_entailment": 74.0, "eval_rougeL_for_task743_eurlex_title_generation": 30.7891, "eval_rougeL_for_task760_msr_sqa_data_to_text": 1.6235, "eval_rougeL_for_task769_qed_title_generation": 83.2053, "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 53.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.0, "eval_rougeL_for_task890_gwsd_textual_entailment": 45.0, "eval_rougeL_for_task891_gap_coreference_resolution": 63.319, "eval_rougeL_for_task892_gap_coreference_resolution": 32.0, "eval_rougeL_for_task893_gap_coreference_resolution": 47.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 68.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, "eval_rougeL_for_task957_e2e_data_to_text": 46.0555, "eval_rougeL_for_task970_sherliic_textual_entailment": 51.0, "eval_rougeL_for_textual_entailment": 44.6806, "eval_rougeL_for_title_generation": 34.2319, "eval_rougeL_for_word_analogy": 39.4583, "eval_runtime": 888.0806, "eval_samples_per_second": 13.411, "eval_steps_per_second": 0.839, "step": 500 }, { "epoch": 0.22, "learning_rate": 5e-05, "loss": 1.0919, "step": 1000 }, { "epoch": 0.22, "eval_exact_match": 30.6885, "eval_exact_match_for_answerability_classification": 53.4615, "eval_exact_match_for_cause_effect_classification": 40.4286, "eval_exact_match_for_coreference_resolution": 44.2143, "eval_exact_match_for_data_to_text": 7.9903, "eval_exact_match_for_dialogue_act_recognition": 43.0, "eval_exact_match_for_grammar_error_correction": 8.0, "eval_exact_match_for_keyword_tagging": 43.8, "eval_exact_match_for_overlap_extraction": 14.0, "eval_exact_match_for_question_rewriting": 2.7273, "eval_exact_match_for_task020_mctaco_answerability_classification": 51.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 42.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 3.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 20.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 27.0, "eval_exact_match_for_task050_multirc_answerability_classification": 61.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 30.0, "eval_exact_match_for_task1153_bard_word_analogy": 25.0, "eval_exact_match_for_task1154_bard_word_analogy": 23.0, "eval_exact_match_for_task1155_bard_word_analogy": 67.0, "eval_exact_match_for_task1156_bard_word_analogy": 38.0, "eval_exact_match_for_task1157_bard_word_analogy": 55.0, "eval_exact_match_for_task1158_bard_word_analogy": 40.0, "eval_exact_match_for_task1159_bard_word_analogy": 32.0, "eval_exact_match_for_task1161_coda_19_title_generation": 2.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 12.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 1.0, "eval_exact_match_for_task1386_anli_textual_entailment": 1.0, "eval_exact_match_for_task1387_anli_textual_entailment": 2.0, "eval_exact_match_for_task1388_cb_textual_entailment": 24.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 49.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 55.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 65.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 66.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 2.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 58.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 38.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 27.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 51.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 33.0, "eval_exact_match_for_task1540_peer_read_title_generation": 3.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 52.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 54.0, "eval_exact_match_for_task1615_sick_textual_entailment": 45.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 7.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 57.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 56.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 57.0, "eval_exact_match_for_task1659_billsum_title_generation": 3.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 34.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 8.0, "eval_exact_match_for_task190_snli_textual_entailment": 0.0, "eval_exact_match_for_task199_multinli_textual_entailment": 41.0, "eval_exact_match_for_task200_multinli_textual_entailment": 66.0, "eval_exact_match_for_task201_multinli_textual_entailment": 25.0, "eval_exact_match_for_task202_multinli_textual_entailment": 17.0, "eval_exact_match_for_task219_rocstories_title_generation": 6.0, "eval_exact_match_for_task220_rocstories_title_generation": 97.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_exact_match_for_task232_iirc_answerability_classification": 61.0, "eval_exact_match_for_task233_iirc_answerability_classification": 43.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 58.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 57.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 1.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 5.0, "eval_exact_match_for_task329_gap_coreference_resolution": 54.0, "eval_exact_match_for_task330_gap_coreference_resolution": 63.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 51.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 51.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 38.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, "eval_exact_match_for_task500_scruples_title_generation": 0.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 51.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 8.0, "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, "eval_exact_match_for_task613_liar_keyword_tagging": 21.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 19.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 84.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 43.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 75.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 13.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 47.0, "eval_exact_match_for_task743_eurlex_title_generation": 2.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 57.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 67.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 42.0, "eval_exact_match_for_task891_gap_coreference_resolution": 60.0, "eval_exact_match_for_task892_gap_coreference_resolution": 50.0, "eval_exact_match_for_task893_gap_coreference_resolution": 49.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 49.0, "eval_exact_match_for_textual_entailment": 37.3333, "eval_exact_match_for_title_generation": 10.7623, "eval_exact_match_for_word_analogy": 38.75, "eval_f1": 47.0903, "eval_f1_for_answerability_classification": 56.0256, "eval_f1_for_cause_effect_classification": 60.4689, "eval_f1_for_coreference_resolution": 50.6486, "eval_f1_for_data_to_text": 51.6378, "eval_f1_for_dialogue_act_recognition": 46.5714, "eval_f1_for_grammar_error_correction": 57.0915, "eval_f1_for_keyword_tagging": 58.0512, "eval_f1_for_overlap_extraction": 39.9381, "eval_f1_for_question_rewriting": 58.6965, "eval_f1_for_task020_mctaco_answerability_classification": 51.0, "eval_f1_for_task033_winogrande_coreference_resolution": 45.1667, "eval_f1_for_task034_winogrande_question_rewriting": 10.7637, "eval_f1_for_task035_winogrande_question_rewriting": 88.2127, "eval_f1_for_task036_qasc_keyword_tagging": 56.285, "eval_f1_for_task039_qasc_overlap_extraction": 34.1667, "eval_f1_for_task050_multirc_answerability_classification": 61.0, "eval_f1_for_task102_commongen_data_to_text": 51.6101, "eval_f1_for_task1152_bard_word_analogy": 30.0, "eval_f1_for_task1153_bard_word_analogy": 25.0, "eval_f1_for_task1154_bard_word_analogy": 23.0, "eval_f1_for_task1155_bard_word_analogy": 67.0, "eval_f1_for_task1156_bard_word_analogy": 38.0, "eval_f1_for_task1157_bard_word_analogy": 55.0, "eval_f1_for_task1158_bard_word_analogy": 40.0, "eval_f1_for_task1159_bard_word_analogy": 32.0, "eval_f1_for_task1161_coda_19_title_generation": 35.9366, "eval_f1_for_task1195_disfl_qa_question_rewriting": 76.3385, "eval_f1_for_task121_atomic_question_rewriting": 48.9669, "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.379, "eval_f1_for_task1344_rte_textual_entailment": 50.0, "eval_f1_for_task1345_qqp_question_rewriting": 38.3019, "eval_f1_for_task1356_xlsum_title_generation": 22.0104, "eval_f1_for_task1358_xlsum_title_generation": 34.8875, "eval_f1_for_task1385_anli_textual_entailment": 1.0, "eval_f1_for_task1386_anli_textual_entailment": 1.0, "eval_f1_for_task1387_anli_textual_entailment": 2.0, "eval_f1_for_task1388_cb_textual_entailment": 24.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 55.0, "eval_f1_for_task1393_copa_cause_effect_classification": 65.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 66.0, "eval_f1_for_task1407_dart_data_to_text": 29.3043, "eval_f1_for_task1409_dart_data_to_text": 47.906, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.6561, "eval_f1_for_task1439_doqa_answerability_classification": 49.0, "eval_f1_for_task1442_doqa_answerability_classification": 58.0, "eval_f1_for_task1516_imppres_textual_entailment": 38.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 27.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 33.0, "eval_f1_for_task1540_peer_read_title_generation": 38.9441, "eval_f1_for_task1554_scitail_textual_entailment": 52.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.5269, "eval_f1_for_task1562_zest_question_rewriting": 51.4579, "eval_f1_for_task1586_scifact_title_generation": 32.9273, "eval_f1_for_task1598_nyc_data_to_text": 52.7761, "eval_f1_for_task1612_sick_textual_entailment": 54.0, "eval_f1_for_task1615_sick_textual_entailment": 45.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.8382, "eval_f1_for_task1624_disfl_qa_answerability_classification": 57.0, "eval_f1_for_task1631_open_pi_data_to_text": 94.7555, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 57.0, "eval_f1_for_task1659_billsum_title_generation": 34.9777, "eval_f1_for_task1664_wino_bias_coreference_resolution": 67.619, "eval_f1_for_task1728_web_nlg_data_to_text": 58.3536, "eval_f1_for_task190_snli_textual_entailment": 0.0, "eval_f1_for_task199_multinli_textual_entailment": 41.0, "eval_f1_for_task200_multinli_textual_entailment": 66.0, "eval_f1_for_task201_multinli_textual_entailment": 25.0, "eval_f1_for_task202_multinli_textual_entailment": 17.0, "eval_f1_for_task219_rocstories_title_generation": 21.8549, "eval_f1_for_task220_rocstories_title_generation": 97.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_f1_for_task232_iirc_answerability_classification": 61.0, "eval_f1_for_task233_iirc_answerability_classification": 43.0, "eval_f1_for_task242_tweetqa_answerability_classification": 58.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 66.3, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 45.7095, "eval_f1_for_task288_gigaword_title_generation": 30.1573, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 31.7333, "eval_f1_for_task329_gap_coreference_resolution": 54.0, "eval_f1_for_task330_gap_coreference_resolution": 70.8952, "eval_f1_for_task349_squad2.0_answerability_classification": 51.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_f1_for_task391_cod3s_cause_effect_classification": 83.6667, "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task393_cod3s_cause_effect_classification": 29.1314, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 39.3333, "eval_f1_for_task402_grailqa_question_rewriting": 81.5085, "eval_f1_for_task418_persent_title_generation": 24.202, "eval_f1_for_task442_com_qa_question_rewriting": 70.5151, "eval_f1_for_task500_scruples_title_generation": 21.5462, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 41.7976, "eval_f1_for_task520_aquamuse_answerability_classification": 51.0, "eval_f1_for_task569_recipe_nlg_title_generation": 39.7572, "eval_f1_for_task602_wikitext_title_generation": 12.818, "eval_f1_for_task613_liar_keyword_tagging": 22.3333, "eval_f1_for_task614_glucose_cause_effect_classification": 45.1507, "eval_f1_for_task619_ohsumed_title_generation": 43.7408, "eval_f1_for_task620_ohsumed_keyword_tagging": 37.3333, "eval_f1_for_task623_ohsumed_keyword_tagging": 84.0, "eval_f1_for_task640_e_snli_textual_entailment": 31.0, "eval_f1_for_task641_e_snli_textual_entailment": 33.0, "eval_f1_for_task642_e_snli_textual_entailment": 43.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 90.3043, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 15.5, "eval_f1_for_task670_ambigqa_question_rewriting": 76.3653, "eval_f1_for_task671_ambigqa_question_rewriting": 23.3932, "eval_f1_for_task677_ollie_data_to_text": 32.8918, "eval_f1_for_task738_perspectrum_textual_entailment": 47.0, "eval_f1_for_task743_eurlex_title_generation": 35.8779, "eval_f1_for_task760_msr_sqa_data_to_text": 3.1913, "eval_f1_for_task769_qed_title_generation": 78.4858, "eval_f1_for_task827_copa_cause_effect_classification": 50.0, "eval_f1_for_task828_copa_cause_effect_classification": 67.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, "eval_f1_for_task890_gwsd_textual_entailment": 42.0, "eval_f1_for_task891_gap_coreference_resolution": 65.5333, "eval_f1_for_task892_gap_coreference_resolution": 50.0, "eval_f1_for_task893_gap_coreference_resolution": 49.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, "eval_f1_for_task957_e2e_data_to_text": 58.1008, "eval_f1_for_task970_sherliic_textual_entailment": 49.0, "eval_f1_for_textual_entailment": 37.3333, "eval_f1_for_title_generation": 36.9534, "eval_f1_for_word_analogy": 38.75, "eval_gen_len": 9.8474, "eval_global_step": 1000, "eval_loss": 1.1213940382003784, "eval_rouge1": 49.949, "eval_rouge1_for_answerability_classification": 56.0256, "eval_rouge1_for_cause_effect_classification": 61.463, "eval_rouge1_for_coreference_resolution": 51.4316, "eval_rouge1_for_data_to_text": 54.0756, "eval_rouge1_for_dialogue_act_recognition": 50.7166, "eval_rouge1_for_grammar_error_correction": 61.967, "eval_rouge1_for_keyword_tagging": 62.9715, "eval_rouge1_for_overlap_extraction": 43.7424, "eval_rouge1_for_question_rewriting": 60.2724, "eval_rouge1_for_task020_mctaco_answerability_classification": 51.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 45.0667, "eval_rouge1_for_task034_winogrande_question_rewriting": 10.7223, "eval_rouge1_for_task035_winogrande_question_rewriting": 88.9041, "eval_rouge1_for_task036_qasc_keyword_tagging": 62.5675, "eval_rouge1_for_task039_qasc_overlap_extraction": 41.0, "eval_rouge1_for_task050_multirc_answerability_classification": 61.0, "eval_rouge1_for_task102_commongen_data_to_text": 64.4387, "eval_rouge1_for_task1152_bard_word_analogy": 30.0, "eval_rouge1_for_task1153_bard_word_analogy": 26.0, "eval_rouge1_for_task1154_bard_word_analogy": 23.0, "eval_rouge1_for_task1155_bard_word_analogy": 67.0, "eval_rouge1_for_task1156_bard_word_analogy": 38.0, "eval_rouge1_for_task1157_bard_word_analogy": 55.0, "eval_rouge1_for_task1158_bard_word_analogy": 40.0, "eval_rouge1_for_task1159_bard_word_analogy": 32.0, "eval_rouge1_for_task1161_coda_19_title_generation": 39.3063, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 76.7595, "eval_rouge1_for_task121_atomic_question_rewriting": 51.4637, "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.6657, "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 41.3707, "eval_rouge1_for_task1356_xlsum_title_generation": 24.7831, "eval_rouge1_for_task1358_xlsum_title_generation": 38.6924, "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 55.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 65.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 72.3492, "eval_rouge1_for_task1407_dart_data_to_text": 29.4527, "eval_rouge1_for_task1409_dart_data_to_text": 47.9519, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.2734, "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 58.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 38.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 35.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 33.0, "eval_rouge1_for_task1540_peer_read_title_generation": 42.1621, "eval_rouge1_for_task1554_scitail_textual_entailment": 52.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.6606, "eval_rouge1_for_task1562_zest_question_rewriting": 54.1988, "eval_rouge1_for_task1586_scifact_title_generation": 37.3005, "eval_rouge1_for_task1598_nyc_data_to_text": 54.1017, "eval_rouge1_for_task1612_sick_textual_entailment": 54.0, "eval_rouge1_for_task1615_sick_textual_entailment": 81.6667, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.3568, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 57.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 94.781, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 57.0, "eval_rouge1_for_task1659_billsum_title_generation": 36.1848, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 67.619, "eval_rouge1_for_task1728_web_nlg_data_to_text": 59.8459, "eval_rouge1_for_task190_snli_textual_entailment": 0.0, "eval_rouge1_for_task199_multinli_textual_entailment": 41.0, "eval_rouge1_for_task200_multinli_textual_entailment": 66.0, "eval_rouge1_for_task201_multinli_textual_entailment": 25.0, "eval_rouge1_for_task202_multinli_textual_entailment": 17.0, "eval_rouge1_for_task219_rocstories_title_generation": 26.3794, "eval_rouge1_for_task220_rocstories_title_generation": 97.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_rouge1_for_task232_iirc_answerability_classification": 61.0, "eval_rouge1_for_task233_iirc_answerability_classification": 43.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 58.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 66.9667, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 46.4848, "eval_rouge1_for_task288_gigaword_title_generation": 32.8173, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 31.9667, "eval_rouge1_for_task329_gap_coreference_resolution": 54.0, "eval_rouge1_for_task330_gap_coreference_resolution": 70.8238, "eval_rouge1_for_task349_squad2.0_answerability_classification": 51.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.6667, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 29.8122, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 49.3333, "eval_rouge1_for_task402_grailqa_question_rewriting": 84.1944, "eval_rouge1_for_task418_persent_title_generation": 27.6127, "eval_rouge1_for_task442_com_qa_question_rewriting": 73.6349, "eval_rouge1_for_task500_scruples_title_generation": 23.1421, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 42.0805, "eval_rouge1_for_task520_aquamuse_answerability_classification": 51.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 42.0765, "eval_rouge1_for_task602_wikitext_title_generation": 13.383, "eval_rouge1_for_task613_liar_keyword_tagging": 36.8333, "eval_rouge1_for_task614_glucose_cause_effect_classification": 51.4291, "eval_rouge1_for_task619_ohsumed_title_generation": 47.1126, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 40.3667, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 84.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 43.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 91.09, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 15.4, "eval_rouge1_for_task670_ambigqa_question_rewriting": 77.3655, "eval_rouge1_for_task671_ambigqa_question_rewriting": 24.0264, "eval_rouge1_for_task677_ollie_data_to_text": 35.4735, "eval_rouge1_for_task738_perspectrum_textual_entailment": 72.0, "eval_rouge1_for_task743_eurlex_title_generation": 37.3452, "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.2379, "eval_rouge1_for_task769_qed_title_generation": 78.7859, "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 67.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.6667, "eval_rouge1_for_task890_gwsd_textual_entailment": 42.0, "eval_rouge1_for_task891_gap_coreference_resolution": 65.8667, "eval_rouge1_for_task892_gap_coreference_resolution": 50.0, "eval_rouge1_for_task893_gap_coreference_resolution": 49.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, "eval_rouge1_for_task957_e2e_data_to_text": 59.7767, "eval_rouge1_for_task970_sherliic_textual_entailment": 49.0, "eval_rouge1_for_textual_entailment": 44.5694, "eval_rouge1_for_title_generation": 39.1642, "eval_rouge1_for_word_analogy": 38.875, "eval_rougeL": 48.554, "eval_rougeL_for_answerability_classification": 56.0256, "eval_rougeL_for_cause_effect_classification": 60.6589, "eval_rougeL_for_coreference_resolution": 51.4316, "eval_rougeL_for_data_to_text": 46.9266, "eval_rougeL_for_dialogue_act_recognition": 50.7166, "eval_rougeL_for_grammar_error_correction": 61.1608, "eval_rougeL_for_keyword_tagging": 62.8619, "eval_rougeL_for_overlap_extraction": 43.2583, "eval_rougeL_for_question_rewriting": 56.255, "eval_rougeL_for_task020_mctaco_answerability_classification": 51.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 45.0667, "eval_rougeL_for_task034_winogrande_question_rewriting": 10.7223, "eval_rougeL_for_task035_winogrande_question_rewriting": 88.344, "eval_rougeL_for_task036_qasc_keyword_tagging": 62.4197, "eval_rougeL_for_task039_qasc_overlap_extraction": 41.0, "eval_rougeL_for_task050_multirc_answerability_classification": 61.0, "eval_rougeL_for_task102_commongen_data_to_text": 56.6276, "eval_rougeL_for_task1152_bard_word_analogy": 30.0, "eval_rougeL_for_task1153_bard_word_analogy": 26.0, "eval_rougeL_for_task1154_bard_word_analogy": 23.0, "eval_rougeL_for_task1155_bard_word_analogy": 67.0, "eval_rougeL_for_task1156_bard_word_analogy": 38.0, "eval_rougeL_for_task1157_bard_word_analogy": 55.0, "eval_rougeL_for_task1158_bard_word_analogy": 40.0, "eval_rougeL_for_task1159_bard_word_analogy": 32.0, "eval_rougeL_for_task1161_coda_19_title_generation": 32.7293, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 75.1506, "eval_rougeL_for_task121_atomic_question_rewriting": 44.9085, "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.0252, "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 38.525, "eval_rougeL_for_task1356_xlsum_title_generation": 21.6444, "eval_rougeL_for_task1358_xlsum_title_generation": 32.6945, "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 49.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 55.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 65.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 72.3492, "eval_rougeL_for_task1407_dart_data_to_text": 24.5924, "eval_rougeL_for_task1409_dart_data_to_text": 41.2221, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.5259, "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 58.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 38.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 35.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 51.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 33.0, "eval_rougeL_for_task1540_peer_read_title_generation": 38.6567, "eval_rougeL_for_task1554_scitail_textual_entailment": 52.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.7957, "eval_rougeL_for_task1562_zest_question_rewriting": 48.5434, "eval_rougeL_for_task1586_scifact_title_generation": 30.1577, "eval_rougeL_for_task1598_nyc_data_to_text": 41.3406, "eval_rougeL_for_task1612_sick_textual_entailment": 54.0, "eval_rougeL_for_task1615_sick_textual_entailment": 81.6667, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.7249, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 57.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 94.5992, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 57.0, "eval_rougeL_for_task1659_billsum_title_generation": 31.5664, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 67.619, "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.7076, "eval_rougeL_for_task190_snli_textual_entailment": 0.0, "eval_rougeL_for_task199_multinli_textual_entailment": 41.0, "eval_rougeL_for_task200_multinli_textual_entailment": 66.0, "eval_rougeL_for_task201_multinli_textual_entailment": 25.0, "eval_rougeL_for_task202_multinli_textual_entailment": 17.0, "eval_rougeL_for_task219_rocstories_title_generation": 26.3794, "eval_rougeL_for_task220_rocstories_title_generation": 97.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, "eval_rougeL_for_task232_iirc_answerability_classification": 61.0, "eval_rougeL_for_task233_iirc_answerability_classification": 43.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 58.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 66.9667, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 45.5166, "eval_rougeL_for_task288_gigaword_title_generation": 28.4488, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 31.9667, "eval_rougeL_for_task329_gap_coreference_resolution": 54.0, "eval_rougeL_for_task330_gap_coreference_resolution": 70.8238, "eval_rougeL_for_task349_squad2.0_answerability_classification": 51.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.6667, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 29.0277, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 49.3333, "eval_rougeL_for_task402_grailqa_question_rewriting": 67.8052, "eval_rougeL_for_task418_persent_title_generation": 23.9172, "eval_rougeL_for_task442_com_qa_question_rewriting": 68.037, "eval_rougeL_for_task500_scruples_title_generation": 22.2317, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 41.5861, "eval_rougeL_for_task520_aquamuse_answerability_classification": 51.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 41.6098, "eval_rougeL_for_task602_wikitext_title_generation": 13.1449, "eval_rougeL_for_task613_liar_keyword_tagging": 36.8333, "eval_rougeL_for_task614_glucose_cause_effect_classification": 46.5842, "eval_rougeL_for_task619_ohsumed_title_generation": 38.9981, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 39.9667, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 84.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 43.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 91.09, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 15.4, "eval_rougeL_for_task670_ambigqa_question_rewriting": 75.9324, "eval_rougeL_for_task671_ambigqa_question_rewriting": 22.1115, "eval_rougeL_for_task677_ollie_data_to_text": 28.7504, "eval_rougeL_for_task738_perspectrum_textual_entailment": 72.0, "eval_rougeL_for_task743_eurlex_title_generation": 33.0671, "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.0129, "eval_rougeL_for_task769_qed_title_generation": 78.7859, "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 67.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 54.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.6667, "eval_rougeL_for_task890_gwsd_textual_entailment": 42.0, "eval_rougeL_for_task891_gap_coreference_resolution": 65.8667, "eval_rougeL_for_task892_gap_coreference_resolution": 50.0, "eval_rougeL_for_task893_gap_coreference_resolution": 49.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, "eval_rougeL_for_task957_e2e_data_to_text": 45.9902, "eval_rougeL_for_task970_sherliic_textual_entailment": 49.0, "eval_rougeL_for_textual_entailment": 44.5694, "eval_rougeL_for_title_generation": 36.1289, "eval_rougeL_for_word_analogy": 38.875, "eval_runtime": 1011.0487, "eval_samples_per_second": 11.78, "eval_steps_per_second": 0.737, "step": 1000 }, { "epoch": 0.33, "learning_rate": 5e-05, "loss": 1.028, "step": 1500 }, { "epoch": 0.33, "eval_exact_match": 30.9488, "eval_exact_match_for_answerability_classification": 52.9231, "eval_exact_match_for_cause_effect_classification": 40.8571, "eval_exact_match_for_coreference_resolution": 42.0714, "eval_exact_match_for_data_to_text": 7.2639, "eval_exact_match_for_dialogue_act_recognition": 50.5714, "eval_exact_match_for_grammar_error_correction": 6.5, "eval_exact_match_for_keyword_tagging": 49.0, "eval_exact_match_for_overlap_extraction": 9.5, "eval_exact_match_for_question_rewriting": 2.7273, "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 54.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 7.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 2.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 36.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 18.0, "eval_exact_match_for_task050_multirc_answerability_classification": 66.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 32.0, "eval_exact_match_for_task1153_bard_word_analogy": 29.0, "eval_exact_match_for_task1154_bard_word_analogy": 27.0, "eval_exact_match_for_task1155_bard_word_analogy": 68.0, "eval_exact_match_for_task1156_bard_word_analogy": 42.0, "eval_exact_match_for_task1157_bard_word_analogy": 55.0, "eval_exact_match_for_task1158_bard_word_analogy": 38.0, "eval_exact_match_for_task1159_bard_word_analogy": 39.0, "eval_exact_match_for_task1161_coda_19_title_generation": 2.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 6.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, "eval_exact_match_for_task1344_rte_textual_entailment": 54.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 0.0, "eval_exact_match_for_task1386_anli_textual_entailment": 1.0, "eval_exact_match_for_task1387_anli_textual_entailment": 0.0, "eval_exact_match_for_task1388_cb_textual_entailment": 21.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 51.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 70.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 60.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 1.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 27.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 47.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 57.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 46.0, "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 52.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 36.0, "eval_exact_match_for_task1615_sick_textual_entailment": 45.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 6.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 56.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 51.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 58.0, "eval_exact_match_for_task1659_billsum_title_generation": 3.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 39.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, "eval_exact_match_for_task190_snli_textual_entailment": 0.0, "eval_exact_match_for_task199_multinli_textual_entailment": 36.0, "eval_exact_match_for_task200_multinli_textual_entailment": 73.0, "eval_exact_match_for_task201_multinli_textual_entailment": 27.0, "eval_exact_match_for_task202_multinli_textual_entailment": 24.0, "eval_exact_match_for_task219_rocstories_title_generation": 5.0, "eval_exact_match_for_task220_rocstories_title_generation": 90.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, "eval_exact_match_for_task233_iirc_answerability_classification": 40.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 58.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 42.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 1.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 14.0, "eval_exact_match_for_task329_gap_coreference_resolution": 46.0, "eval_exact_match_for_task330_gap_coreference_resolution": 62.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 53.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 49.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 36.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, "eval_exact_match_for_task500_scruples_title_generation": 2.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 5.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 51.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 7.0, "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, "eval_exact_match_for_task613_liar_keyword_tagging": 20.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 1.0, "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 16.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 96.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 30.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 39.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 77.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 5.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 43.0, "eval_exact_match_for_task743_eurlex_title_generation": 3.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 68.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 66.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 60.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 37.0, "eval_exact_match_for_task891_gap_coreference_resolution": 59.0, "eval_exact_match_for_task892_gap_coreference_resolution": 50.0, "eval_exact_match_for_task893_gap_coreference_resolution": 31.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, "eval_exact_match_for_task957_e2e_data_to_text": 1.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 70.0, "eval_exact_match_for_textual_entailment": 36.5833, "eval_exact_match_for_title_generation": 10.8744, "eval_exact_match_for_word_analogy": 41.25, "eval_f1": 48.7971, "eval_f1_for_answerability_classification": 55.4872, "eval_f1_for_cause_effect_classification": 61.7299, "eval_f1_for_coreference_resolution": 48.8048, "eval_f1_for_data_to_text": 53.2762, "eval_f1_for_dialogue_act_recognition": 54.1429, "eval_f1_for_grammar_error_correction": 57.2086, "eval_f1_for_keyword_tagging": 61.9694, "eval_f1_for_overlap_extraction": 34.1371, "eval_f1_for_question_rewriting": 70.3341, "eval_f1_for_task020_mctaco_answerability_classification": 53.0, "eval_f1_for_task033_winogrande_coreference_resolution": 55.3333, "eval_f1_for_task034_winogrande_question_rewriting": 87.9518, "eval_f1_for_task035_winogrande_question_rewriting": 89.0545, "eval_f1_for_task036_qasc_keyword_tagging": 68.4762, "eval_f1_for_task039_qasc_overlap_extraction": 22.6667, "eval_f1_for_task050_multirc_answerability_classification": 66.0, "eval_f1_for_task102_commongen_data_to_text": 55.5751, "eval_f1_for_task1152_bard_word_analogy": 32.0, "eval_f1_for_task1153_bard_word_analogy": 29.0, "eval_f1_for_task1154_bard_word_analogy": 27.0, "eval_f1_for_task1155_bard_word_analogy": 68.0, "eval_f1_for_task1156_bard_word_analogy": 42.0, "eval_f1_for_task1157_bard_word_analogy": 55.0, "eval_f1_for_task1158_bard_word_analogy": 38.0, "eval_f1_for_task1159_bard_word_analogy": 39.0, "eval_f1_for_task1161_coda_19_title_generation": 38.4165, "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.7669, "eval_f1_for_task121_atomic_question_rewriting": 48.6877, "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.0329, "eval_f1_for_task1344_rte_textual_entailment": 54.0, "eval_f1_for_task1345_qqp_question_rewriting": 38.5934, "eval_f1_for_task1356_xlsum_title_generation": 23.6523, "eval_f1_for_task1358_xlsum_title_generation": 33.3598, "eval_f1_for_task1385_anli_textual_entailment": 0.0, "eval_f1_for_task1386_anli_textual_entailment": 1.0, "eval_f1_for_task1387_anli_textual_entailment": 0.0, "eval_f1_for_task1388_cb_textual_entailment": 21.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 51.0, "eval_f1_for_task1393_copa_cause_effect_classification": 70.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 60.0, "eval_f1_for_task1407_dart_data_to_text": 34.7959, "eval_f1_for_task1409_dart_data_to_text": 47.6266, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.8591, "eval_f1_for_task1439_doqa_answerability_classification": 49.0, "eval_f1_for_task1442_doqa_answerability_classification": 53.0, "eval_f1_for_task1516_imppres_textual_entailment": 27.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 47.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 57.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 46.0, "eval_f1_for_task1540_peer_read_title_generation": 39.8249, "eval_f1_for_task1554_scitail_textual_entailment": 52.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.5581, "eval_f1_for_task1562_zest_question_rewriting": 48.9308, "eval_f1_for_task1586_scifact_title_generation": 36.301, "eval_f1_for_task1598_nyc_data_to_text": 51.8334, "eval_f1_for_task1612_sick_textual_entailment": 36.0, "eval_f1_for_task1615_sick_textual_entailment": 45.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.5531, "eval_f1_for_task1624_disfl_qa_answerability_classification": 56.0, "eval_f1_for_task1631_open_pi_data_to_text": 94.0887, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 58.0, "eval_f1_for_task1659_billsum_title_generation": 38.9582, "eval_f1_for_task1664_wino_bias_coreference_resolution": 73.1714, "eval_f1_for_task1728_web_nlg_data_to_text": 60.4539, "eval_f1_for_task190_snli_textual_entailment": 0.0, "eval_f1_for_task199_multinli_textual_entailment": 36.0, "eval_f1_for_task200_multinli_textual_entailment": 73.0, "eval_f1_for_task201_multinli_textual_entailment": 27.0, "eval_f1_for_task202_multinli_textual_entailment": 24.0, "eval_f1_for_task219_rocstories_title_generation": 21.9445, "eval_f1_for_task220_rocstories_title_generation": 90.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, "eval_f1_for_task232_iirc_answerability_classification": 50.0, "eval_f1_for_task233_iirc_answerability_classification": 40.0, "eval_f1_for_task242_tweetqa_answerability_classification": 58.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 54.7333, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 45.6075, "eval_f1_for_task288_gigaword_title_generation": 29.6137, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 39.0667, "eval_f1_for_task329_gap_coreference_resolution": 46.0, "eval_f1_for_task330_gap_coreference_resolution": 68.8286, "eval_f1_for_task349_squad2.0_answerability_classification": 53.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_f1_for_task391_cod3s_cause_effect_classification": 83.0, "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task393_cod3s_cause_effect_classification": 31.071, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 37.1667, "eval_f1_for_task402_grailqa_question_rewriting": 81.774, "eval_f1_for_task418_persent_title_generation": 28.9196, "eval_f1_for_task442_com_qa_question_rewriting": 71.1376, "eval_f1_for_task500_scruples_title_generation": 21.9018, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 42.6566, "eval_f1_for_task520_aquamuse_answerability_classification": 51.0, "eval_f1_for_task569_recipe_nlg_title_generation": 39.0272, "eval_f1_for_task602_wikitext_title_generation": 15.1685, "eval_f1_for_task613_liar_keyword_tagging": 20.6667, "eval_f1_for_task614_glucose_cause_effect_classification": 48.7049, "eval_f1_for_task619_ohsumed_title_generation": 46.9236, "eval_f1_for_task620_ohsumed_keyword_tagging": 33.2, "eval_f1_for_task623_ohsumed_keyword_tagging": 96.0, "eval_f1_for_task640_e_snli_textual_entailment": 30.0, "eval_f1_for_task641_e_snli_textual_entailment": 33.0, "eval_f1_for_task642_e_snli_textual_entailment": 39.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 91.5043, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 11.0, "eval_f1_for_task670_ambigqa_question_rewriting": 79.7693, "eval_f1_for_task671_ambigqa_question_rewriting": 68.4555, "eval_f1_for_task677_ollie_data_to_text": 35.2353, "eval_f1_for_task738_perspectrum_textual_entailment": 43.0, "eval_f1_for_task743_eurlex_title_generation": 38.6046, "eval_f1_for_task760_msr_sqa_data_to_text": 7.468, "eval_f1_for_task769_qed_title_generation": 85.8763, "eval_f1_for_task827_copa_cause_effect_classification": 50.0, "eval_f1_for_task828_copa_cause_effect_classification": 66.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 60.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.0, "eval_f1_for_task890_gwsd_textual_entailment": 37.0, "eval_f1_for_task891_gap_coreference_resolution": 65.9667, "eval_f1_for_task892_gap_coreference_resolution": 50.0, "eval_f1_for_task893_gap_coreference_resolution": 31.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, "eval_f1_for_task957_e2e_data_to_text": 58.5107, "eval_f1_for_task970_sherliic_textual_entailment": 70.0, "eval_f1_for_textual_entailment": 36.5833, "eval_f1_for_title_generation": 38.215, "eval_f1_for_word_analogy": 41.25, "eval_gen_len": 9.3165, "eval_global_step": 1500, "eval_loss": 1.1518067121505737, "eval_rouge1": 51.912, "eval_rouge1_for_answerability_classification": 55.4872, "eval_rouge1_for_cause_effect_classification": 62.6493, "eval_rouge1_for_coreference_resolution": 49.3997, "eval_rouge1_for_data_to_text": 56.4622, "eval_rouge1_for_dialogue_act_recognition": 58.1587, "eval_rouge1_for_grammar_error_correction": 62.232, "eval_rouge1_for_keyword_tagging": 66.8608, "eval_rouge1_for_overlap_extraction": 37.111, "eval_rouge1_for_question_rewriting": 71.9559, "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 55.3333, "eval_rouge1_for_task034_winogrande_question_rewriting": 88.108, "eval_rouge1_for_task035_winogrande_question_rewriting": 89.647, "eval_rouge1_for_task036_qasc_keyword_tagging": 74.6661, "eval_rouge1_for_task039_qasc_overlap_extraction": 27.5, "eval_rouge1_for_task050_multirc_answerability_classification": 66.0, "eval_rouge1_for_task102_commongen_data_to_text": 69.8821, "eval_rouge1_for_task1152_bard_word_analogy": 32.0, "eval_rouge1_for_task1153_bard_word_analogy": 31.0, "eval_rouge1_for_task1154_bard_word_analogy": 27.0, "eval_rouge1_for_task1155_bard_word_analogy": 68.0, "eval_rouge1_for_task1156_bard_word_analogy": 42.0, "eval_rouge1_for_task1157_bard_word_analogy": 55.0, "eval_rouge1_for_task1158_bard_word_analogy": 38.0, "eval_rouge1_for_task1159_bard_word_analogy": 39.0, "eval_rouge1_for_task1161_coda_19_title_generation": 42.6332, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.9155, "eval_rouge1_for_task121_atomic_question_rewriting": 51.4376, "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 13.8378, "eval_rouge1_for_task1344_rte_textual_entailment": 54.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 41.8992, "eval_rouge1_for_task1356_xlsum_title_generation": 27.0428, "eval_rouge1_for_task1358_xlsum_title_generation": 37.9838, "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, "eval_rouge1_for_task1387_anli_textual_entailment": 32.0, "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 51.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 70.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 72.7778, "eval_rouge1_for_task1407_dart_data_to_text": 35.7785, "eval_rouge1_for_task1409_dart_data_to_text": 48.7368, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.8574, "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 27.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 47.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 57.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 46.0, "eval_rouge1_for_task1540_peer_read_title_generation": 42.8272, "eval_rouge1_for_task1554_scitail_textual_entailment": 52.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.6066, "eval_rouge1_for_task1562_zest_question_rewriting": 51.7885, "eval_rouge1_for_task1586_scifact_title_generation": 40.1626, "eval_rouge1_for_task1598_nyc_data_to_text": 54.1319, "eval_rouge1_for_task1612_sick_textual_entailment": 36.0, "eval_rouge1_for_task1615_sick_textual_entailment": 81.6667, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.9472, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 56.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 94.3135, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 58.0, "eval_rouge1_for_task1659_billsum_title_generation": 41.1682, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 73.1714, "eval_rouge1_for_task1728_web_nlg_data_to_text": 62.4147, "eval_rouge1_for_task190_snli_textual_entailment": 0.0, "eval_rouge1_for_task199_multinli_textual_entailment": 36.0, "eval_rouge1_for_task200_multinli_textual_entailment": 73.0, "eval_rouge1_for_task201_multinli_textual_entailment": 27.0, "eval_rouge1_for_task202_multinli_textual_entailment": 24.0, "eval_rouge1_for_task219_rocstories_title_generation": 24.8174, "eval_rouge1_for_task220_rocstories_title_generation": 90.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, "eval_rouge1_for_task233_iirc_answerability_classification": 40.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 58.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 55.4, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 46.722, "eval_rouge1_for_task288_gigaword_title_generation": 33.1476, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 39.9667, "eval_rouge1_for_task329_gap_coreference_resolution": 46.0, "eval_rouge1_for_task330_gap_coreference_resolution": 68.7571, "eval_rouge1_for_task349_squad2.0_answerability_classification": 53.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.0, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.5559, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 43.5, "eval_rouge1_for_task402_grailqa_question_rewriting": 84.1383, "eval_rouge1_for_task418_persent_title_generation": 32.7467, "eval_rouge1_for_task442_com_qa_question_rewriting": 74.4565, "eval_rouge1_for_task500_scruples_title_generation": 23.3076, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 43.3709, "eval_rouge1_for_task520_aquamuse_answerability_classification": 51.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 41.2396, "eval_rouge1_for_task602_wikitext_title_generation": 15.8367, "eval_rouge1_for_task613_liar_keyword_tagging": 32.5, "eval_rouge1_for_task614_glucose_cause_effect_classification": 54.6559, "eval_rouge1_for_task619_ohsumed_title_generation": 50.6789, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 39.1333, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 96.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 30.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 39.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 92.0043, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 11.1667, "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.8278, "eval_rouge1_for_task671_ambigqa_question_rewriting": 69.349, "eval_rouge1_for_task677_ollie_data_to_text": 38.549, "eval_rouge1_for_task738_perspectrum_textual_entailment": 89.0, "eval_rouge1_for_task743_eurlex_title_generation": 40.5448, "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.5547, "eval_rouge1_for_task769_qed_title_generation": 85.7772, "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 66.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 60.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 49.3333, "eval_rouge1_for_task890_gwsd_textual_entailment": 37.0, "eval_rouge1_for_task891_gap_coreference_resolution": 66.3, "eval_rouge1_for_task892_gap_coreference_resolution": 50.0, "eval_rouge1_for_task893_gap_coreference_resolution": 31.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, "eval_rouge1_for_task957_e2e_data_to_text": 60.6068, "eval_rouge1_for_task970_sherliic_textual_entailment": 70.0, "eval_rouge1_for_textual_entailment": 44.8611, "eval_rouge1_for_title_generation": 40.616, "eval_rouge1_for_word_analogy": 41.5, "eval_rougeL": 50.4083, "eval_rougeL_for_answerability_classification": 55.4872, "eval_rougeL_for_cause_effect_classification": 62.0993, "eval_rougeL_for_coreference_resolution": 49.3997, "eval_rougeL_for_data_to_text": 48.3117, "eval_rougeL_for_dialogue_act_recognition": 58.1587, "eval_rougeL_for_grammar_error_correction": 61.5416, "eval_rougeL_for_keyword_tagging": 66.4408, "eval_rougeL_for_overlap_extraction": 36.0229, "eval_rougeL_for_question_rewriting": 68.1701, "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 55.3333, "eval_rougeL_for_task034_winogrande_question_rewriting": 87.8166, "eval_rougeL_for_task035_winogrande_question_rewriting": 89.5845, "eval_rougeL_for_task036_qasc_keyword_tagging": 73.8994, "eval_rougeL_for_task039_qasc_overlap_extraction": 27.5, "eval_rougeL_for_task050_multirc_answerability_classification": 66.0, "eval_rougeL_for_task102_commongen_data_to_text": 59.3213, "eval_rougeL_for_task1152_bard_word_analogy": 32.0, "eval_rougeL_for_task1153_bard_word_analogy": 31.0, "eval_rougeL_for_task1154_bard_word_analogy": 27.0, "eval_rougeL_for_task1155_bard_word_analogy": 68.0, "eval_rougeL_for_task1156_bard_word_analogy": 42.0, "eval_rougeL_for_task1157_bard_word_analogy": 55.0, "eval_rougeL_for_task1158_bard_word_analogy": 38.0, "eval_rougeL_for_task1159_bard_word_analogy": 39.0, "eval_rougeL_for_task1161_coda_19_title_generation": 35.161, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.1995, "eval_rougeL_for_task121_atomic_question_rewriting": 45.0345, "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.9282, "eval_rougeL_for_task1344_rte_textual_entailment": 54.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 39.3422, "eval_rougeL_for_task1356_xlsum_title_generation": 23.2099, "eval_rougeL_for_task1358_xlsum_title_generation": 32.1584, "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, "eval_rougeL_for_task1387_anli_textual_entailment": 32.0, "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 51.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 70.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 72.7778, "eval_rougeL_for_task1407_dart_data_to_text": 28.8067, "eval_rougeL_for_task1409_dart_data_to_text": 41.3613, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.3415, "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 27.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 47.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 57.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 46.0, "eval_rougeL_for_task1540_peer_read_title_generation": 38.7312, "eval_rougeL_for_task1554_scitail_textual_entailment": 52.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.7417, "eval_rougeL_for_task1562_zest_question_rewriting": 44.8887, "eval_rougeL_for_task1586_scifact_title_generation": 33.0278, "eval_rougeL_for_task1598_nyc_data_to_text": 42.3314, "eval_rougeL_for_task1612_sick_textual_entailment": 36.0, "eval_rougeL_for_task1615_sick_textual_entailment": 81.6667, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.6828, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 56.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 94.1537, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 58.0, "eval_rougeL_for_task1659_billsum_title_generation": 34.9158, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 73.1714, "eval_rougeL_for_task1728_web_nlg_data_to_text": 55.2268, "eval_rougeL_for_task190_snli_textual_entailment": 0.0, "eval_rougeL_for_task199_multinli_textual_entailment": 36.0, "eval_rougeL_for_task200_multinli_textual_entailment": 73.0, "eval_rougeL_for_task201_multinli_textual_entailment": 27.0, "eval_rougeL_for_task202_multinli_textual_entailment": 24.0, "eval_rougeL_for_task219_rocstories_title_generation": 24.8174, "eval_rougeL_for_task220_rocstories_title_generation": 90.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, "eval_rougeL_for_task233_iirc_answerability_classification": 40.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 58.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 55.4, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 44.5459, "eval_rougeL_for_task288_gigaword_title_generation": 28.674, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 39.9667, "eval_rougeL_for_task329_gap_coreference_resolution": 46.0, "eval_rougeL_for_task330_gap_coreference_resolution": 68.7571, "eval_rougeL_for_task349_squad2.0_answerability_classification": 53.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.0, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.8652, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 43.5, "eval_rougeL_for_task402_grailqa_question_rewriting": 67.7732, "eval_rougeL_for_task418_persent_title_generation": 28.5022, "eval_rougeL_for_task442_com_qa_question_rewriting": 70.2962, "eval_rougeL_for_task500_scruples_title_generation": 21.9897, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.7477, "eval_rougeL_for_task520_aquamuse_answerability_classification": 51.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 39.3665, "eval_rougeL_for_task602_wikitext_title_generation": 15.8367, "eval_rougeL_for_task613_liar_keyword_tagging": 32.5, "eval_rougeL_for_task614_glucose_cause_effect_classification": 51.4965, "eval_rougeL_for_task619_ohsumed_title_generation": 43.0529, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 37.8, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 96.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 30.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 39.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 92.0043, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 11.1667, "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.4445, "eval_rougeL_for_task671_ambigqa_question_rewriting": 67.8089, "eval_rougeL_for_task677_ollie_data_to_text": 30.1132, "eval_rougeL_for_task738_perspectrum_textual_entailment": 89.0, "eval_rougeL_for_task743_eurlex_title_generation": 35.6043, "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.4194, "eval_rougeL_for_task769_qed_title_generation": 85.7772, "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 66.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 60.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 49.3333, "eval_rougeL_for_task890_gwsd_textual_entailment": 37.0, "eval_rougeL_for_task891_gap_coreference_resolution": 66.3, "eval_rougeL_for_task892_gap_coreference_resolution": 50.0, "eval_rougeL_for_task893_gap_coreference_resolution": 31.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, "eval_rougeL_for_task957_e2e_data_to_text": 46.0713, "eval_rougeL_for_task970_sherliic_textual_entailment": 70.0, "eval_rougeL_for_textual_entailment": 44.8611, "eval_rougeL_for_title_generation": 37.2179, "eval_rougeL_for_word_analogy": 41.5, "eval_runtime": 886.217, "eval_samples_per_second": 13.439, "eval_steps_per_second": 0.841, "step": 1500 }, { "epoch": 0.44, "learning_rate": 5e-05, "loss": 1.0086, "step": 2000 }, { "epoch": 0.44, "eval_exact_match": 32.5693, "eval_exact_match_for_answerability_classification": 55.8462, "eval_exact_match_for_cause_effect_classification": 49.7143, "eval_exact_match_for_coreference_resolution": 46.0, "eval_exact_match_for_data_to_text": 7.2639, "eval_exact_match_for_dialogue_act_recognition": 47.2857, "eval_exact_match_for_grammar_error_correction": 7.0, "eval_exact_match_for_keyword_tagging": 51.6, "eval_exact_match_for_overlap_extraction": 12.0, "eval_exact_match_for_question_rewriting": 3.5455, "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 54.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 2.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 39.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 24.0, "eval_exact_match_for_task050_multirc_answerability_classification": 73.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 35.0, "eval_exact_match_for_task1153_bard_word_analogy": 32.0, "eval_exact_match_for_task1154_bard_word_analogy": 28.0, "eval_exact_match_for_task1155_bard_word_analogy": 93.0, "eval_exact_match_for_task1156_bard_word_analogy": 39.0, "eval_exact_match_for_task1157_bard_word_analogy": 68.0, "eval_exact_match_for_task1158_bard_word_analogy": 51.0, "eval_exact_match_for_task1159_bard_word_analogy": 32.0, "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 16.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, "eval_exact_match_for_task1344_rte_textual_entailment": 51.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 1.0, "eval_exact_match_for_task1386_anli_textual_entailment": 0.0, "eval_exact_match_for_task1387_anli_textual_entailment": 2.0, "eval_exact_match_for_task1388_cb_textual_entailment": 18.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 57.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 80.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 49.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 1.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 51.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 52.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 53.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 31.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 53.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 48.0, "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 55.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, "eval_exact_match_for_task1562_zest_question_rewriting": 2.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 38.0, "eval_exact_match_for_task1615_sick_textual_entailment": 49.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 12.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 53.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 57.0, "eval_exact_match_for_task1659_billsum_title_generation": 2.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 40.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, "eval_exact_match_for_task190_snli_textual_entailment": 5.0, "eval_exact_match_for_task199_multinli_textual_entailment": 43.0, "eval_exact_match_for_task200_multinli_textual_entailment": 80.0, "eval_exact_match_for_task201_multinli_textual_entailment": 15.0, "eval_exact_match_for_task202_multinli_textual_entailment": 16.0, "eval_exact_match_for_task219_rocstories_title_generation": 6.0, "eval_exact_match_for_task220_rocstories_title_generation": 97.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, "eval_exact_match_for_task232_iirc_answerability_classification": 47.0, "eval_exact_match_for_task233_iirc_answerability_classification": 47.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 88.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 65.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 4.0, "eval_exact_match_for_task329_gap_coreference_resolution": 53.0, "eval_exact_match_for_task330_gap_coreference_resolution": 61.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 53.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 57.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 62.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 54.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 35.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, "eval_exact_match_for_task418_persent_title_generation": 0.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, "eval_exact_match_for_task500_scruples_title_generation": 0.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 51.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, "eval_exact_match_for_task613_liar_keyword_tagging": 20.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 1.0, "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 20.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 95.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 41.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 32.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 84.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 17.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 41.0, "eval_exact_match_for_task743_eurlex_title_generation": 1.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 60.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 81.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 70.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 36.0, "eval_exact_match_for_task891_gap_coreference_resolution": 54.0, "eval_exact_match_for_task892_gap_coreference_resolution": 52.0, "eval_exact_match_for_task893_gap_coreference_resolution": 52.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 74.0, "eval_exact_match_for_textual_entailment": 36.4583, "eval_exact_match_for_title_generation": 10.2018, "eval_exact_match_for_word_analogy": 47.25, "eval_f1": 48.628, "eval_f1_for_answerability_classification": 58.4103, "eval_f1_for_cause_effect_classification": 68.7473, "eval_f1_for_coreference_resolution": 51.5515, "eval_f1_for_data_to_text": 50.3659, "eval_f1_for_dialogue_act_recognition": 50.3571, "eval_f1_for_grammar_error_correction": 56.7643, "eval_f1_for_keyword_tagging": 63.8965, "eval_f1_for_overlap_extraction": 28.9575, "eval_f1_for_question_rewriting": 60.295, "eval_f1_for_task020_mctaco_answerability_classification": 53.0, "eval_f1_for_task033_winogrande_coreference_resolution": 56.0, "eval_f1_for_task034_winogrande_question_rewriting": 12.6407, "eval_f1_for_task035_winogrande_question_rewriting": 89.1751, "eval_f1_for_task036_qasc_keyword_tagging": 65.4347, "eval_f1_for_task039_qasc_overlap_extraction": 32.5, "eval_f1_for_task050_multirc_answerability_classification": 73.0, "eval_f1_for_task102_commongen_data_to_text": 51.3894, "eval_f1_for_task1152_bard_word_analogy": 35.0, "eval_f1_for_task1153_bard_word_analogy": 32.0, "eval_f1_for_task1154_bard_word_analogy": 28.0, "eval_f1_for_task1155_bard_word_analogy": 93.0, "eval_f1_for_task1156_bard_word_analogy": 39.0, "eval_f1_for_task1157_bard_word_analogy": 68.0, "eval_f1_for_task1158_bard_word_analogy": 51.0, "eval_f1_for_task1159_bard_word_analogy": 32.0, "eval_f1_for_task1161_coda_19_title_generation": 35.0684, "eval_f1_for_task1195_disfl_qa_question_rewriting": 82.1069, "eval_f1_for_task121_atomic_question_rewriting": 47.5678, "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 12.4092, "eval_f1_for_task1344_rte_textual_entailment": 51.0, "eval_f1_for_task1345_qqp_question_rewriting": 38.3838, "eval_f1_for_task1356_xlsum_title_generation": 21.7507, "eval_f1_for_task1358_xlsum_title_generation": 34.7803, "eval_f1_for_task1385_anli_textual_entailment": 1.0, "eval_f1_for_task1386_anli_textual_entailment": 0.0, "eval_f1_for_task1387_anli_textual_entailment": 2.0, "eval_f1_for_task1388_cb_textual_entailment": 18.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 57.0, "eval_f1_for_task1393_copa_cause_effect_classification": 80.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 49.0, "eval_f1_for_task1407_dart_data_to_text": 32.2357, "eval_f1_for_task1409_dart_data_to_text": 49.5846, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.7759, "eval_f1_for_task1439_doqa_answerability_classification": 51.0, "eval_f1_for_task1442_doqa_answerability_classification": 52.0, "eval_f1_for_task1516_imppres_textual_entailment": 33.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 53.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 31.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 53.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, "eval_f1_for_task1540_peer_read_title_generation": 37.1614, "eval_f1_for_task1554_scitail_textual_entailment": 55.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.7527, "eval_f1_for_task1562_zest_question_rewriting": 50.6264, "eval_f1_for_task1586_scifact_title_generation": 34.7137, "eval_f1_for_task1598_nyc_data_to_text": 48.4418, "eval_f1_for_task1612_sick_textual_entailment": 38.0, "eval_f1_for_task1615_sick_textual_entailment": 49.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 81.402, "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_f1_for_task1631_open_pi_data_to_text": 89.2479, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 57.0, "eval_f1_for_task1659_billsum_title_generation": 34.8641, "eval_f1_for_task1664_wino_bias_coreference_resolution": 76.7143, "eval_f1_for_task1728_web_nlg_data_to_text": 60.5008, "eval_f1_for_task190_snli_textual_entailment": 5.0, "eval_f1_for_task199_multinli_textual_entailment": 43.0, "eval_f1_for_task200_multinli_textual_entailment": 80.0, "eval_f1_for_task201_multinli_textual_entailment": 15.0, "eval_f1_for_task202_multinli_textual_entailment": 16.0, "eval_f1_for_task219_rocstories_title_generation": 20.7356, "eval_f1_for_task220_rocstories_title_generation": 97.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, "eval_f1_for_task232_iirc_answerability_classification": 47.0, "eval_f1_for_task233_iirc_answerability_classification": 47.0, "eval_f1_for_task242_tweetqa_answerability_classification": 88.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 73.65, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 25.4151, "eval_f1_for_task288_gigaword_title_generation": 28.4976, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 8.4667, "eval_f1_for_task329_gap_coreference_resolution": 53.0, "eval_f1_for_task330_gap_coreference_resolution": 67.2571, "eval_f1_for_task349_squad2.0_answerability_classification": 53.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 78.5, "eval_f1_for_task391_cod3s_cause_effect_classification": 87.3333, "eval_f1_for_task392_cod3s_cause_effect_classification": 84.6667, "eval_f1_for_task393_cod3s_cause_effect_classification": 29.1557, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 39.0, "eval_f1_for_task402_grailqa_question_rewriting": 74.8723, "eval_f1_for_task418_persent_title_generation": 27.596, "eval_f1_for_task442_com_qa_question_rewriting": 70.0985, "eval_f1_for_task500_scruples_title_generation": 21.3575, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 40.4634, "eval_f1_for_task520_aquamuse_answerability_classification": 51.0, "eval_f1_for_task569_recipe_nlg_title_generation": 39.0628, "eval_f1_for_task602_wikitext_title_generation": 13.6254, "eval_f1_for_task613_liar_keyword_tagging": 22.6667, "eval_f1_for_task614_glucose_cause_effect_classification": 49.0753, "eval_f1_for_task619_ohsumed_title_generation": 41.4911, "eval_f1_for_task620_ohsumed_keyword_tagging": 43.6333, "eval_f1_for_task623_ohsumed_keyword_tagging": 95.0, "eval_f1_for_task640_e_snli_textual_entailment": 41.0, "eval_f1_for_task641_e_snli_textual_entailment": 33.0, "eval_f1_for_task642_e_snli_textual_entailment": 32.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 92.7476, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 22.3333, "eval_f1_for_task670_ambigqa_question_rewriting": 78.316, "eval_f1_for_task671_ambigqa_question_rewriting": 38.055, "eval_f1_for_task677_ollie_data_to_text": 26.3887, "eval_f1_for_task738_perspectrum_textual_entailment": 41.0, "eval_f1_for_task743_eurlex_title_generation": 38.8254, "eval_f1_for_task760_msr_sqa_data_to_text": 6.1938, "eval_f1_for_task769_qed_title_generation": 76.3913, "eval_f1_for_task827_copa_cause_effect_classification": 81.0, "eval_f1_for_task828_copa_cause_effect_classification": 70.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.0, "eval_f1_for_task890_gwsd_textual_entailment": 36.0, "eval_f1_for_task891_gap_coreference_resolution": 64.3, "eval_f1_for_task892_gap_coreference_resolution": 52.0, "eval_f1_for_task893_gap_coreference_resolution": 52.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_f1_for_task957_e2e_data_to_text": 56.6228, "eval_f1_for_task970_sherliic_textual_entailment": 74.0, "eval_f1_for_textual_entailment": 36.4583, "eval_f1_for_title_generation": 36.6375, "eval_f1_for_word_analogy": 47.25, "eval_gen_len": 8.5377, "eval_global_step": 2000, "eval_loss": 1.1825075149536133, "eval_rouge1": 51.6979, "eval_rouge1_for_answerability_classification": 58.4103, "eval_rouge1_for_cause_effect_classification": 69.8435, "eval_rouge1_for_coreference_resolution": 52.1668, "eval_rouge1_for_data_to_text": 53.2583, "eval_rouge1_for_dialogue_act_recognition": 55.2, "eval_rouge1_for_grammar_error_correction": 61.6428, "eval_rouge1_for_keyword_tagging": 69.326, "eval_rouge1_for_overlap_extraction": 31.3019, "eval_rouge1_for_question_rewriting": 61.9636, "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 56.0, "eval_rouge1_for_task034_winogrande_question_rewriting": 12.6084, "eval_rouge1_for_task035_winogrande_question_rewriting": 89.6894, "eval_rouge1_for_task036_qasc_keyword_tagging": 74.3966, "eval_rouge1_for_task039_qasc_overlap_extraction": 36.3333, "eval_rouge1_for_task050_multirc_answerability_classification": 73.0, "eval_rouge1_for_task102_commongen_data_to_text": 65.805, "eval_rouge1_for_task1152_bard_word_analogy": 35.0, "eval_rouge1_for_task1153_bard_word_analogy": 32.0, "eval_rouge1_for_task1154_bard_word_analogy": 28.0, "eval_rouge1_for_task1155_bard_word_analogy": 93.0, "eval_rouge1_for_task1156_bard_word_analogy": 39.0, "eval_rouge1_for_task1157_bard_word_analogy": 68.0, "eval_rouge1_for_task1158_bard_word_analogy": 51.0, "eval_rouge1_for_task1159_bard_word_analogy": 32.0, "eval_rouge1_for_task1161_coda_19_title_generation": 39.3688, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 82.2858, "eval_rouge1_for_task121_atomic_question_rewriting": 50.1911, "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 13.3663, "eval_rouge1_for_task1344_rte_textual_entailment": 51.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 41.5026, "eval_rouge1_for_task1356_xlsum_title_generation": 25.7794, "eval_rouge1_for_task1358_xlsum_title_generation": 39.1358, "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, "eval_rouge1_for_task1386_anli_textual_entailment": 31.0, "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 57.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 80.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 54.9, "eval_rouge1_for_task1407_dart_data_to_text": 32.8903, "eval_rouge1_for_task1409_dart_data_to_text": 50.1621, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.206, "eval_rouge1_for_task1439_doqa_answerability_classification": 51.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 52.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 53.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 45.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 53.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, "eval_rouge1_for_task1540_peer_read_title_generation": 40.3048, "eval_rouge1_for_task1554_scitail_textual_entailment": 55.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.0795, "eval_rouge1_for_task1562_zest_question_rewriting": 52.9742, "eval_rouge1_for_task1586_scifact_title_generation": 38.9634, "eval_rouge1_for_task1598_nyc_data_to_text": 49.9892, "eval_rouge1_for_task1612_sick_textual_entailment": 38.0, "eval_rouge1_for_task1615_sick_textual_entailment": 83.0, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 81.7977, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 89.5518, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 57.0, "eval_rouge1_for_task1659_billsum_title_generation": 36.5339, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 76.7143, "eval_rouge1_for_task1728_web_nlg_data_to_text": 62.354, "eval_rouge1_for_task190_snli_textual_entailment": 5.0, "eval_rouge1_for_task199_multinli_textual_entailment": 43.0, "eval_rouge1_for_task200_multinli_textual_entailment": 80.0, "eval_rouge1_for_task201_multinli_textual_entailment": 15.0, "eval_rouge1_for_task202_multinli_textual_entailment": 16.0, "eval_rouge1_for_task219_rocstories_title_generation": 23.1462, "eval_rouge1_for_task220_rocstories_title_generation": 97.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, "eval_rouge1_for_task232_iirc_answerability_classification": 47.0, "eval_rouge1_for_task233_iirc_answerability_classification": 47.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 88.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 74.3167, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 26.2705, "eval_rouge1_for_task288_gigaword_title_generation": 31.6224, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 8.4667, "eval_rouge1_for_task329_gap_coreference_resolution": 53.0, "eval_rouge1_for_task330_gap_coreference_resolution": 67.1857, "eval_rouge1_for_task349_squad2.0_answerability_classification": 53.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 78.5, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 87.3333, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.6667, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 29.9741, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 46.0, "eval_rouge1_for_task402_grailqa_question_rewriting": 77.5341, "eval_rouge1_for_task418_persent_title_generation": 31.1757, "eval_rouge1_for_task442_com_qa_question_rewriting": 73.3904, "eval_rouge1_for_task500_scruples_title_generation": 22.8908, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 41.1014, "eval_rouge1_for_task520_aquamuse_answerability_classification": 51.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 40.9904, "eval_rouge1_for_task602_wikitext_title_generation": 14.4861, "eval_rouge1_for_task613_liar_keyword_tagging": 35.4667, "eval_rouge1_for_task614_glucose_cause_effect_classification": 55.9305, "eval_rouge1_for_task619_ohsumed_title_generation": 44.7968, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 48.519, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 95.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 41.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 32.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 93.2476, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 23.1667, "eval_rouge1_for_task670_ambigqa_question_rewriting": 79.7685, "eval_rouge1_for_task671_ambigqa_question_rewriting": 39.8578, "eval_rouge1_for_task677_ollie_data_to_text": 29.1317, "eval_rouge1_for_task738_perspectrum_textual_entailment": 80.0, "eval_rouge1_for_task743_eurlex_title_generation": 40.5519, "eval_rouge1_for_task760_msr_sqa_data_to_text": 6.4083, "eval_rouge1_for_task769_qed_title_generation": 76.3163, "eval_rouge1_for_task827_copa_cause_effect_classification": 81.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 70.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 48.0, "eval_rouge1_for_task890_gwsd_textual_entailment": 36.0, "eval_rouge1_for_task891_gap_coreference_resolution": 64.4857, "eval_rouge1_for_task892_gap_coreference_resolution": 52.0, "eval_rouge1_for_task893_gap_coreference_resolution": 52.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rouge1_for_task957_e2e_data_to_text": 58.3632, "eval_rouge1_for_task970_sherliic_textual_entailment": 74.0, "eval_rouge1_for_textual_entailment": 44.375, "eval_rouge1_for_title_generation": 38.9693, "eval_rouge1_for_word_analogy": 47.25, "eval_rougeL": 50.2785, "eval_rougeL_for_answerability_classification": 58.4103, "eval_rougeL_for_cause_effect_classification": 69.0917, "eval_rougeL_for_coreference_resolution": 52.1668, "eval_rougeL_for_data_to_text": 45.9878, "eval_rougeL_for_dialogue_act_recognition": 55.2, "eval_rougeL_for_grammar_error_correction": 60.8051, "eval_rougeL_for_keyword_tagging": 68.9248, "eval_rougeL_for_overlap_extraction": 30.9575, "eval_rougeL_for_question_rewriting": 58.3781, "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 56.0, "eval_rougeL_for_task034_winogrande_question_rewriting": 12.6084, "eval_rougeL_for_task035_winogrande_question_rewriting": 89.2846, "eval_rougeL_for_task036_qasc_keyword_tagging": 73.8906, "eval_rougeL_for_task039_qasc_overlap_extraction": 36.3333, "eval_rougeL_for_task050_multirc_answerability_classification": 73.0, "eval_rougeL_for_task102_commongen_data_to_text": 56.4687, "eval_rougeL_for_task1152_bard_word_analogy": 35.0, "eval_rougeL_for_task1153_bard_word_analogy": 32.0, "eval_rougeL_for_task1154_bard_word_analogy": 28.0, "eval_rougeL_for_task1155_bard_word_analogy": 93.0, "eval_rougeL_for_task1156_bard_word_analogy": 39.0, "eval_rougeL_for_task1157_bard_word_analogy": 68.0, "eval_rougeL_for_task1158_bard_word_analogy": 51.0, "eval_rougeL_for_task1159_bard_word_analogy": 32.0, "eval_rougeL_for_task1161_coda_19_title_generation": 32.3287, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 81.4262, "eval_rougeL_for_task121_atomic_question_rewriting": 45.0243, "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 12.4048, "eval_rougeL_for_task1344_rte_textual_entailment": 51.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 38.5559, "eval_rougeL_for_task1356_xlsum_title_generation": 21.7047, "eval_rougeL_for_task1358_xlsum_title_generation": 32.8543, "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, "eval_rougeL_for_task1386_anli_textual_entailment": 31.0, "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 57.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 80.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 54.9, "eval_rougeL_for_task1407_dart_data_to_text": 29.398, "eval_rougeL_for_task1409_dart_data_to_text": 42.2821, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.4364, "eval_rougeL_for_task1439_doqa_answerability_classification": 51.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 52.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 53.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 45.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 53.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 48.0, "eval_rougeL_for_task1540_peer_read_title_generation": 36.0368, "eval_rougeL_for_task1554_scitail_textual_entailment": 55.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.1738, "eval_rougeL_for_task1562_zest_question_rewriting": 46.9538, "eval_rougeL_for_task1586_scifact_title_generation": 31.6646, "eval_rougeL_for_task1598_nyc_data_to_text": 39.3689, "eval_rougeL_for_task1612_sick_textual_entailment": 38.0, "eval_rougeL_for_task1615_sick_textual_entailment": 83.0, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 80.9553, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 88.2793, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 57.0, "eval_rougeL_for_task1659_billsum_title_generation": 31.4539, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 76.7143, "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.7552, "eval_rougeL_for_task190_snli_textual_entailment": 5.0, "eval_rougeL_for_task199_multinli_textual_entailment": 43.0, "eval_rougeL_for_task200_multinli_textual_entailment": 80.0, "eval_rougeL_for_task201_multinli_textual_entailment": 15.0, "eval_rougeL_for_task202_multinli_textual_entailment": 16.0, "eval_rougeL_for_task219_rocstories_title_generation": 23.1462, "eval_rougeL_for_task220_rocstories_title_generation": 97.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, "eval_rougeL_for_task232_iirc_answerability_classification": 47.0, "eval_rougeL_for_task233_iirc_answerability_classification": 47.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 88.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 74.3167, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 25.5818, "eval_rougeL_for_task288_gigaword_title_generation": 28.0445, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 8.4667, "eval_rougeL_for_task329_gap_coreference_resolution": 53.0, "eval_rougeL_for_task330_gap_coreference_resolution": 67.1857, "eval_rougeL_for_task349_squad2.0_answerability_classification": 53.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 78.5, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 87.3333, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.6667, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.9003, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 46.0, "eval_rougeL_for_task402_grailqa_question_rewriting": 64.2308, "eval_rougeL_for_task418_persent_title_generation": 26.5542, "eval_rougeL_for_task442_com_qa_question_rewriting": 68.1779, "eval_rougeL_for_task500_scruples_title_generation": 21.623, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 40.8344, "eval_rougeL_for_task520_aquamuse_answerability_classification": 51.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 39.6379, "eval_rougeL_for_task602_wikitext_title_generation": 14.4861, "eval_rougeL_for_task613_liar_keyword_tagging": 35.4667, "eval_rougeL_for_task614_glucose_cause_effect_classification": 51.7416, "eval_rougeL_for_task619_ohsumed_title_generation": 35.3046, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 47.019, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 95.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 41.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 32.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 93.2476, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 23.1667, "eval_rougeL_for_task670_ambigqa_question_rewriting": 78.5185, "eval_rougeL_for_task671_ambigqa_question_rewriting": 36.4235, "eval_rougeL_for_task677_ollie_data_to_text": 24.3758, "eval_rougeL_for_task738_perspectrum_textual_entailment": 80.0, "eval_rougeL_for_task743_eurlex_title_generation": 36.2116, "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.6061, "eval_rougeL_for_task769_qed_title_generation": 76.3163, "eval_rougeL_for_task827_copa_cause_effect_classification": 81.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 70.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 48.0, "eval_rougeL_for_task890_gwsd_textual_entailment": 36.0, "eval_rougeL_for_task891_gap_coreference_resolution": 64.4857, "eval_rougeL_for_task892_gap_coreference_resolution": 52.0, "eval_rougeL_for_task893_gap_coreference_resolution": 52.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_rougeL_for_task957_e2e_data_to_text": 44.4738, "eval_rougeL_for_task970_sherliic_textual_entailment": 74.0, "eval_rougeL_for_textual_entailment": 44.375, "eval_rougeL_for_title_generation": 35.6104, "eval_rougeL_for_word_analogy": 47.25, "eval_runtime": 798.8387, "eval_samples_per_second": 14.909, "eval_steps_per_second": 0.933, "step": 2000 }, { "epoch": 0.55, "learning_rate": 5e-05, "loss": 0.9691, "step": 2500 }, { "epoch": 0.55, "eval_exact_match": 34.2485, "eval_exact_match_for_answerability_classification": 55.5385, "eval_exact_match_for_cause_effect_classification": 45.1429, "eval_exact_match_for_coreference_resolution": 46.2857, "eval_exact_match_for_data_to_text": 9.5642, "eval_exact_match_for_dialogue_act_recognition": 50.4286, "eval_exact_match_for_grammar_error_correction": 6.5, "eval_exact_match_for_keyword_tagging": 49.8, "eval_exact_match_for_overlap_extraction": 12.0, "eval_exact_match_for_question_rewriting": 3.5455, "eval_exact_match_for_task020_mctaco_answerability_classification": 55.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 60.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 12.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 2.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 29.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 24.0, "eval_exact_match_for_task050_multirc_answerability_classification": 69.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 31.0, "eval_exact_match_for_task1153_bard_word_analogy": 29.0, "eval_exact_match_for_task1154_bard_word_analogy": 28.0, "eval_exact_match_for_task1155_bard_word_analogy": 90.0, "eval_exact_match_for_task1156_bard_word_analogy": 49.0, "eval_exact_match_for_task1157_bard_word_analogy": 68.0, "eval_exact_match_for_task1158_bard_word_analogy": 32.0, "eval_exact_match_for_task1159_bard_word_analogy": 38.0, "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 7.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 49.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, "eval_exact_match_for_task1344_rte_textual_entailment": 55.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, "eval_exact_match_for_task1386_anli_textual_entailment": 31.0, "eval_exact_match_for_task1387_anli_textual_entailment": 37.0, "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 57.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 80.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 62.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 3.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 58.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 37.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 54.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 47.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 51.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 54.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 46.0, "eval_exact_match_for_task1615_sick_textual_entailment": 51.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 6.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 66.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 69.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 64.0, "eval_exact_match_for_task1659_billsum_title_generation": 3.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 35.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, "eval_exact_match_for_task190_snli_textual_entailment": 50.0, "eval_exact_match_for_task199_multinli_textual_entailment": 39.0, "eval_exact_match_for_task200_multinli_textual_entailment": 88.0, "eval_exact_match_for_task201_multinli_textual_entailment": 10.0, "eval_exact_match_for_task202_multinli_textual_entailment": 20.0, "eval_exact_match_for_task219_rocstories_title_generation": 1.0, "eval_exact_match_for_task220_rocstories_title_generation": 98.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, "eval_exact_match_for_task232_iirc_answerability_classification": 38.0, "eval_exact_match_for_task233_iirc_answerability_classification": 34.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 78.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 66.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, "eval_exact_match_for_task329_gap_coreference_resolution": 60.0, "eval_exact_match_for_task330_gap_coreference_resolution": 60.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 57.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 65.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 51.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 30.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 8.0, "eval_exact_match_for_task418_persent_title_generation": 0.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, "eval_exact_match_for_task500_scruples_title_generation": 0.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 54.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, "eval_exact_match_for_task613_liar_keyword_tagging": 21.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 22.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 88.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 44.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 32.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 89.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 12.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 39.0, "eval_exact_match_for_task743_eurlex_title_generation": 2.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 65.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 61.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 74.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 29.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 37.0, "eval_exact_match_for_task891_gap_coreference_resolution": 56.0, "eval_exact_match_for_task892_gap_coreference_resolution": 53.0, "eval_exact_match_for_task893_gap_coreference_resolution": 57.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 71.0, "eval_exact_match_for_textual_entailment": 45.2083, "eval_exact_match_for_title_generation": 10.426, "eval_exact_match_for_word_analogy": 45.625, "eval_f1": 51.6895, "eval_f1_for_answerability_classification": 58.1026, "eval_f1_for_cause_effect_classification": 65.4526, "eval_f1_for_coreference_resolution": 53.1791, "eval_f1_for_data_to_text": 52.6688, "eval_f1_for_dialogue_act_recognition": 52.9286, "eval_f1_for_grammar_error_correction": 60.1938, "eval_f1_for_keyword_tagging": 62.4516, "eval_f1_for_overlap_extraction": 32.997, "eval_f1_for_question_rewriting": 68.973, "eval_f1_for_task020_mctaco_answerability_classification": 55.0, "eval_f1_for_task033_winogrande_coreference_resolution": 61.3333, "eval_f1_for_task034_winogrande_question_rewriting": 86.7398, "eval_f1_for_task035_winogrande_question_rewriting": 87.0801, "eval_f1_for_task036_qasc_keyword_tagging": 65.2342, "eval_f1_for_task039_qasc_overlap_extraction": 30.5, "eval_f1_for_task050_multirc_answerability_classification": 69.0, "eval_f1_for_task102_commongen_data_to_text": 54.9571, "eval_f1_for_task1152_bard_word_analogy": 31.0, "eval_f1_for_task1153_bard_word_analogy": 30.3333, "eval_f1_for_task1154_bard_word_analogy": 28.0, "eval_f1_for_task1155_bard_word_analogy": 90.0, "eval_f1_for_task1156_bard_word_analogy": 51.6667, "eval_f1_for_task1157_bard_word_analogy": 68.0, "eval_f1_for_task1158_bard_word_analogy": 32.0, "eval_f1_for_task1159_bard_word_analogy": 38.0, "eval_f1_for_task1161_coda_19_title_generation": 38.1923, "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.0997, "eval_f1_for_task121_atomic_question_rewriting": 45.3274, "eval_f1_for_task133_winowhy_coreference_resolution": 49.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.7497, "eval_f1_for_task1344_rte_textual_entailment": 55.0, "eval_f1_for_task1345_qqp_question_rewriting": 39.219, "eval_f1_for_task1356_xlsum_title_generation": 23.3855, "eval_f1_for_task1358_xlsum_title_generation": 35.3216, "eval_f1_for_task1385_anli_textual_entailment": 33.0, "eval_f1_for_task1386_anli_textual_entailment": 31.0, "eval_f1_for_task1387_anli_textual_entailment": 37.0, "eval_f1_for_task1388_cb_textual_entailment": 40.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 57.0, "eval_f1_for_task1393_copa_cause_effect_classification": 80.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 62.0, "eval_f1_for_task1407_dart_data_to_text": 32.8683, "eval_f1_for_task1409_dart_data_to_text": 48.2031, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.9791, "eval_f1_for_task1439_doqa_answerability_classification": 48.0, "eval_f1_for_task1442_doqa_answerability_classification": 58.0, "eval_f1_for_task1516_imppres_textual_entailment": 37.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 54.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 47.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1540_peer_read_title_generation": 36.431, "eval_f1_for_task1554_scitail_textual_entailment": 54.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.4084, "eval_f1_for_task1562_zest_question_rewriting": 47.423, "eval_f1_for_task1586_scifact_title_generation": 37.5102, "eval_f1_for_task1598_nyc_data_to_text": 49.8296, "eval_f1_for_task1612_sick_textual_entailment": 46.0, "eval_f1_for_task1615_sick_textual_entailment": 51.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.0209, "eval_f1_for_task1624_disfl_qa_answerability_classification": 66.0, "eval_f1_for_task1631_open_pi_data_to_text": 95.0761, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 64.0, "eval_f1_for_task1659_billsum_title_generation": 37.6864, "eval_f1_for_task1664_wino_bias_coreference_resolution": 71.0952, "eval_f1_for_task1728_web_nlg_data_to_text": 64.3073, "eval_f1_for_task190_snli_textual_entailment": 50.0, "eval_f1_for_task199_multinli_textual_entailment": 39.0, "eval_f1_for_task200_multinli_textual_entailment": 88.0, "eval_f1_for_task201_multinli_textual_entailment": 10.0, "eval_f1_for_task202_multinli_textual_entailment": 20.0, "eval_f1_for_task219_rocstories_title_generation": 16.6469, "eval_f1_for_task220_rocstories_title_generation": 98.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, "eval_f1_for_task232_iirc_answerability_classification": 38.0, "eval_f1_for_task233_iirc_answerability_classification": 34.0, "eval_f1_for_task242_tweetqa_answerability_classification": 78.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 75.5548, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 35.494, "eval_f1_for_task288_gigaword_title_generation": 29.7273, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 25.5, "eval_f1_for_task329_gap_coreference_resolution": 60.0, "eval_f1_for_task330_gap_coreference_resolution": 67.7238, "eval_f1_for_task349_squad2.0_answerability_classification": 57.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 82.5, "eval_f1_for_task391_cod3s_cause_effect_classification": 83.6667, "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task393_cod3s_cause_effect_classification": 29.7144, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 33.1667, "eval_f1_for_task402_grailqa_question_rewriting": 79.422, "eval_f1_for_task418_persent_title_generation": 28.5722, "eval_f1_for_task442_com_qa_question_rewriting": 71.4856, "eval_f1_for_task500_scruples_title_generation": 20.4129, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 41.7471, "eval_f1_for_task520_aquamuse_answerability_classification": 54.0, "eval_f1_for_task569_recipe_nlg_title_generation": 43.4724, "eval_f1_for_task602_wikitext_title_generation": 14.5839, "eval_f1_for_task613_liar_keyword_tagging": 22.3333, "eval_f1_for_task614_glucose_cause_effect_classification": 46.4539, "eval_f1_for_task619_ohsumed_title_generation": 43.1989, "eval_f1_for_task620_ohsumed_keyword_tagging": 42.1333, "eval_f1_for_task623_ohsumed_keyword_tagging": 88.0, "eval_f1_for_task640_e_snli_textual_entailment": 44.0, "eval_f1_for_task641_e_snli_textual_entailment": 34.0, "eval_f1_for_task642_e_snli_textual_entailment": 32.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.5571, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 18.0, "eval_f1_for_task670_ambigqa_question_rewriting": 80.1878, "eval_f1_for_task671_ambigqa_question_rewriting": 63.6978, "eval_f1_for_task677_ollie_data_to_text": 30.9479, "eval_f1_for_task738_perspectrum_textual_entailment": 39.0, "eval_f1_for_task743_eurlex_title_generation": 41.0011, "eval_f1_for_task760_msr_sqa_data_to_text": 7.5382, "eval_f1_for_task769_qed_title_generation": 84.0147, "eval_f1_for_task827_copa_cause_effect_classification": 61.0, "eval_f1_for_task828_copa_cause_effect_classification": 74.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 29.0, "eval_f1_for_task890_gwsd_textual_entailment": 37.0, "eval_f1_for_task891_gap_coreference_resolution": 66.1333, "eval_f1_for_task892_gap_coreference_resolution": 53.0, "eval_f1_for_task893_gap_coreference_resolution": 57.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, "eval_f1_for_task957_e2e_data_to_text": 56.8947, "eval_f1_for_task970_sherliic_textual_entailment": 71.0, "eval_f1_for_textual_entailment": 45.2083, "eval_f1_for_title_generation": 38.1906, "eval_f1_for_word_analogy": 46.125, "eval_gen_len": 8.7106, "eval_global_step": 2500, "eval_loss": 1.142754077911377, "eval_rouge1": 53.7393, "eval_rouge1_for_answerability_classification": 58.1026, "eval_rouge1_for_cause_effect_classification": 66.4447, "eval_rouge1_for_coreference_resolution": 53.9058, "eval_rouge1_for_data_to_text": 55.6215, "eval_rouge1_for_dialogue_act_recognition": 55.8302, "eval_rouge1_for_grammar_error_correction": 65.1774, "eval_rouge1_for_keyword_tagging": 67.7794, "eval_rouge1_for_overlap_extraction": 34.9668, "eval_rouge1_for_question_rewriting": 70.6251, "eval_rouge1_for_task020_mctaco_answerability_classification": 55.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 61.3333, "eval_rouge1_for_task034_winogrande_question_rewriting": 86.7467, "eval_rouge1_for_task035_winogrande_question_rewriting": 87.6667, "eval_rouge1_for_task036_qasc_keyword_tagging": 71.8875, "eval_rouge1_for_task039_qasc_overlap_extraction": 33.3333, "eval_rouge1_for_task050_multirc_answerability_classification": 69.0, "eval_rouge1_for_task102_commongen_data_to_text": 68.9385, "eval_rouge1_for_task1152_bard_word_analogy": 31.0, "eval_rouge1_for_task1153_bard_word_analogy": 32.3333, "eval_rouge1_for_task1154_bard_word_analogy": 28.0, "eval_rouge1_for_task1155_bard_word_analogy": 90.0, "eval_rouge1_for_task1156_bard_word_analogy": 51.6667, "eval_rouge1_for_task1157_bard_word_analogy": 68.0, "eval_rouge1_for_task1158_bard_word_analogy": 32.0, "eval_rouge1_for_task1159_bard_word_analogy": 38.0, "eval_rouge1_for_task1161_coda_19_title_generation": 42.3939, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.3379, "eval_rouge1_for_task121_atomic_question_rewriting": 47.8858, "eval_rouge1_for_task133_winowhy_coreference_resolution": 49.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.5682, "eval_rouge1_for_task1344_rte_textual_entailment": 55.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 41.9941, "eval_rouge1_for_task1356_xlsum_title_generation": 27.6528, "eval_rouge1_for_task1358_xlsum_title_generation": 39.4469, "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, "eval_rouge1_for_task1386_anli_textual_entailment": 31.0, "eval_rouge1_for_task1387_anli_textual_entailment": 37.0, "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 57.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 80.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 68.6444, "eval_rouge1_for_task1407_dart_data_to_text": 33.7826, "eval_rouge1_for_task1409_dart_data_to_text": 48.8506, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 42.8362, "eval_rouge1_for_task1439_doqa_answerability_classification": 48.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 58.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 37.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 54.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 48.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1540_peer_read_title_generation": 40.9953, "eval_rouge1_for_task1554_scitail_textual_entailment": 54.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.5186, "eval_rouge1_for_task1562_zest_question_rewriting": 51.0265, "eval_rouge1_for_task1586_scifact_title_generation": 41.6241, "eval_rouge1_for_task1598_nyc_data_to_text": 52.3021, "eval_rouge1_for_task1612_sick_textual_entailment": 46.0, "eval_rouge1_for_task1615_sick_textual_entailment": 83.6667, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.5026, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 66.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 95.1728, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 64.0, "eval_rouge1_for_task1659_billsum_title_generation": 39.8907, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 71.0952, "eval_rouge1_for_task1728_web_nlg_data_to_text": 65.922, "eval_rouge1_for_task190_snli_textual_entailment": 50.0, "eval_rouge1_for_task199_multinli_textual_entailment": 39.0, "eval_rouge1_for_task200_multinli_textual_entailment": 88.0, "eval_rouge1_for_task201_multinli_textual_entailment": 10.0, "eval_rouge1_for_task202_multinli_textual_entailment": 20.0, "eval_rouge1_for_task219_rocstories_title_generation": 21.1636, "eval_rouge1_for_task220_rocstories_title_generation": 98.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, "eval_rouge1_for_task232_iirc_answerability_classification": 38.0, "eval_rouge1_for_task233_iirc_answerability_classification": 34.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 78.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 75.9, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 36.6003, "eval_rouge1_for_task288_gigaword_title_generation": 32.2571, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 25.3333, "eval_rouge1_for_task329_gap_coreference_resolution": 60.0, "eval_rouge1_for_task330_gap_coreference_resolution": 67.4857, "eval_rouge1_for_task349_squad2.0_answerability_classification": 57.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 82.5, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.6667, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.0059, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 43.1667, "eval_rouge1_for_task402_grailqa_question_rewriting": 81.7252, "eval_rouge1_for_task418_persent_title_generation": 32.6972, "eval_rouge1_for_task442_com_qa_question_rewriting": 74.6994, "eval_rouge1_for_task500_scruples_title_generation": 22.9424, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 42.2483, "eval_rouge1_for_task520_aquamuse_answerability_classification": 54.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 44.6358, "eval_rouge1_for_task602_wikitext_title_generation": 15.2526, "eval_rouge1_for_task613_liar_keyword_tagging": 36.8333, "eval_rouge1_for_task614_glucose_cause_effect_classification": 53.1067, "eval_rouge1_for_task619_ohsumed_title_generation": 45.9216, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 47.119, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 88.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 44.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 32.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.0571, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 18.0, "eval_rouge1_for_task670_ambigqa_question_rewriting": 81.4222, "eval_rouge1_for_task671_ambigqa_question_rewriting": 64.8694, "eval_rouge1_for_task677_ollie_data_to_text": 33.5564, "eval_rouge1_for_task738_perspectrum_textual_entailment": 78.0, "eval_rouge1_for_task743_eurlex_title_generation": 42.9665, "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.8298, "eval_rouge1_for_task769_qed_title_generation": 83.6953, "eval_rouge1_for_task827_copa_cause_effect_classification": 61.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 74.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 41.6667, "eval_rouge1_for_task890_gwsd_textual_entailment": 37.0, "eval_rouge1_for_task891_gap_coreference_resolution": 66.3667, "eval_rouge1_for_task892_gap_coreference_resolution": 53.0, "eval_rouge1_for_task893_gap_coreference_resolution": 57.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 55.3333, "eval_rouge1_for_task957_e2e_data_to_text": 58.8729, "eval_rouge1_for_task970_sherliic_textual_entailment": 71.0, "eval_rouge1_for_textual_entailment": 48.4167, "eval_rouge1_for_title_generation": 40.6901, "eval_rouge1_for_word_analogy": 46.375, "eval_rougeL": 52.3248, "eval_rougeL_for_answerability_classification": 58.1026, "eval_rougeL_for_cause_effect_classification": 65.9609, "eval_rougeL_for_coreference_resolution": 53.9058, "eval_rougeL_for_data_to_text": 47.6652, "eval_rougeL_for_dialogue_act_recognition": 55.8302, "eval_rougeL_for_grammar_error_correction": 64.239, "eval_rougeL_for_keyword_tagging": 67.254, "eval_rougeL_for_overlap_extraction": 34.761, "eval_rougeL_for_question_rewriting": 67.1198, "eval_rougeL_for_task020_mctaco_answerability_classification": 55.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 61.3333, "eval_rougeL_for_task034_winogrande_question_rewriting": 85.5371, "eval_rougeL_for_task035_winogrande_question_rewriting": 86.3988, "eval_rougeL_for_task036_qasc_keyword_tagging": 71.1605, "eval_rougeL_for_task039_qasc_overlap_extraction": 33.3333, "eval_rougeL_for_task050_multirc_answerability_classification": 69.0, "eval_rougeL_for_task102_commongen_data_to_text": 57.699, "eval_rougeL_for_task1152_bard_word_analogy": 31.0, "eval_rougeL_for_task1153_bard_word_analogy": 32.3333, "eval_rougeL_for_task1154_bard_word_analogy": 28.0, "eval_rougeL_for_task1155_bard_word_analogy": 90.0, "eval_rougeL_for_task1156_bard_word_analogy": 51.6667, "eval_rougeL_for_task1157_bard_word_analogy": 68.0, "eval_rougeL_for_task1158_bard_word_analogy": 32.0, "eval_rougeL_for_task1159_bard_word_analogy": 38.0, "eval_rougeL_for_task1161_coda_19_title_generation": 36.191, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.4627, "eval_rougeL_for_task121_atomic_question_rewriting": 42.349, "eval_rougeL_for_task133_winowhy_coreference_resolution": 49.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.8198, "eval_rougeL_for_task1344_rte_textual_entailment": 55.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 39.3441, "eval_rougeL_for_task1356_xlsum_title_generation": 22.7932, "eval_rougeL_for_task1358_xlsum_title_generation": 34.3862, "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, "eval_rougeL_for_task1386_anli_textual_entailment": 31.0, "eval_rougeL_for_task1387_anli_textual_entailment": 37.0, "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 57.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 80.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 68.6444, "eval_rougeL_for_task1407_dart_data_to_text": 29.3917, "eval_rougeL_for_task1409_dart_data_to_text": 41.8091, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 41.8657, "eval_rougeL_for_task1439_doqa_answerability_classification": 48.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 58.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 37.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 54.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 48.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 51.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1540_peer_read_title_generation": 37.1265, "eval_rougeL_for_task1554_scitail_textual_entailment": 54.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.6124, "eval_rougeL_for_task1562_zest_question_rewriting": 45.4407, "eval_rougeL_for_task1586_scifact_title_generation": 34.6299, "eval_rougeL_for_task1598_nyc_data_to_text": 39.522, "eval_rougeL_for_task1612_sick_textual_entailment": 46.0, "eval_rougeL_for_task1615_sick_textual_entailment": 83.6667, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.649, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 66.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 95.0928, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 64.0, "eval_rougeL_for_task1659_billsum_title_generation": 33.5288, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 71.0952, "eval_rougeL_for_task1728_web_nlg_data_to_text": 56.7906, "eval_rougeL_for_task190_snli_textual_entailment": 50.0, "eval_rougeL_for_task199_multinli_textual_entailment": 39.0, "eval_rougeL_for_task200_multinli_textual_entailment": 88.0, "eval_rougeL_for_task201_multinli_textual_entailment": 10.0, "eval_rougeL_for_task202_multinli_textual_entailment": 20.0, "eval_rougeL_for_task219_rocstories_title_generation": 21.1636, "eval_rougeL_for_task220_rocstories_title_generation": 98.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, "eval_rougeL_for_task232_iirc_answerability_classification": 38.0, "eval_rougeL_for_task233_iirc_answerability_classification": 34.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 78.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 75.9, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 36.1887, "eval_rougeL_for_task288_gigaword_title_generation": 29.1865, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 25.3333, "eval_rougeL_for_task329_gap_coreference_resolution": 60.0, "eval_rougeL_for_task330_gap_coreference_resolution": 67.4857, "eval_rougeL_for_task349_squad2.0_answerability_classification": 57.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 82.5, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.6667, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.9383, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 43.1667, "eval_rougeL_for_task402_grailqa_question_rewriting": 68.6112, "eval_rougeL_for_task418_persent_title_generation": 27.6132, "eval_rougeL_for_task442_com_qa_question_rewriting": 70.2991, "eval_rougeL_for_task500_scruples_title_generation": 21.873, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 41.8854, "eval_rougeL_for_task520_aquamuse_answerability_classification": 54.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 43.1317, "eval_rougeL_for_task602_wikitext_title_generation": 15.2526, "eval_rougeL_for_task613_liar_keyword_tagging": 36.8333, "eval_rougeL_for_task614_glucose_cause_effect_classification": 50.7883, "eval_rougeL_for_task619_ohsumed_title_generation": 38.7592, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 45.219, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 88.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 44.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 32.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.0571, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 18.0, "eval_rougeL_for_task670_ambigqa_question_rewriting": 80.6442, "eval_rougeL_for_task671_ambigqa_question_rewriting": 62.5814, "eval_rougeL_for_task677_ollie_data_to_text": 27.2494, "eval_rougeL_for_task738_perspectrum_textual_entailment": 78.0, "eval_rougeL_for_task743_eurlex_title_generation": 39.837, "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.6408, "eval_rougeL_for_task769_qed_title_generation": 83.2953, "eval_rougeL_for_task827_copa_cause_effect_classification": 61.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 74.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 41.6667, "eval_rougeL_for_task890_gwsd_textual_entailment": 37.0, "eval_rougeL_for_task891_gap_coreference_resolution": 66.3667, "eval_rougeL_for_task892_gap_coreference_resolution": 53.0, "eval_rougeL_for_task893_gap_coreference_resolution": 57.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 55.3333, "eval_rougeL_for_task957_e2e_data_to_text": 44.4332, "eval_rougeL_for_task970_sherliic_textual_entailment": 71.0, "eval_rougeL_for_textual_entailment": 48.4167, "eval_rougeL_for_title_generation": 37.5579, "eval_rougeL_for_word_analogy": 46.375, "eval_runtime": 756.5153, "eval_samples_per_second": 15.743, "eval_steps_per_second": 0.985, "step": 2500 }, { "epoch": 0.66, "learning_rate": 5e-05, "loss": 0.9425, "step": 3000 }, { "epoch": 0.66, "eval_exact_match": 33.4593, "eval_exact_match_for_answerability_classification": 58.7692, "eval_exact_match_for_cause_effect_classification": 46.8571, "eval_exact_match_for_coreference_resolution": 44.8571, "eval_exact_match_for_data_to_text": 5.8111, "eval_exact_match_for_dialogue_act_recognition": 49.0, "eval_exact_match_for_grammar_error_correction": 7.0, "eval_exact_match_for_keyword_tagging": 44.0, "eval_exact_match_for_overlap_extraction": 14.0, "eval_exact_match_for_question_rewriting": 4.1818, "eval_exact_match_for_task020_mctaco_answerability_classification": 56.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 52.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 10.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 6.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 34.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 28.0, "eval_exact_match_for_task050_multirc_answerability_classification": 78.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 33.0, "eval_exact_match_for_task1153_bard_word_analogy": 28.0, "eval_exact_match_for_task1154_bard_word_analogy": 23.0, "eval_exact_match_for_task1155_bard_word_analogy": 86.0, "eval_exact_match_for_task1156_bard_word_analogy": 51.0, "eval_exact_match_for_task1157_bard_word_analogy": 68.0, "eval_exact_match_for_task1158_bard_word_analogy": 41.0, "eval_exact_match_for_task1159_bard_word_analogy": 31.0, "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 12.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 44.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, "eval_exact_match_for_task1344_rte_textual_entailment": 56.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 0.0, "eval_exact_match_for_task1386_anli_textual_entailment": 12.0, "eval_exact_match_for_task1387_anli_textual_entailment": 12.0, "eval_exact_match_for_task1388_cb_textual_entailment": 2.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 56.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 86.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 58.0, "eval_exact_match_for_task1407_dart_data_to_text": 1.0, "eval_exact_match_for_task1409_dart_data_to_text": 2.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 58.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 48.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 59.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 39.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 51.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 73.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 42.0, "eval_exact_match_for_task1615_sick_textual_entailment": 52.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 9.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 56.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 38.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 64.0, "eval_exact_match_for_task1659_billsum_title_generation": 3.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 36.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, "eval_exact_match_for_task190_snli_textual_entailment": 16.0, "eval_exact_match_for_task199_multinli_textual_entailment": 47.0, "eval_exact_match_for_task200_multinli_textual_entailment": 92.0, "eval_exact_match_for_task201_multinli_textual_entailment": 9.0, "eval_exact_match_for_task202_multinli_textual_entailment": 68.0, "eval_exact_match_for_task219_rocstories_title_generation": 1.0, "eval_exact_match_for_task220_rocstories_title_generation": 99.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, "eval_exact_match_for_task232_iirc_answerability_classification": 52.0, "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 83.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 58.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, "eval_exact_match_for_task329_gap_coreference_resolution": 51.0, "eval_exact_match_for_task330_gap_coreference_resolution": 60.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 53.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 53.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 49.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 38.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, "eval_exact_match_for_task500_scruples_title_generation": 2.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 66.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 3.0, "eval_exact_match_for_task602_wikitext_title_generation": 2.381, "eval_exact_match_for_task613_liar_keyword_tagging": 22.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 16.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 65.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 27.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 38.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 43.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 83.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 20.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 36.0, "eval_exact_match_for_task743_eurlex_title_generation": 3.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 62.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 75.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 68.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 35.0, "eval_exact_match_for_task891_gap_coreference_resolution": 52.0, "eval_exact_match_for_task892_gap_coreference_resolution": 51.0, "eval_exact_match_for_task893_gap_coreference_resolution": 57.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 70.0, "eval_exact_match_for_textual_entailment": 42.5417, "eval_exact_match_for_title_generation": 10.3139, "eval_exact_match_for_word_analogy": 45.125, "eval_f1": 50.8785, "eval_f1_for_answerability_classification": 61.3333, "eval_f1_for_cause_effect_classification": 67.6117, "eval_f1_for_coreference_resolution": 50.4406, "eval_f1_for_data_to_text": 50.216, "eval_f1_for_dialogue_act_recognition": 52.2143, "eval_f1_for_grammar_error_correction": 56.2215, "eval_f1_for_keyword_tagging": 58.742, "eval_f1_for_overlap_extraction": 28.6979, "eval_f1_for_question_rewriting": 70.2636, "eval_f1_for_task020_mctaco_answerability_classification": 56.0, "eval_f1_for_task033_winogrande_coreference_resolution": 55.3333, "eval_f1_for_task034_winogrande_question_rewriting": 90.0753, "eval_f1_for_task035_winogrande_question_rewriting": 90.5739, "eval_f1_for_task036_qasc_keyword_tagging": 68.8957, "eval_f1_for_task039_qasc_overlap_extraction": 35.8333, "eval_f1_for_task050_multirc_answerability_classification": 78.0, "eval_f1_for_task102_commongen_data_to_text": 51.407, "eval_f1_for_task1152_bard_word_analogy": 33.0, "eval_f1_for_task1153_bard_word_analogy": 30.0, "eval_f1_for_task1154_bard_word_analogy": 23.0, "eval_f1_for_task1155_bard_word_analogy": 86.0, "eval_f1_for_task1156_bard_word_analogy": 53.0, "eval_f1_for_task1157_bard_word_analogy": 68.0, "eval_f1_for_task1158_bard_word_analogy": 41.0, "eval_f1_for_task1159_bard_word_analogy": 32.3333, "eval_f1_for_task1161_coda_19_title_generation": 37.3732, "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.7671, "eval_f1_for_task121_atomic_question_rewriting": 49.6023, "eval_f1_for_task133_winowhy_coreference_resolution": 44.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.0269, "eval_f1_for_task1344_rte_textual_entailment": 56.0, "eval_f1_for_task1345_qqp_question_rewriting": 37.8882, "eval_f1_for_task1356_xlsum_title_generation": 24.0064, "eval_f1_for_task1358_xlsum_title_generation": 37.5489, "eval_f1_for_task1385_anli_textual_entailment": 0.0, "eval_f1_for_task1386_anli_textual_entailment": 12.0, "eval_f1_for_task1387_anli_textual_entailment": 12.0, "eval_f1_for_task1388_cb_textual_entailment": 2.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 56.0, "eval_f1_for_task1393_copa_cause_effect_classification": 86.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 58.0, "eval_f1_for_task1407_dart_data_to_text": 29.2217, "eval_f1_for_task1409_dart_data_to_text": 46.9676, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 28.9268, "eval_f1_for_task1439_doqa_answerability_classification": 48.0, "eval_f1_for_task1442_doqa_answerability_classification": 58.0, "eval_f1_for_task1516_imppres_textual_entailment": 48.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 59.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 39.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_f1_for_task1540_peer_read_title_generation": 38.7526, "eval_f1_for_task1554_scitail_textual_entailment": 73.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.5161, "eval_f1_for_task1562_zest_question_rewriting": 52.7322, "eval_f1_for_task1586_scifact_title_generation": 37.5251, "eval_f1_for_task1598_nyc_data_to_text": 49.233, "eval_f1_for_task1612_sick_textual_entailment": 42.0, "eval_f1_for_task1615_sick_textual_entailment": 52.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 81.5989, "eval_f1_for_task1624_disfl_qa_answerability_classification": 56.0, "eval_f1_for_task1631_open_pi_data_to_text": 86.612, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 64.0, "eval_f1_for_task1659_billsum_title_generation": 36.7646, "eval_f1_for_task1664_wino_bias_coreference_resolution": 73.8952, "eval_f1_for_task1728_web_nlg_data_to_text": 61.0526, "eval_f1_for_task190_snli_textual_entailment": 16.0, "eval_f1_for_task199_multinli_textual_entailment": 47.0, "eval_f1_for_task200_multinli_textual_entailment": 92.0, "eval_f1_for_task201_multinli_textual_entailment": 9.0, "eval_f1_for_task202_multinli_textual_entailment": 68.0, "eval_f1_for_task219_rocstories_title_generation": 15.9651, "eval_f1_for_task220_rocstories_title_generation": 99.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, "eval_f1_for_task232_iirc_answerability_classification": 52.0, "eval_f1_for_task233_iirc_answerability_classification": 46.0, "eval_f1_for_task242_tweetqa_answerability_classification": 83.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 67.8833, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 21.5626, "eval_f1_for_task288_gigaword_title_generation": 30.1653, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 5.6667, "eval_f1_for_task329_gap_coreference_resolution": 51.0, "eval_f1_for_task330_gap_coreference_resolution": 67.8571, "eval_f1_for_task349_squad2.0_answerability_classification": 53.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.5, "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_f1_for_task392_cod3s_cause_effect_classification": 83.0, "eval_f1_for_task393_cod3s_cause_effect_classification": 29.0488, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 40.5, "eval_f1_for_task402_grailqa_question_rewriting": 79.4593, "eval_f1_for_task418_persent_title_generation": 28.9443, "eval_f1_for_task442_com_qa_question_rewriting": 71.0544, "eval_f1_for_task500_scruples_title_generation": 23.2158, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 36.6912, "eval_f1_for_task520_aquamuse_answerability_classification": 66.0, "eval_f1_for_task569_recipe_nlg_title_generation": 38.2997, "eval_f1_for_task602_wikitext_title_generation": 14.5955, "eval_f1_for_task613_liar_keyword_tagging": 24.6667, "eval_f1_for_task614_glucose_cause_effect_classification": 48.8997, "eval_f1_for_task619_ohsumed_title_generation": 44.6661, "eval_f1_for_task620_ohsumed_keyword_tagging": 40.3714, "eval_f1_for_task623_ohsumed_keyword_tagging": 65.0, "eval_f1_for_task640_e_snli_textual_entailment": 27.0, "eval_f1_for_task641_e_snli_textual_entailment": 38.0, "eval_f1_for_task642_e_snli_textual_entailment": 43.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.7762, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 24.5, "eval_f1_for_task670_ambigqa_question_rewriting": 79.4057, "eval_f1_for_task671_ambigqa_question_rewriting": 59.7423, "eval_f1_for_task677_ollie_data_to_text": 29.4465, "eval_f1_for_task738_perspectrum_textual_entailment": 36.0, "eval_f1_for_task743_eurlex_title_generation": 38.7688, "eval_f1_for_task760_msr_sqa_data_to_text": 7.4603, "eval_f1_for_task769_qed_title_generation": 84.8026, "eval_f1_for_task827_copa_cause_effect_classification": 75.0, "eval_f1_for_task828_copa_cause_effect_classification": 68.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, "eval_f1_for_task890_gwsd_textual_entailment": 35.0, "eval_f1_for_task891_gap_coreference_resolution": 61.5333, "eval_f1_for_task892_gap_coreference_resolution": 51.0, "eval_f1_for_task893_gap_coreference_resolution": 57.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, "eval_f1_for_task957_e2e_data_to_text": 58.9039, "eval_f1_for_task970_sherliic_textual_entailment": 70.0, "eval_f1_for_textual_entailment": 42.5417, "eval_f1_for_title_generation": 38.048, "eval_f1_for_word_analogy": 45.7917, "eval_gen_len": 8.9207, "eval_global_step": 3000, "eval_loss": 1.1522873640060425, "eval_rouge1": 53.9387, "eval_rouge1_for_answerability_classification": 61.3333, "eval_rouge1_for_cause_effect_classification": 68.4727, "eval_rouge1_for_coreference_resolution": 50.9284, "eval_rouge1_for_data_to_text": 53.3645, "eval_rouge1_for_dialogue_act_recognition": 55.8531, "eval_rouge1_for_grammar_error_correction": 61.428, "eval_rouge1_for_keyword_tagging": 63.5254, "eval_rouge1_for_overlap_extraction": 32.2686, "eval_rouge1_for_question_rewriting": 71.9542, "eval_rouge1_for_task020_mctaco_answerability_classification": 56.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 55.3333, "eval_rouge1_for_task034_winogrande_question_rewriting": 90.0859, "eval_rouge1_for_task035_winogrande_question_rewriting": 91.0772, "eval_rouge1_for_task036_qasc_keyword_tagging": 73.5959, "eval_rouge1_for_task039_qasc_overlap_extraction": 41.6667, "eval_rouge1_for_task050_multirc_answerability_classification": 78.0, "eval_rouge1_for_task102_commongen_data_to_text": 67.0767, "eval_rouge1_for_task1152_bard_word_analogy": 33.0, "eval_rouge1_for_task1153_bard_word_analogy": 32.0, "eval_rouge1_for_task1154_bard_word_analogy": 23.0, "eval_rouge1_for_task1155_bard_word_analogy": 86.0, "eval_rouge1_for_task1156_bard_word_analogy": 53.0, "eval_rouge1_for_task1157_bard_word_analogy": 68.0, "eval_rouge1_for_task1158_bard_word_analogy": 41.0, "eval_rouge1_for_task1159_bard_word_analogy": 32.3333, "eval_rouge1_for_task1161_coda_19_title_generation": 41.5097, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.9802, "eval_rouge1_for_task121_atomic_question_rewriting": 52.4977, "eval_rouge1_for_task133_winowhy_coreference_resolution": 44.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.9835, "eval_rouge1_for_task1344_rte_textual_entailment": 56.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 41.0003, "eval_rouge1_for_task1356_xlsum_title_generation": 28.069, "eval_rouge1_for_task1358_xlsum_title_generation": 41.7859, "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, "eval_rouge1_for_task1387_anli_textual_entailment": 41.0, "eval_rouge1_for_task1388_cb_textual_entailment": 39.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 56.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 86.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 62.4714, "eval_rouge1_for_task1407_dart_data_to_text": 30.4581, "eval_rouge1_for_task1409_dart_data_to_text": 47.7981, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.23, "eval_rouge1_for_task1439_doqa_answerability_classification": 48.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 58.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 48.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 59.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 46.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rouge1_for_task1540_peer_read_title_generation": 41.6947, "eval_rouge1_for_task1554_scitail_textual_entailment": 73.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.626, "eval_rouge1_for_task1562_zest_question_rewriting": 55.9863, "eval_rouge1_for_task1586_scifact_title_generation": 41.2764, "eval_rouge1_for_task1598_nyc_data_to_text": 51.2139, "eval_rouge1_for_task1612_sick_textual_entailment": 42.0, "eval_rouge1_for_task1615_sick_textual_entailment": 84.0, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 81.8626, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 56.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 86.8368, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 64.0, "eval_rouge1_for_task1659_billsum_title_generation": 38.3236, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 73.8952, "eval_rouge1_for_task1728_web_nlg_data_to_text": 62.3975, "eval_rouge1_for_task190_snli_textual_entailment": 16.0, "eval_rouge1_for_task199_multinli_textual_entailment": 47.0, "eval_rouge1_for_task200_multinli_textual_entailment": 92.0, "eval_rouge1_for_task201_multinli_textual_entailment": 9.0, "eval_rouge1_for_task202_multinli_textual_entailment": 68.0, "eval_rouge1_for_task219_rocstories_title_generation": 20.4033, "eval_rouge1_for_task220_rocstories_title_generation": 99.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, "eval_rouge1_for_task232_iirc_answerability_classification": 52.0, "eval_rouge1_for_task233_iirc_answerability_classification": 46.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 83.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 68.55, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 22.8705, "eval_rouge1_for_task288_gigaword_title_generation": 32.931, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 5.6667, "eval_rouge1_for_task329_gap_coreference_resolution": 51.0, "eval_rouge1_for_task330_gap_coreference_resolution": 67.619, "eval_rouge1_for_task349_squad2.0_answerability_classification": 53.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.5, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.0, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 29.2417, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 46.1667, "eval_rouge1_for_task402_grailqa_question_rewriting": 82.1525, "eval_rouge1_for_task418_persent_title_generation": 31.9181, "eval_rouge1_for_task442_com_qa_question_rewriting": 74.2205, "eval_rouge1_for_task500_scruples_title_generation": 24.675, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.0804, "eval_rouge1_for_task520_aquamuse_answerability_classification": 66.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 39.9996, "eval_rouge1_for_task602_wikitext_title_generation": 15.1281, "eval_rouge1_for_task613_liar_keyword_tagging": 36.5, "eval_rouge1_for_task614_glucose_cause_effect_classification": 54.7341, "eval_rouge1_for_task619_ohsumed_title_generation": 48.4103, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 47.2548, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 65.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 27.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 38.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 43.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.2762, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 24.9, "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.5574, "eval_rouge1_for_task671_ambigqa_question_rewriting": 61.0752, "eval_rouge1_for_task677_ollie_data_to_text": 32.2841, "eval_rouge1_for_task738_perspectrum_textual_entailment": 79.0, "eval_rouge1_for_task743_eurlex_title_generation": 40.7058, "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.6136, "eval_rouge1_for_task769_qed_title_generation": 84.3701, "eval_rouge1_for_task827_copa_cause_effect_classification": 75.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 68.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 47.0, "eval_rouge1_for_task890_gwsd_textual_entailment": 35.0, "eval_rouge1_for_task891_gap_coreference_resolution": 61.8667, "eval_rouge1_for_task892_gap_coreference_resolution": 51.0, "eval_rouge1_for_task893_gap_coreference_resolution": 57.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, "eval_rouge1_for_task957_e2e_data_to_text": 60.7457, "eval_rouge1_for_task970_sherliic_textual_entailment": 70.0, "eval_rouge1_for_textual_entailment": 50.75, "eval_rouge1_for_title_generation": 40.35, "eval_rouge1_for_word_analogy": 46.0417, "eval_rougeL": 52.4679, "eval_rougeL_for_answerability_classification": 61.3333, "eval_rougeL_for_cause_effect_classification": 67.6453, "eval_rougeL_for_coreference_resolution": 50.9284, "eval_rougeL_for_data_to_text": 45.6479, "eval_rougeL_for_dialogue_act_recognition": 55.8531, "eval_rougeL_for_grammar_error_correction": 60.2718, "eval_rougeL_for_keyword_tagging": 62.9174, "eval_rougeL_for_overlap_extraction": 32.0406, "eval_rougeL_for_question_rewriting": 68.1186, "eval_rougeL_for_task020_mctaco_answerability_classification": 56.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 55.3333, "eval_rougeL_for_task034_winogrande_question_rewriting": 89.3778, "eval_rougeL_for_task035_winogrande_question_rewriting": 91.0147, "eval_rougeL_for_task036_qasc_keyword_tagging": 71.7229, "eval_rougeL_for_task039_qasc_overlap_extraction": 41.6667, "eval_rougeL_for_task050_multirc_answerability_classification": 78.0, "eval_rougeL_for_task102_commongen_data_to_text": 55.7992, "eval_rougeL_for_task1152_bard_word_analogy": 33.0, "eval_rougeL_for_task1153_bard_word_analogy": 32.0, "eval_rougeL_for_task1154_bard_word_analogy": 23.0, "eval_rougeL_for_task1155_bard_word_analogy": 86.0, "eval_rougeL_for_task1156_bard_word_analogy": 53.0, "eval_rougeL_for_task1157_bard_word_analogy": 68.0, "eval_rougeL_for_task1158_bard_word_analogy": 41.0, "eval_rougeL_for_task1159_bard_word_analogy": 32.3333, "eval_rougeL_for_task1161_coda_19_title_generation": 34.962, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 80.5772, "eval_rougeL_for_task121_atomic_question_rewriting": 46.9486, "eval_rougeL_for_task133_winowhy_coreference_resolution": 44.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.7075, "eval_rougeL_for_task1344_rte_textual_entailment": 56.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 38.4161, "eval_rougeL_for_task1356_xlsum_title_generation": 24.0208, "eval_rougeL_for_task1358_xlsum_title_generation": 35.9685, "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, "eval_rougeL_for_task1387_anli_textual_entailment": 41.0, "eval_rougeL_for_task1388_cb_textual_entailment": 39.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 56.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 86.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 62.4714, "eval_rougeL_for_task1407_dart_data_to_text": 26.5895, "eval_rougeL_for_task1409_dart_data_to_text": 40.5012, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 34.7825, "eval_rougeL_for_task1439_doqa_answerability_classification": 48.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 58.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 48.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 59.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 46.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 51.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, "eval_rougeL_for_task1540_peer_read_title_generation": 37.8732, "eval_rougeL_for_task1554_scitail_textual_entailment": 73.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.7611, "eval_rougeL_for_task1562_zest_question_rewriting": 49.2234, "eval_rougeL_for_task1586_scifact_title_generation": 35.7865, "eval_rougeL_for_task1598_nyc_data_to_text": 41.237, "eval_rougeL_for_task1612_sick_textual_entailment": 42.0, "eval_rougeL_for_task1615_sick_textual_entailment": 84.0, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 80.857, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 56.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 83.5432, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 64.0, "eval_rougeL_for_task1659_billsum_title_generation": 32.8447, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 73.8952, "eval_rougeL_for_task1728_web_nlg_data_to_text": 54.9073, "eval_rougeL_for_task190_snli_textual_entailment": 16.0, "eval_rougeL_for_task199_multinli_textual_entailment": 47.0, "eval_rougeL_for_task200_multinli_textual_entailment": 92.0, "eval_rougeL_for_task201_multinli_textual_entailment": 9.0, "eval_rougeL_for_task202_multinli_textual_entailment": 68.0, "eval_rougeL_for_task219_rocstories_title_generation": 20.4033, "eval_rougeL_for_task220_rocstories_title_generation": 99.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, "eval_rougeL_for_task232_iirc_answerability_classification": 52.0, "eval_rougeL_for_task233_iirc_answerability_classification": 46.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 83.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 68.55, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 22.4146, "eval_rougeL_for_task288_gigaword_title_generation": 28.5342, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 5.6667, "eval_rougeL_for_task329_gap_coreference_resolution": 51.0, "eval_rougeL_for_task330_gap_coreference_resolution": 67.619, "eval_rougeL_for_task349_squad2.0_answerability_classification": 53.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.5, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.0, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.4301, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 46.1667, "eval_rougeL_for_task402_grailqa_question_rewriting": 66.5917, "eval_rougeL_for_task418_persent_title_generation": 27.3998, "eval_rougeL_for_task442_com_qa_question_rewriting": 69.0225, "eval_rougeL_for_task500_scruples_title_generation": 23.5748, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 36.6257, "eval_rougeL_for_task520_aquamuse_answerability_classification": 66.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 38.3337, "eval_rougeL_for_task602_wikitext_title_generation": 15.1281, "eval_rougeL_for_task613_liar_keyword_tagging": 36.5, "eval_rougeL_for_task614_glucose_cause_effect_classification": 49.7536, "eval_rougeL_for_task619_ohsumed_title_generation": 40.3073, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 46.0881, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 65.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 27.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 38.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 43.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.2762, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 24.9, "eval_rougeL_for_task670_ambigqa_question_rewriting": 78.7673, "eval_rougeL_for_task671_ambigqa_question_rewriting": 58.5078, "eval_rougeL_for_task677_ollie_data_to_text": 26.2226, "eval_rougeL_for_task738_perspectrum_textual_entailment": 79.0, "eval_rougeL_for_task743_eurlex_title_generation": 35.7905, "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.6347, "eval_rougeL_for_task769_qed_title_generation": 84.3701, "eval_rougeL_for_task827_copa_cause_effect_classification": 75.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 68.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 47.0, "eval_rougeL_for_task890_gwsd_textual_entailment": 35.0, "eval_rougeL_for_task891_gap_coreference_resolution": 61.8667, "eval_rougeL_for_task892_gap_coreference_resolution": 51.0, "eval_rougeL_for_task893_gap_coreference_resolution": 57.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 69.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, "eval_rougeL_for_task957_e2e_data_to_text": 46.5268, "eval_rougeL_for_task970_sherliic_textual_entailment": 70.0, "eval_rougeL_for_textual_entailment": 50.75, "eval_rougeL_for_title_generation": 37.1194, "eval_rougeL_for_word_analogy": 46.0417, "eval_runtime": 817.9683, "eval_samples_per_second": 14.56, "eval_steps_per_second": 0.911, "step": 3000 }, { "epoch": 0.76, "learning_rate": 5e-05, "loss": 0.9165, "step": 3500 }, { "epoch": 0.76, "eval_exact_match": 33.3417, "eval_exact_match_for_answerability_classification": 53.6923, "eval_exact_match_for_cause_effect_classification": 50.8571, "eval_exact_match_for_coreference_resolution": 43.0714, "eval_exact_match_for_data_to_text": 6.1743, "eval_exact_match_for_dialogue_act_recognition": 56.2857, "eval_exact_match_for_grammar_error_correction": 6.0, "eval_exact_match_for_keyword_tagging": 44.2, "eval_exact_match_for_overlap_extraction": 11.5, "eval_exact_match_for_question_rewriting": 5.3636, "eval_exact_match_for_task020_mctaco_answerability_classification": 52.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 55.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 12.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 7.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 22.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, "eval_exact_match_for_task050_multirc_answerability_classification": 74.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 35.0, "eval_exact_match_for_task1153_bard_word_analogy": 38.0, "eval_exact_match_for_task1154_bard_word_analogy": 18.0, "eval_exact_match_for_task1155_bard_word_analogy": 70.0, "eval_exact_match_for_task1156_bard_word_analogy": 44.0, "eval_exact_match_for_task1157_bard_word_analogy": 64.0, "eval_exact_match_for_task1158_bard_word_analogy": 41.0, "eval_exact_match_for_task1159_bard_word_analogy": 35.0, "eval_exact_match_for_task1161_coda_19_title_generation": 2.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 15.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 36.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 11.0, "eval_exact_match_for_task1386_anli_textual_entailment": 20.0, "eval_exact_match_for_task1387_anli_textual_entailment": 8.0, "eval_exact_match_for_task1388_cb_textual_entailment": 23.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 61.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 83.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 68.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 3.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 35.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 54.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 40.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 55.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 48.0, "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 53.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 12.0, "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 47.0, "eval_exact_match_for_task1615_sick_textual_entailment": 51.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 14.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 56.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 41.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 67.0, "eval_exact_match_for_task1659_billsum_title_generation": 3.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 34.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, "eval_exact_match_for_task190_snli_textual_entailment": 38.0, "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, "eval_exact_match_for_task200_multinli_textual_entailment": 88.0, "eval_exact_match_for_task201_multinli_textual_entailment": 12.0, "eval_exact_match_for_task202_multinli_textual_entailment": 53.0, "eval_exact_match_for_task219_rocstories_title_generation": 2.0, "eval_exact_match_for_task220_rocstories_title_generation": 99.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, "eval_exact_match_for_task232_iirc_answerability_classification": 16.0, "eval_exact_match_for_task233_iirc_answerability_classification": 9.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 90.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 52.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 6.0, "eval_exact_match_for_task329_gap_coreference_resolution": 55.0, "eval_exact_match_for_task330_gap_coreference_resolution": 66.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 55.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 72.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 62.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 69.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 32.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, "eval_exact_match_for_task500_scruples_title_generation": 1.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 78.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 8.0, "eval_exact_match_for_task602_wikitext_title_generation": 2.381, "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 17.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 81.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 34.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 57.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 45.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 85.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 5.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 3.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 37.0, "eval_exact_match_for_task743_eurlex_title_generation": 3.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 66.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 84.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 58.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 78.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, "eval_exact_match_for_task891_gap_coreference_resolution": 58.0, "eval_exact_match_for_task892_gap_coreference_resolution": 49.0, "eval_exact_match_for_task893_gap_coreference_resolution": 44.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 51.0, "eval_exact_match_for_textual_entailment": 42.25, "eval_exact_match_for_title_generation": 10.9305, "eval_exact_match_for_word_analogy": 43.125, "eval_f1": 50.2713, "eval_f1_for_answerability_classification": 56.2564, "eval_f1_for_cause_effect_classification": 66.6183, "eval_f1_for_coreference_resolution": 48.7451, "eval_f1_for_data_to_text": 49.7935, "eval_f1_for_dialogue_act_recognition": 58.2857, "eval_f1_for_grammar_error_correction": 58.8077, "eval_f1_for_keyword_tagging": 59.5765, "eval_f1_for_overlap_extraction": 24.9249, "eval_f1_for_question_rewriting": 70.8573, "eval_f1_for_task020_mctaco_answerability_classification": 52.0, "eval_f1_for_task033_winogrande_coreference_resolution": 59.6667, "eval_f1_for_task034_winogrande_question_rewriting": 92.2461, "eval_f1_for_task035_winogrande_question_rewriting": 90.2607, "eval_f1_for_task036_qasc_keyword_tagging": 63.5394, "eval_f1_for_task039_qasc_overlap_extraction": 33.1333, "eval_f1_for_task050_multirc_answerability_classification": 74.0, "eval_f1_for_task102_commongen_data_to_text": 54.1071, "eval_f1_for_task1152_bard_word_analogy": 35.0, "eval_f1_for_task1153_bard_word_analogy": 38.0, "eval_f1_for_task1154_bard_word_analogy": 18.0, "eval_f1_for_task1155_bard_word_analogy": 70.0, "eval_f1_for_task1156_bard_word_analogy": 46.0, "eval_f1_for_task1157_bard_word_analogy": 64.0, "eval_f1_for_task1158_bard_word_analogy": 41.0, "eval_f1_for_task1159_bard_word_analogy": 35.6667, "eval_f1_for_task1161_coda_19_title_generation": 38.0801, "eval_f1_for_task1195_disfl_qa_question_rewriting": 82.9294, "eval_f1_for_task121_atomic_question_rewriting": 48.2806, "eval_f1_for_task133_winowhy_coreference_resolution": 36.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.7652, "eval_f1_for_task1344_rte_textual_entailment": 50.0, "eval_f1_for_task1345_qqp_question_rewriting": 39.6614, "eval_f1_for_task1356_xlsum_title_generation": 21.4939, "eval_f1_for_task1358_xlsum_title_generation": 34.4378, "eval_f1_for_task1385_anli_textual_entailment": 11.0, "eval_f1_for_task1386_anli_textual_entailment": 20.0, "eval_f1_for_task1387_anli_textual_entailment": 8.0, "eval_f1_for_task1388_cb_textual_entailment": 23.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 61.0, "eval_f1_for_task1393_copa_cause_effect_classification": 83.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 68.0, "eval_f1_for_task1407_dart_data_to_text": 25.236, "eval_f1_for_task1409_dart_data_to_text": 50.1476, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 34.0208, "eval_f1_for_task1439_doqa_answerability_classification": 48.0, "eval_f1_for_task1442_doqa_answerability_classification": 53.0, "eval_f1_for_task1516_imppres_textual_entailment": 35.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 54.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 40.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 55.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, "eval_f1_for_task1540_peer_read_title_generation": 38.6955, "eval_f1_for_task1554_scitail_textual_entailment": 53.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.5945, "eval_f1_for_task1562_zest_question_rewriting": 48.2683, "eval_f1_for_task1586_scifact_title_generation": 37.2157, "eval_f1_for_task1598_nyc_data_to_text": 52.44, "eval_f1_for_task1612_sick_textual_entailment": 47.0, "eval_f1_for_task1615_sick_textual_entailment": 51.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 82.023, "eval_f1_for_task1624_disfl_qa_answerability_classification": 56.0, "eval_f1_for_task1631_open_pi_data_to_text": 86.2193, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 67.0, "eval_f1_for_task1659_billsum_title_generation": 36.9436, "eval_f1_for_task1664_wino_bias_coreference_resolution": 70.3524, "eval_f1_for_task1728_web_nlg_data_to_text": 58.9515, "eval_f1_for_task190_snli_textual_entailment": 38.0, "eval_f1_for_task199_multinli_textual_entailment": 50.0, "eval_f1_for_task200_multinli_textual_entailment": 88.0, "eval_f1_for_task201_multinli_textual_entailment": 12.0, "eval_f1_for_task202_multinli_textual_entailment": 53.0, "eval_f1_for_task219_rocstories_title_generation": 19.7143, "eval_f1_for_task220_rocstories_title_generation": 99.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, "eval_f1_for_task232_iirc_answerability_classification": 16.0, "eval_f1_for_task233_iirc_answerability_classification": 9.0, "eval_f1_for_task242_tweetqa_answerability_classification": 90.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 64.2881, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 16.7164, "eval_f1_for_task288_gigaword_title_generation": 30.6161, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 11.3333, "eval_f1_for_task329_gap_coreference_resolution": 55.0, "eval_f1_for_task330_gap_coreference_resolution": 72.2571, "eval_f1_for_task349_squad2.0_answerability_classification": 55.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 86.0, "eval_f1_for_task391_cod3s_cause_effect_classification": 87.3333, "eval_f1_for_task392_cod3s_cause_effect_classification": 89.6667, "eval_f1_for_task393_cod3s_cause_effect_classification": 29.8622, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 36.5, "eval_f1_for_task402_grailqa_question_rewriting": 77.798, "eval_f1_for_task418_persent_title_generation": 29.8855, "eval_f1_for_task442_com_qa_question_rewriting": 71.5636, "eval_f1_for_task500_scruples_title_generation": 19.1438, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 41.1779, "eval_f1_for_task520_aquamuse_answerability_classification": 78.0, "eval_f1_for_task569_recipe_nlg_title_generation": 40.1691, "eval_f1_for_task602_wikitext_title_generation": 13.5455, "eval_f1_for_task613_liar_keyword_tagging": 20.6667, "eval_f1_for_task614_glucose_cause_effect_classification": 34.4659, "eval_f1_for_task619_ohsumed_title_generation": 44.8529, "eval_f1_for_task620_ohsumed_keyword_tagging": 37.6667, "eval_f1_for_task623_ohsumed_keyword_tagging": 81.0, "eval_f1_for_task640_e_snli_textual_entailment": 34.0, "eval_f1_for_task641_e_snli_textual_entailment": 57.0, "eval_f1_for_task642_e_snli_textual_entailment": 45.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 95.0095, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 5.6667, "eval_f1_for_task670_ambigqa_question_rewriting": 78.1891, "eval_f1_for_task671_ambigqa_question_rewriting": 68.2099, "eval_f1_for_task677_ollie_data_to_text": 24.4884, "eval_f1_for_task738_perspectrum_textual_entailment": 37.0, "eval_f1_for_task743_eurlex_title_generation": 39.4145, "eval_f1_for_task760_msr_sqa_data_to_text": 6.7518, "eval_f1_for_task769_qed_title_generation": 84.496, "eval_f1_for_task827_copa_cause_effect_classification": 84.0, "eval_f1_for_task828_copa_cause_effect_classification": 58.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 78.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, "eval_f1_for_task890_gwsd_textual_entailment": 34.0, "eval_f1_for_task891_gap_coreference_resolution": 67.3667, "eval_f1_for_task892_gap_coreference_resolution": 49.0, "eval_f1_for_task893_gap_coreference_resolution": 44.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, "eval_f1_for_task957_e2e_data_to_text": 57.9487, "eval_f1_for_task970_sherliic_textual_entailment": 51.0, "eval_f1_for_textual_entailment": 42.25, "eval_f1_for_title_generation": 38.1996, "eval_f1_for_word_analogy": 43.4583, "eval_gen_len": 8.5626, "eval_global_step": 3500, "eval_loss": 1.1878433227539062, "eval_rouge1": 52.9417, "eval_rouge1_for_answerability_classification": 56.2564, "eval_rouge1_for_cause_effect_classification": 67.1225, "eval_rouge1_for_coreference_resolution": 49.2337, "eval_rouge1_for_data_to_text": 52.8806, "eval_rouge1_for_dialogue_act_recognition": 61.9952, "eval_rouge1_for_grammar_error_correction": 63.518, "eval_rouge1_for_keyword_tagging": 64.3309, "eval_rouge1_for_overlap_extraction": 27.5853, "eval_rouge1_for_question_rewriting": 72.6076, "eval_rouge1_for_task020_mctaco_answerability_classification": 52.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 59.6667, "eval_rouge1_for_task034_winogrande_question_rewriting": 92.2944, "eval_rouge1_for_task035_winogrande_question_rewriting": 90.8616, "eval_rouge1_for_task036_qasc_keyword_tagging": 66.759, "eval_rouge1_for_task039_qasc_overlap_extraction": 37.9667, "eval_rouge1_for_task050_multirc_answerability_classification": 74.0, "eval_rouge1_for_task102_commongen_data_to_text": 68.5781, "eval_rouge1_for_task1152_bard_word_analogy": 35.0, "eval_rouge1_for_task1153_bard_word_analogy": 39.0, "eval_rouge1_for_task1154_bard_word_analogy": 18.0, "eval_rouge1_for_task1155_bard_word_analogy": 70.0, "eval_rouge1_for_task1156_bard_word_analogy": 46.0, "eval_rouge1_for_task1157_bard_word_analogy": 64.0, "eval_rouge1_for_task1158_bard_word_analogy": 41.0, "eval_rouge1_for_task1159_bard_word_analogy": 35.6667, "eval_rouge1_for_task1161_coda_19_title_generation": 41.7523, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 83.0648, "eval_rouge1_for_task121_atomic_question_rewriting": 51.2163, "eval_rouge1_for_task133_winowhy_coreference_resolution": 36.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.6427, "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 42.9037, "eval_rouge1_for_task1356_xlsum_title_generation": 25.6949, "eval_rouge1_for_task1358_xlsum_title_generation": 39.019, "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, "eval_rouge1_for_task1386_anli_textual_entailment": 31.0, "eval_rouge1_for_task1387_anli_textual_entailment": 37.0, "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 61.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 83.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 71.3, "eval_rouge1_for_task1407_dart_data_to_text": 27.296, "eval_rouge1_for_task1409_dart_data_to_text": 50.6694, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 40.4076, "eval_rouge1_for_task1439_doqa_answerability_classification": 48.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 35.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 54.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 48.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 55.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, "eval_rouge1_for_task1540_peer_read_title_generation": 41.7929, "eval_rouge1_for_task1554_scitail_textual_entailment": 53.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.6285, "eval_rouge1_for_task1562_zest_question_rewriting": 52.3911, "eval_rouge1_for_task1586_scifact_title_generation": 40.8638, "eval_rouge1_for_task1598_nyc_data_to_text": 55.0112, "eval_rouge1_for_task1612_sick_textual_entailment": 47.0, "eval_rouge1_for_task1615_sick_textual_entailment": 83.6667, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 82.3027, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 56.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 86.3224, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 67.0, "eval_rouge1_for_task1659_billsum_title_generation": 38.5605, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 70.3524, "eval_rouge1_for_task1728_web_nlg_data_to_text": 60.3824, "eval_rouge1_for_task190_snli_textual_entailment": 38.0, "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, "eval_rouge1_for_task200_multinli_textual_entailment": 88.0, "eval_rouge1_for_task201_multinli_textual_entailment": 12.0, "eval_rouge1_for_task202_multinli_textual_entailment": 53.0, "eval_rouge1_for_task219_rocstories_title_generation": 22.5025, "eval_rouge1_for_task220_rocstories_title_generation": 99.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, "eval_rouge1_for_task232_iirc_answerability_classification": 16.0, "eval_rouge1_for_task233_iirc_answerability_classification": 9.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 90.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 65.1333, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 17.2039, "eval_rouge1_for_task288_gigaword_title_generation": 33.9259, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 11.3333, "eval_rouge1_for_task329_gap_coreference_resolution": 55.0, "eval_rouge1_for_task330_gap_coreference_resolution": 72.019, "eval_rouge1_for_task349_squad2.0_answerability_classification": 55.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 86.0, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 87.3333, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 89.6667, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 29.879, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 42.5, "eval_rouge1_for_task402_grailqa_question_rewriting": 80.0825, "eval_rouge1_for_task418_persent_title_generation": 33.0012, "eval_rouge1_for_task442_com_qa_question_rewriting": 74.6555, "eval_rouge1_for_task500_scruples_title_generation": 20.3932, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 41.9846, "eval_rouge1_for_task520_aquamuse_answerability_classification": 78.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 41.9743, "eval_rouge1_for_task602_wikitext_title_generation": 14.3787, "eval_rouge1_for_task613_liar_keyword_tagging": 35.6333, "eval_rouge1_for_task614_glucose_cause_effect_classification": 37.9788, "eval_rouge1_for_task619_ohsumed_title_generation": 49.0823, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 42.7524, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 81.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 34.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 57.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 45.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.5095, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 5.6667, "eval_rouge1_for_task670_ambigqa_question_rewriting": 79.5662, "eval_rouge1_for_task671_ambigqa_question_rewriting": 69.3452, "eval_rouge1_for_task677_ollie_data_to_text": 26.7644, "eval_rouge1_for_task738_perspectrum_textual_entailment": 80.0, "eval_rouge1_for_task743_eurlex_title_generation": 40.9408, "eval_rouge1_for_task760_msr_sqa_data_to_text": 6.7006, "eval_rouge1_for_task769_qed_title_generation": 84.0433, "eval_rouge1_for_task827_copa_cause_effect_classification": 84.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 58.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 78.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 47.6667, "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, "eval_rouge1_for_task891_gap_coreference_resolution": 67.6, "eval_rouge1_for_task892_gap_coreference_resolution": 49.0, "eval_rouge1_for_task893_gap_coreference_resolution": 44.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, "eval_rouge1_for_task957_e2e_data_to_text": 60.0273, "eval_rouge1_for_task970_sherliic_textual_entailment": 51.0, "eval_rouge1_for_textual_entailment": 48.7778, "eval_rouge1_for_title_generation": 40.485, "eval_rouge1_for_word_analogy": 43.5833, "eval_rougeL": 51.4795, "eval_rougeL_for_answerability_classification": 56.2564, "eval_rougeL_for_cause_effect_classification": 66.6269, "eval_rougeL_for_coreference_resolution": 49.2337, "eval_rougeL_for_data_to_text": 44.7792, "eval_rougeL_for_dialogue_act_recognition": 61.9952, "eval_rougeL_for_grammar_error_correction": 62.8819, "eval_rougeL_for_keyword_tagging": 63.7003, "eval_rougeL_for_overlap_extraction": 27.4863, "eval_rougeL_for_question_rewriting": 68.7929, "eval_rougeL_for_task020_mctaco_answerability_classification": 52.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 59.6667, "eval_rougeL_for_task034_winogrande_question_rewriting": 90.0831, "eval_rougeL_for_task035_winogrande_question_rewriting": 90.2973, "eval_rougeL_for_task036_qasc_keyword_tagging": 64.9397, "eval_rougeL_for_task039_qasc_overlap_extraction": 37.9667, "eval_rougeL_for_task050_multirc_answerability_classification": 74.0, "eval_rougeL_for_task102_commongen_data_to_text": 57.7558, "eval_rougeL_for_task1152_bard_word_analogy": 35.0, "eval_rougeL_for_task1153_bard_word_analogy": 39.0, "eval_rougeL_for_task1154_bard_word_analogy": 18.0, "eval_rougeL_for_task1155_bard_word_analogy": 70.0, "eval_rougeL_for_task1156_bard_word_analogy": 46.0, "eval_rougeL_for_task1157_bard_word_analogy": 64.0, "eval_rougeL_for_task1158_bard_word_analogy": 41.0, "eval_rougeL_for_task1159_bard_word_analogy": 35.6667, "eval_rougeL_for_task1161_coda_19_title_generation": 35.4842, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 82.6618, "eval_rougeL_for_task121_atomic_question_rewriting": 46.0751, "eval_rougeL_for_task133_winowhy_coreference_resolution": 36.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.5736, "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 39.5908, "eval_rougeL_for_task1356_xlsum_title_generation": 21.8324, "eval_rougeL_for_task1358_xlsum_title_generation": 32.8299, "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, "eval_rougeL_for_task1386_anli_textual_entailment": 31.0, "eval_rougeL_for_task1387_anli_textual_entailment": 37.0, "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 61.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 83.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 71.3, "eval_rougeL_for_task1407_dart_data_to_text": 21.0998, "eval_rougeL_for_task1409_dart_data_to_text": 42.8631, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 40.0711, "eval_rougeL_for_task1439_doqa_answerability_classification": 48.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 35.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 54.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 48.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 55.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 48.0, "eval_rougeL_for_task1540_peer_read_title_generation": 38.0709, "eval_rougeL_for_task1554_scitail_textual_entailment": 53.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.6928, "eval_rougeL_for_task1562_zest_question_rewriting": 45.891, "eval_rougeL_for_task1586_scifact_title_generation": 34.61, "eval_rougeL_for_task1598_nyc_data_to_text": 42.8825, "eval_rougeL_for_task1612_sick_textual_entailment": 47.0, "eval_rougeL_for_task1615_sick_textual_entailment": 83.6667, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 81.0317, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 56.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 82.7388, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 67.0, "eval_rougeL_for_task1659_billsum_title_generation": 32.7661, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 70.3524, "eval_rougeL_for_task1728_web_nlg_data_to_text": 52.8762, "eval_rougeL_for_task190_snli_textual_entailment": 38.0, "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, "eval_rougeL_for_task200_multinli_textual_entailment": 88.0, "eval_rougeL_for_task201_multinli_textual_entailment": 12.0, "eval_rougeL_for_task202_multinli_textual_entailment": 53.0, "eval_rougeL_for_task219_rocstories_title_generation": 22.1692, "eval_rougeL_for_task220_rocstories_title_generation": 99.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, "eval_rougeL_for_task232_iirc_answerability_classification": 16.0, "eval_rougeL_for_task233_iirc_answerability_classification": 9.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 90.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 65.1333, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 17.006, "eval_rougeL_for_task288_gigaword_title_generation": 29.5043, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 11.3333, "eval_rougeL_for_task329_gap_coreference_resolution": 55.0, "eval_rougeL_for_task330_gap_coreference_resolution": 72.019, "eval_rougeL_for_task349_squad2.0_answerability_classification": 55.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 86.0, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 87.3333, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 89.6667, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.7922, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 42.5, "eval_rougeL_for_task402_grailqa_question_rewriting": 64.5184, "eval_rougeL_for_task418_persent_title_generation": 28.5932, "eval_rougeL_for_task442_com_qa_question_rewriting": 70.8621, "eval_rougeL_for_task500_scruples_title_generation": 19.4019, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 41.4126, "eval_rougeL_for_task520_aquamuse_answerability_classification": 78.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.1513, "eval_rougeL_for_task602_wikitext_title_generation": 14.3787, "eval_rougeL_for_task613_liar_keyword_tagging": 35.6333, "eval_rougeL_for_task614_glucose_cause_effect_classification": 35.5961, "eval_rougeL_for_task619_ohsumed_title_generation": 42.047, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 41.419, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 81.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 34.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 57.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 45.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.5095, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 5.6667, "eval_rougeL_for_task670_ambigqa_question_rewriting": 77.9802, "eval_rougeL_for_task671_ambigqa_question_rewriting": 67.7305, "eval_rougeL_for_task677_ollie_data_to_text": 22.336, "eval_rougeL_for_task738_perspectrum_textual_entailment": 80.0, "eval_rougeL_for_task743_eurlex_title_generation": 36.5079, "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.0478, "eval_rougeL_for_task769_qed_title_generation": 84.0433, "eval_rougeL_for_task827_copa_cause_effect_classification": 84.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 58.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 78.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 47.6667, "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, "eval_rougeL_for_task891_gap_coreference_resolution": 67.6, "eval_rougeL_for_task892_gap_coreference_resolution": 49.0, "eval_rougeL_for_task893_gap_coreference_resolution": 44.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 52.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 60.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, "eval_rougeL_for_task957_e2e_data_to_text": 45.7519, "eval_rougeL_for_task970_sherliic_textual_entailment": 51.0, "eval_rougeL_for_textual_entailment": 48.7778, "eval_rougeL_for_title_generation": 37.28, "eval_rougeL_for_word_analogy": 43.5833, "eval_runtime": 779.0369, "eval_samples_per_second": 15.288, "eval_steps_per_second": 0.956, "step": 3500 }, { "epoch": 0.87, "learning_rate": 5e-05, "loss": 0.8813, "step": 4000 }, { "epoch": 0.87, "eval_exact_match": 34.8279, "eval_exact_match_for_answerability_classification": 59.4615, "eval_exact_match_for_cause_effect_classification": 49.0, "eval_exact_match_for_coreference_resolution": 40.2143, "eval_exact_match_for_data_to_text": 6.7797, "eval_exact_match_for_dialogue_act_recognition": 52.7143, "eval_exact_match_for_grammar_error_correction": 5.5, "eval_exact_match_for_keyword_tagging": 48.2, "eval_exact_match_for_overlap_extraction": 9.0, "eval_exact_match_for_question_rewriting": 5.1818, "eval_exact_match_for_task020_mctaco_answerability_classification": 57.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 57.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 13.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 10.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 36.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 18.0, "eval_exact_match_for_task050_multirc_answerability_classification": 77.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 33.0, "eval_exact_match_for_task1153_bard_word_analogy": 35.0, "eval_exact_match_for_task1154_bard_word_analogy": 22.0, "eval_exact_match_for_task1155_bard_word_analogy": 70.0, "eval_exact_match_for_task1156_bard_word_analogy": 50.0, "eval_exact_match_for_task1157_bard_word_analogy": 61.0, "eval_exact_match_for_task1158_bard_word_analogy": 41.0, "eval_exact_match_for_task1159_bard_word_analogy": 37.0, "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 8.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 2.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, "eval_exact_match_for_task1344_rte_textual_entailment": 79.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, "eval_exact_match_for_task1386_anli_textual_entailment": 29.0, "eval_exact_match_for_task1387_anli_textual_entailment": 30.0, "eval_exact_match_for_task1388_cb_textual_entailment": 41.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 53.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 84.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 67.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 3.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 47.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 54.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 39.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 61.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 35.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 43.0, "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 66.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 11.0, "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 46.0, "eval_exact_match_for_task1615_sick_textual_entailment": 50.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 11.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 65.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 46.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 61.0, "eval_exact_match_for_task1659_billsum_title_generation": 4.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 33.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, "eval_exact_match_for_task190_snli_textual_entailment": 30.0, "eval_exact_match_for_task199_multinli_textual_entailment": 49.0, "eval_exact_match_for_task200_multinli_textual_entailment": 92.0, "eval_exact_match_for_task201_multinli_textual_entailment": 14.0, "eval_exact_match_for_task202_multinli_textual_entailment": 54.0, "eval_exact_match_for_task219_rocstories_title_generation": 4.0, "eval_exact_match_for_task220_rocstories_title_generation": 98.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, "eval_exact_match_for_task232_iirc_answerability_classification": 46.0, "eval_exact_match_for_task233_iirc_answerability_classification": 48.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 95.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 48.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 4.0, "eval_exact_match_for_task329_gap_coreference_resolution": 54.0, "eval_exact_match_for_task330_gap_coreference_resolution": 64.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 52.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 53.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 52.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 62.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 28.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 9.0, "eval_exact_match_for_task418_persent_title_generation": 0.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 4.0, "eval_exact_match_for_task500_scruples_title_generation": 1.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 69.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, "eval_exact_match_for_task602_wikitext_title_generation": 2.381, "eval_exact_match_for_task613_liar_keyword_tagging": 22.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 18.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 77.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 36.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 58.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 42.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 88.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 21.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 26.0, "eval_exact_match_for_task743_eurlex_title_generation": 2.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 64.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 84.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 61.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 87.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 50.0, "eval_exact_match_for_task891_gap_coreference_resolution": 62.0, "eval_exact_match_for_task892_gap_coreference_resolution": 43.0, "eval_exact_match_for_task893_gap_coreference_resolution": 44.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 59.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 68.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 73.0, "eval_exact_match_for_textual_entailment": 49.1667, "eval_exact_match_for_title_generation": 10.5381, "eval_exact_match_for_word_analogy": 43.625, "eval_f1": 52.1346, "eval_f1_for_answerability_classification": 62.0256, "eval_f1_for_cause_effect_classification": 68.0187, "eval_f1_for_coreference_resolution": 47.1522, "eval_f1_for_data_to_text": 51.4215, "eval_f1_for_dialogue_act_recognition": 56.0714, "eval_f1_for_grammar_error_correction": 57.3579, "eval_f1_for_keyword_tagging": 60.1316, "eval_f1_for_overlap_extraction": 22.9449, "eval_f1_for_question_rewriting": 70.075, "eval_f1_for_task020_mctaco_answerability_classification": 57.0, "eval_f1_for_task033_winogrande_coreference_resolution": 61.0, "eval_f1_for_task034_winogrande_question_rewriting": 91.7499, "eval_f1_for_task035_winogrande_question_rewriting": 88.371, "eval_f1_for_task036_qasc_keyword_tagging": 66.677, "eval_f1_for_task039_qasc_overlap_extraction": 25.8333, "eval_f1_for_task050_multirc_answerability_classification": 77.0, "eval_f1_for_task102_commongen_data_to_text": 53.0097, "eval_f1_for_task1152_bard_word_analogy": 33.0, "eval_f1_for_task1153_bard_word_analogy": 35.0, "eval_f1_for_task1154_bard_word_analogy": 22.0, "eval_f1_for_task1155_bard_word_analogy": 70.0, "eval_f1_for_task1156_bard_word_analogy": 50.6667, "eval_f1_for_task1157_bard_word_analogy": 61.0, "eval_f1_for_task1158_bard_word_analogy": 41.0, "eval_f1_for_task1159_bard_word_analogy": 39.6667, "eval_f1_for_task1161_coda_19_title_generation": 36.9733, "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.2243, "eval_f1_for_task121_atomic_question_rewriting": 48.2642, "eval_f1_for_task133_winowhy_coreference_resolution": 2.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.2335, "eval_f1_for_task1344_rte_textual_entailment": 79.0, "eval_f1_for_task1345_qqp_question_rewriting": 39.5058, "eval_f1_for_task1356_xlsum_title_generation": 25.3364, "eval_f1_for_task1358_xlsum_title_generation": 34.8236, "eval_f1_for_task1385_anli_textual_entailment": 33.0, "eval_f1_for_task1386_anli_textual_entailment": 29.0, "eval_f1_for_task1387_anli_textual_entailment": 30.0, "eval_f1_for_task1388_cb_textual_entailment": 41.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 53.0, "eval_f1_for_task1393_copa_cause_effect_classification": 84.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 67.0, "eval_f1_for_task1407_dart_data_to_text": 32.6811, "eval_f1_for_task1409_dart_data_to_text": 48.7223, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 31.6161, "eval_f1_for_task1439_doqa_answerability_classification": 47.0, "eval_f1_for_task1442_doqa_answerability_classification": 54.0, "eval_f1_for_task1516_imppres_textual_entailment": 39.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 61.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 35.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 43.0, "eval_f1_for_task1540_peer_read_title_generation": 38.0087, "eval_f1_for_task1554_scitail_textual_entailment": 66.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.0997, "eval_f1_for_task1562_zest_question_rewriting": 51.1277, "eval_f1_for_task1586_scifact_title_generation": 36.0219, "eval_f1_for_task1598_nyc_data_to_text": 50.2699, "eval_f1_for_task1612_sick_textual_entailment": 46.0, "eval_f1_for_task1615_sick_textual_entailment": 50.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 80.5858, "eval_f1_for_task1624_disfl_qa_answerability_classification": 65.0, "eval_f1_for_task1631_open_pi_data_to_text": 90.1601, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 61.0, "eval_f1_for_task1659_billsum_title_generation": 38.1501, "eval_f1_for_task1664_wino_bias_coreference_resolution": 70.1845, "eval_f1_for_task1728_web_nlg_data_to_text": 62.3398, "eval_f1_for_task190_snli_textual_entailment": 30.0, "eval_f1_for_task199_multinli_textual_entailment": 49.0, "eval_f1_for_task200_multinli_textual_entailment": 92.0, "eval_f1_for_task201_multinli_textual_entailment": 14.0, "eval_f1_for_task202_multinli_textual_entailment": 54.0, "eval_f1_for_task219_rocstories_title_generation": 20.0659, "eval_f1_for_task220_rocstories_title_generation": 98.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, "eval_f1_for_task232_iirc_answerability_classification": 46.0, "eval_f1_for_task233_iirc_answerability_classification": 48.0, "eval_f1_for_task242_tweetqa_answerability_classification": 95.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 59.05, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 20.0565, "eval_f1_for_task288_gigaword_title_generation": 31.5246, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 12.3667, "eval_f1_for_task329_gap_coreference_resolution": 54.0, "eval_f1_for_task330_gap_coreference_resolution": 71.7302, "eval_f1_for_task349_squad2.0_answerability_classification": 52.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 76.5, "eval_f1_for_task391_cod3s_cause_effect_classification": 84.0, "eval_f1_for_task392_cod3s_cause_effect_classification": 87.3333, "eval_f1_for_task393_cod3s_cause_effect_classification": 29.2741, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 29.1667, "eval_f1_for_task402_grailqa_question_rewriting": 76.6901, "eval_f1_for_task418_persent_title_generation": 28.6284, "eval_f1_for_task442_com_qa_question_rewriting": 71.2869, "eval_f1_for_task500_scruples_title_generation": 20.4923, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 43.3491, "eval_f1_for_task520_aquamuse_answerability_classification": 69.0, "eval_f1_for_task569_recipe_nlg_title_generation": 41.6305, "eval_f1_for_task602_wikitext_title_generation": 13.5234, "eval_f1_for_task613_liar_keyword_tagging": 24.6667, "eval_f1_for_task614_glucose_cause_effect_classification": 46.5235, "eval_f1_for_task619_ohsumed_title_generation": 47.7001, "eval_f1_for_task620_ohsumed_keyword_tagging": 37.5333, "eval_f1_for_task623_ohsumed_keyword_tagging": 77.0, "eval_f1_for_task640_e_snli_textual_entailment": 36.0, "eval_f1_for_task641_e_snli_textual_entailment": 58.0, "eval_f1_for_task642_e_snli_textual_entailment": 42.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.781, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 39.7667, "eval_f1_for_task670_ambigqa_question_rewriting": 77.9779, "eval_f1_for_task671_ambigqa_question_rewriting": 65.0412, "eval_f1_for_task677_ollie_data_to_text": 29.913, "eval_f1_for_task738_perspectrum_textual_entailment": 26.0, "eval_f1_for_task743_eurlex_title_generation": 37.2477, "eval_f1_for_task760_msr_sqa_data_to_text": 7.4966, "eval_f1_for_task769_qed_title_generation": 81.4304, "eval_f1_for_task827_copa_cause_effect_classification": 84.0, "eval_f1_for_task828_copa_cause_effect_classification": 61.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 87.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.0, "eval_f1_for_task890_gwsd_textual_entailment": 50.0, "eval_f1_for_task891_gap_coreference_resolution": 70.8667, "eval_f1_for_task892_gap_coreference_resolution": 43.0, "eval_f1_for_task893_gap_coreference_resolution": 44.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 59.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 68.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, "eval_f1_for_task957_e2e_data_to_text": 55.6967, "eval_f1_for_task970_sherliic_textual_entailment": 73.0, "eval_f1_for_textual_entailment": 49.1667, "eval_f1_for_title_generation": 38.3955, "eval_f1_for_word_analogy": 44.0417, "eval_gen_len": 8.8208, "eval_global_step": 4000, "eval_loss": 1.1603326797485352, "eval_rouge1": 54.3961, "eval_rouge1_for_answerability_classification": 62.0256, "eval_rouge1_for_cause_effect_classification": 68.8615, "eval_rouge1_for_coreference_resolution": 47.8147, "eval_rouge1_for_data_to_text": 54.4904, "eval_rouge1_for_dialogue_act_recognition": 59.5503, "eval_rouge1_for_grammar_error_correction": 62.408, "eval_rouge1_for_keyword_tagging": 66.0656, "eval_rouge1_for_overlap_extraction": 26.4228, "eval_rouge1_for_question_rewriting": 71.5568, "eval_rouge1_for_task020_mctaco_answerability_classification": 57.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 61.0, "eval_rouge1_for_task034_winogrande_question_rewriting": 91.7641, "eval_rouge1_for_task035_winogrande_question_rewriting": 89.09, "eval_rouge1_for_task036_qasc_keyword_tagging": 79.4278, "eval_rouge1_for_task039_qasc_overlap_extraction": 31.6667, "eval_rouge1_for_task050_multirc_answerability_classification": 77.0, "eval_rouge1_for_task102_commongen_data_to_text": 68.7983, "eval_rouge1_for_task1152_bard_word_analogy": 33.0, "eval_rouge1_for_task1153_bard_word_analogy": 37.0, "eval_rouge1_for_task1154_bard_word_analogy": 22.0, "eval_rouge1_for_task1155_bard_word_analogy": 70.0, "eval_rouge1_for_task1156_bard_word_analogy": 50.6667, "eval_rouge1_for_task1157_bard_word_analogy": 61.0, "eval_rouge1_for_task1158_bard_word_analogy": 41.0, "eval_rouge1_for_task1159_bard_word_analogy": 39.6667, "eval_rouge1_for_task1161_coda_19_title_generation": 41.2772, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.3876, "eval_rouge1_for_task121_atomic_question_rewriting": 50.1002, "eval_rouge1_for_task133_winowhy_coreference_resolution": 2.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.0105, "eval_rouge1_for_task1344_rte_textual_entailment": 79.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 42.6367, "eval_rouge1_for_task1356_xlsum_title_generation": 29.786, "eval_rouge1_for_task1358_xlsum_title_generation": 38.9871, "eval_rouge1_for_task1385_anli_textual_entailment": 37.0, "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, "eval_rouge1_for_task1387_anli_textual_entailment": 37.0, "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 53.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 84.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 68.6857, "eval_rouge1_for_task1407_dart_data_to_text": 33.2393, "eval_rouge1_for_task1409_dart_data_to_text": 49.4239, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 38.4818, "eval_rouge1_for_task1439_doqa_answerability_classification": 47.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 54.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 39.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 61.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 45.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 43.0, "eval_rouge1_for_task1540_peer_read_title_generation": 42.14, "eval_rouge1_for_task1554_scitail_textual_entailment": 66.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.3343, "eval_rouge1_for_task1562_zest_question_rewriting": 54.3669, "eval_rouge1_for_task1586_scifact_title_generation": 39.815, "eval_rouge1_for_task1598_nyc_data_to_text": 52.0558, "eval_rouge1_for_task1612_sick_textual_entailment": 46.0, "eval_rouge1_for_task1615_sick_textual_entailment": 83.3333, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.8666, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 65.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 90.3266, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 61.0, "eval_rouge1_for_task1659_billsum_title_generation": 40.2099, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 70.1845, "eval_rouge1_for_task1728_web_nlg_data_to_text": 63.5979, "eval_rouge1_for_task190_snli_textual_entailment": 30.0, "eval_rouge1_for_task199_multinli_textual_entailment": 49.0, "eval_rouge1_for_task200_multinli_textual_entailment": 92.0, "eval_rouge1_for_task201_multinli_textual_entailment": 14.0, "eval_rouge1_for_task202_multinli_textual_entailment": 54.0, "eval_rouge1_for_task219_rocstories_title_generation": 24.1009, "eval_rouge1_for_task220_rocstories_title_generation": 98.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, "eval_rouge1_for_task232_iirc_answerability_classification": 46.0, "eval_rouge1_for_task233_iirc_answerability_classification": 48.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 95.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 59.7167, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 21.1789, "eval_rouge1_for_task288_gigaword_title_generation": 35.059, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 13.3667, "eval_rouge1_for_task329_gap_coreference_resolution": 54.0, "eval_rouge1_for_task330_gap_coreference_resolution": 71.519, "eval_rouge1_for_task349_squad2.0_answerability_classification": 52.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 76.5, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 84.0, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 87.3333, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 29.3804, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 36.1667, "eval_rouge1_for_task402_grailqa_question_rewriting": 78.4777, "eval_rouge1_for_task418_persent_title_generation": 31.5346, "eval_rouge1_for_task442_com_qa_question_rewriting": 74.5832, "eval_rouge1_for_task500_scruples_title_generation": 21.5209, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 44.481, "eval_rouge1_for_task520_aquamuse_answerability_classification": 69.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 42.9213, "eval_rouge1_for_task602_wikitext_title_generation": 14.4758, "eval_rouge1_for_task613_liar_keyword_tagging": 35.0, "eval_rouge1_for_task614_glucose_cause_effect_classification": 52.3169, "eval_rouge1_for_task619_ohsumed_title_generation": 50.9191, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 43.619, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 77.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 36.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 58.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 42.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.281, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 40.4, "eval_rouge1_for_task670_ambigqa_question_rewriting": 79.0793, "eval_rouge1_for_task671_ambigqa_question_rewriting": 65.772, "eval_rouge1_for_task677_ollie_data_to_text": 32.9372, "eval_rouge1_for_task738_perspectrum_textual_entailment": 74.0, "eval_rouge1_for_task743_eurlex_title_generation": 38.7778, "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.4854, "eval_rouge1_for_task769_qed_title_generation": 81.384, "eval_rouge1_for_task827_copa_cause_effect_classification": 84.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 61.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 87.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 47.6667, "eval_rouge1_for_task890_gwsd_textual_entailment": 50.0, "eval_rouge1_for_task891_gap_coreference_resolution": 71.0524, "eval_rouge1_for_task892_gap_coreference_resolution": 43.0, "eval_rouge1_for_task893_gap_coreference_resolution": 44.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 59.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 68.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, "eval_rouge1_for_task957_e2e_data_to_text": 57.7656, "eval_rouge1_for_task970_sherliic_textual_entailment": 73.0, "eval_rouge1_for_textual_entailment": 53.1806, "eval_rouge1_for_title_generation": 40.8119, "eval_rouge1_for_word_analogy": 44.2917, "eval_rougeL": 52.9869, "eval_rougeL_for_answerability_classification": 62.0256, "eval_rougeL_for_cause_effect_classification": 68.3127, "eval_rougeL_for_coreference_resolution": 47.8147, "eval_rougeL_for_data_to_text": 46.7266, "eval_rougeL_for_dialogue_act_recognition": 59.5503, "eval_rougeL_for_grammar_error_correction": 61.6506, "eval_rougeL_for_keyword_tagging": 65.7656, "eval_rougeL_for_overlap_extraction": 26.2829, "eval_rougeL_for_question_rewriting": 68.1835, "eval_rougeL_for_task020_mctaco_answerability_classification": 57.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 61.0, "eval_rougeL_for_task034_winogrande_question_rewriting": 90.5835, "eval_rougeL_for_task035_winogrande_question_rewriting": 88.1674, "eval_rougeL_for_task036_qasc_keyword_tagging": 79.2611, "eval_rougeL_for_task039_qasc_overlap_extraction": 31.6667, "eval_rougeL_for_task050_multirc_answerability_classification": 77.0, "eval_rougeL_for_task102_commongen_data_to_text": 58.6057, "eval_rougeL_for_task1152_bard_word_analogy": 33.0, "eval_rougeL_for_task1153_bard_word_analogy": 37.0, "eval_rougeL_for_task1154_bard_word_analogy": 22.0, "eval_rougeL_for_task1155_bard_word_analogy": 70.0, "eval_rougeL_for_task1156_bard_word_analogy": 50.6667, "eval_rougeL_for_task1157_bard_word_analogy": 61.0, "eval_rougeL_for_task1158_bard_word_analogy": 41.0, "eval_rougeL_for_task1159_bard_word_analogy": 39.6667, "eval_rougeL_for_task1161_coda_19_title_generation": 35.6544, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.8268, "eval_rougeL_for_task121_atomic_question_rewriting": 45.1248, "eval_rougeL_for_task133_winowhy_coreference_resolution": 2.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.2798, "eval_rougeL_for_task1344_rte_textual_entailment": 79.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 39.5595, "eval_rougeL_for_task1356_xlsum_title_generation": 25.7197, "eval_rougeL_for_task1358_xlsum_title_generation": 33.5774, "eval_rougeL_for_task1385_anli_textual_entailment": 37.0, "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, "eval_rougeL_for_task1387_anli_textual_entailment": 37.0, "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 53.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 84.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 68.6857, "eval_rougeL_for_task1407_dart_data_to_text": 27.8955, "eval_rougeL_for_task1409_dart_data_to_text": 42.0132, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.8318, "eval_rougeL_for_task1439_doqa_answerability_classification": 47.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 54.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 39.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 61.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 45.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 43.0, "eval_rougeL_for_task1540_peer_read_title_generation": 37.9235, "eval_rougeL_for_task1554_scitail_textual_entailment": 66.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.4694, "eval_rougeL_for_task1562_zest_question_rewriting": 48.0607, "eval_rougeL_for_task1586_scifact_title_generation": 33.2412, "eval_rougeL_for_task1598_nyc_data_to_text": 39.4759, "eval_rougeL_for_task1612_sick_textual_entailment": 46.0, "eval_rougeL_for_task1615_sick_textual_entailment": 83.3333, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 79.6311, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 65.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 88.9137, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 61.0, "eval_rougeL_for_task1659_billsum_title_generation": 33.6315, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 70.1845, "eval_rougeL_for_task1728_web_nlg_data_to_text": 55.7449, "eval_rougeL_for_task190_snli_textual_entailment": 30.0, "eval_rougeL_for_task199_multinli_textual_entailment": 49.0, "eval_rougeL_for_task200_multinli_textual_entailment": 92.0, "eval_rougeL_for_task201_multinli_textual_entailment": 14.0, "eval_rougeL_for_task202_multinli_textual_entailment": 54.0, "eval_rougeL_for_task219_rocstories_title_generation": 23.7009, "eval_rougeL_for_task220_rocstories_title_generation": 98.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, "eval_rougeL_for_task232_iirc_answerability_classification": 46.0, "eval_rougeL_for_task233_iirc_answerability_classification": 48.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 95.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 59.7167, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 20.8992, "eval_rougeL_for_task288_gigaword_title_generation": 30.7814, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 13.3667, "eval_rougeL_for_task329_gap_coreference_resolution": 54.0, "eval_rougeL_for_task330_gap_coreference_resolution": 71.519, "eval_rougeL_for_task349_squad2.0_answerability_classification": 52.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 76.5, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 84.0, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 87.3333, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.2119, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 36.1667, "eval_rougeL_for_task402_grailqa_question_rewriting": 66.382, "eval_rougeL_for_task418_persent_title_generation": 27.393, "eval_rougeL_for_task442_com_qa_question_rewriting": 70.3942, "eval_rougeL_for_task500_scruples_title_generation": 20.3706, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 43.3259, "eval_rougeL_for_task520_aquamuse_answerability_classification": 69.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.6388, "eval_rougeL_for_task602_wikitext_title_generation": 14.2926, "eval_rougeL_for_task613_liar_keyword_tagging": 35.0, "eval_rougeL_for_task614_glucose_cause_effect_classification": 49.6434, "eval_rougeL_for_task619_ohsumed_title_generation": 43.0571, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 42.2857, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 77.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 36.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 58.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 42.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.281, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 40.4, "eval_rougeL_for_task670_ambigqa_question_rewriting": 77.8845, "eval_rougeL_for_task671_ambigqa_question_rewriting": 64.4045, "eval_rougeL_for_task677_ollie_data_to_text": 27.7822, "eval_rougeL_for_task738_perspectrum_textual_entailment": 74.0, "eval_rougeL_for_task743_eurlex_title_generation": 34.3768, "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.4169, "eval_rougeL_for_task769_qed_title_generation": 80.9395, "eval_rougeL_for_task827_copa_cause_effect_classification": 84.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 61.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 87.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 47.6667, "eval_rougeL_for_task890_gwsd_textual_entailment": 50.0, "eval_rougeL_for_task891_gap_coreference_resolution": 71.0524, "eval_rougeL_for_task892_gap_coreference_resolution": 43.0, "eval_rougeL_for_task893_gap_coreference_resolution": 44.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 59.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 68.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, "eval_rougeL_for_task957_e2e_data_to_text": 43.8622, "eval_rougeL_for_task970_sherliic_textual_entailment": 73.0, "eval_rougeL_for_textual_entailment": 53.1806, "eval_rougeL_for_title_generation": 37.4786, "eval_rougeL_for_word_analogy": 44.2917, "eval_runtime": 812.825, "eval_samples_per_second": 14.653, "eval_steps_per_second": 0.917, "step": 4000 }, { "epoch": 0.98, "learning_rate": 5e-05, "loss": 0.9129, "step": 4500 }, { "epoch": 0.98, "eval_exact_match": 35.8858, "eval_exact_match_for_answerability_classification": 62.9231, "eval_exact_match_for_cause_effect_classification": 50.0, "eval_exact_match_for_coreference_resolution": 46.8571, "eval_exact_match_for_data_to_text": 4.9637, "eval_exact_match_for_dialogue_act_recognition": 57.2857, "eval_exact_match_for_grammar_error_correction": 5.5, "eval_exact_match_for_keyword_tagging": 50.0, "eval_exact_match_for_overlap_extraction": 21.5, "eval_exact_match_for_question_rewriting": 4.3636, "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 59.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 4.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 2.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 45.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 43.0, "eval_exact_match_for_task050_multirc_answerability_classification": 63.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 26.0, "eval_exact_match_for_task1153_bard_word_analogy": 43.0, "eval_exact_match_for_task1154_bard_word_analogy": 21.0, "eval_exact_match_for_task1155_bard_word_analogy": 81.0, "eval_exact_match_for_task1156_bard_word_analogy": 60.0, "eval_exact_match_for_task1157_bard_word_analogy": 62.0, "eval_exact_match_for_task1158_bard_word_analogy": 41.0, "eval_exact_match_for_task1159_bard_word_analogy": 36.0, "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 18.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 43.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, "eval_exact_match_for_task1344_rte_textual_entailment": 54.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 16.0, "eval_exact_match_for_task1386_anli_textual_entailment": 22.0, "eval_exact_match_for_task1387_anli_textual_entailment": 30.0, "eval_exact_match_for_task1388_cb_textual_entailment": 30.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 63.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 84.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 65.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 3.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 61.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 49.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 53.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 43.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 56.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 58.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 11.0, "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 39.0, "eval_exact_match_for_task1615_sick_textual_entailment": 49.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 14.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 69.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 32.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 72.0, "eval_exact_match_for_task1659_billsum_title_generation": 4.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 46.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, "eval_exact_match_for_task190_snli_textual_entailment": 50.0, "eval_exact_match_for_task199_multinli_textual_entailment": 46.0, "eval_exact_match_for_task200_multinli_textual_entailment": 84.0, "eval_exact_match_for_task201_multinli_textual_entailment": 14.0, "eval_exact_match_for_task202_multinli_textual_entailment": 19.0, "eval_exact_match_for_task219_rocstories_title_generation": 1.0, "eval_exact_match_for_task220_rocstories_title_generation": 99.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, "eval_exact_match_for_task232_iirc_answerability_classification": 58.0, "eval_exact_match_for_task233_iirc_answerability_classification": 45.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 96.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 56.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, "eval_exact_match_for_task329_gap_coreference_resolution": 64.0, "eval_exact_match_for_task330_gap_coreference_resolution": 63.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 59.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 73.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 59.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 52.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 32.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 4.0, "eval_exact_match_for_task418_persent_title_generation": 1.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 4.0, "eval_exact_match_for_task500_scruples_title_generation": 1.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 91.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 7.0, "eval_exact_match_for_task602_wikitext_title_generation": 4.7619, "eval_exact_match_for_task613_liar_keyword_tagging": 21.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 1.0, "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 21.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 75.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 37.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 49.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 41.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 88.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 10.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 40.0, "eval_exact_match_for_task743_eurlex_title_generation": 1.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 71.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 91.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 63.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 78.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 37.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 46.0, "eval_exact_match_for_task891_gap_coreference_resolution": 53.0, "eval_exact_match_for_task892_gap_coreference_resolution": 44.0, "eval_exact_match_for_task893_gap_coreference_resolution": 70.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 62.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 72.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 74.0, "eval_exact_match_for_textual_entailment": 45.375, "eval_exact_match_for_title_generation": 11.0426, "eval_exact_match_for_word_analogy": 46.25, "eval_f1": 53.1037, "eval_f1_for_answerability_classification": 65.4872, "eval_f1_for_cause_effect_classification": 68.4711, "eval_f1_for_coreference_resolution": 51.7532, "eval_f1_for_data_to_text": 50.9563, "eval_f1_for_dialogue_act_recognition": 59.2143, "eval_f1_for_grammar_error_correction": 68.4925, "eval_f1_for_keyword_tagging": 61.8349, "eval_f1_for_overlap_extraction": 32.97, "eval_f1_for_question_rewriting": 70.519, "eval_f1_for_task020_mctaco_answerability_classification": 53.0, "eval_f1_for_task033_winogrande_coreference_resolution": 61.6667, "eval_f1_for_task034_winogrande_question_rewriting": 88.3167, "eval_f1_for_task035_winogrande_question_rewriting": 90.273, "eval_f1_for_task036_qasc_keyword_tagging": 75.527, "eval_f1_for_task039_qasc_overlap_extraction": 48.8333, "eval_f1_for_task050_multirc_answerability_classification": 63.0, "eval_f1_for_task102_commongen_data_to_text": 53.0451, "eval_f1_for_task1152_bard_word_analogy": 26.0, "eval_f1_for_task1153_bard_word_analogy": 43.0, "eval_f1_for_task1154_bard_word_analogy": 21.0, "eval_f1_for_task1155_bard_word_analogy": 81.0, "eval_f1_for_task1156_bard_word_analogy": 60.6667, "eval_f1_for_task1157_bard_word_analogy": 62.0, "eval_f1_for_task1158_bard_word_analogy": 41.0, "eval_f1_for_task1159_bard_word_analogy": 36.0, "eval_f1_for_task1161_coda_19_title_generation": 39.5543, "eval_f1_for_task1195_disfl_qa_question_rewriting": 82.8792, "eval_f1_for_task121_atomic_question_rewriting": 49.1417, "eval_f1_for_task133_winowhy_coreference_resolution": 43.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.7028, "eval_f1_for_task1344_rte_textual_entailment": 54.0, "eval_f1_for_task1345_qqp_question_rewriting": 39.4444, "eval_f1_for_task1356_xlsum_title_generation": 24.0678, "eval_f1_for_task1358_xlsum_title_generation": 35.8365, "eval_f1_for_task1385_anli_textual_entailment": 16.0, "eval_f1_for_task1386_anli_textual_entailment": 22.0, "eval_f1_for_task1387_anli_textual_entailment": 30.0, "eval_f1_for_task1388_cb_textual_entailment": 30.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 63.0, "eval_f1_for_task1393_copa_cause_effect_classification": 84.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 65.0, "eval_f1_for_task1407_dart_data_to_text": 33.406, "eval_f1_for_task1409_dart_data_to_text": 47.3477, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 53.4987, "eval_f1_for_task1439_doqa_answerability_classification": 48.0, "eval_f1_for_task1442_doqa_answerability_classification": 61.0, "eval_f1_for_task1516_imppres_textual_entailment": 49.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 53.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 43.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, "eval_f1_for_task1540_peer_read_title_generation": 40.9423, "eval_f1_for_task1554_scitail_textual_entailment": 58.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.4863, "eval_f1_for_task1562_zest_question_rewriting": 54.1629, "eval_f1_for_task1586_scifact_title_generation": 37.1278, "eval_f1_for_task1598_nyc_data_to_text": 50.2932, "eval_f1_for_task1612_sick_textual_entailment": 39.0, "eval_f1_for_task1615_sick_textual_entailment": 49.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.9036, "eval_f1_for_task1624_disfl_qa_answerability_classification": 69.0, "eval_f1_for_task1631_open_pi_data_to_text": 87.0057, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 72.0, "eval_f1_for_task1659_billsum_title_generation": 37.3598, "eval_f1_for_task1664_wino_bias_coreference_resolution": 76.8048, "eval_f1_for_task1728_web_nlg_data_to_text": 63.5453, "eval_f1_for_task190_snli_textual_entailment": 50.0, "eval_f1_for_task199_multinli_textual_entailment": 46.0, "eval_f1_for_task200_multinli_textual_entailment": 84.0, "eval_f1_for_task201_multinli_textual_entailment": 14.0, "eval_f1_for_task202_multinli_textual_entailment": 19.0, "eval_f1_for_task219_rocstories_title_generation": 15.6825, "eval_f1_for_task220_rocstories_title_generation": 99.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, "eval_f1_for_task232_iirc_answerability_classification": 58.0, "eval_f1_for_task233_iirc_answerability_classification": 45.0, "eval_f1_for_task242_tweetqa_answerability_classification": 96.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 66.8833, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 17.1066, "eval_f1_for_task288_gigaword_title_generation": 29.6092, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 4.3333, "eval_f1_for_task329_gap_coreference_resolution": 64.0, "eval_f1_for_task330_gap_coreference_resolution": 72.3238, "eval_f1_for_task349_squad2.0_answerability_classification": 59.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 86.5, "eval_f1_for_task391_cod3s_cause_effect_classification": 86.3333, "eval_f1_for_task392_cod3s_cause_effect_classification": 84.0, "eval_f1_for_task393_cod3s_cause_effect_classification": 33.8835, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 32.6667, "eval_f1_for_task402_grailqa_question_rewriting": 79.0134, "eval_f1_for_task418_persent_title_generation": 29.4778, "eval_f1_for_task442_com_qa_question_rewriting": 71.5523, "eval_f1_for_task500_scruples_title_generation": 19.6283, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 43.0735, "eval_f1_for_task520_aquamuse_answerability_classification": 91.0, "eval_f1_for_task569_recipe_nlg_title_generation": 40.0131, "eval_f1_for_task602_wikitext_title_generation": 15.5887, "eval_f1_for_task613_liar_keyword_tagging": 21.6667, "eval_f1_for_task614_glucose_cause_effect_classification": 37.0811, "eval_f1_for_task619_ohsumed_title_generation": 45.8186, "eval_f1_for_task620_ohsumed_keyword_tagging": 41.9, "eval_f1_for_task623_ohsumed_keyword_tagging": 75.0, "eval_f1_for_task640_e_snli_textual_entailment": 37.0, "eval_f1_for_task641_e_snli_textual_entailment": 49.0, "eval_f1_for_task642_e_snli_textual_entailment": 41.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 95.081, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 15.3333, "eval_f1_for_task670_ambigqa_question_rewriting": 78.7485, "eval_f1_for_task671_ambigqa_question_rewriting": 62.2733, "eval_f1_for_task677_ollie_data_to_text": 30.9067, "eval_f1_for_task738_perspectrum_textual_entailment": 40.0, "eval_f1_for_task743_eurlex_title_generation": 34.7975, "eval_f1_for_task760_msr_sqa_data_to_text": 3.1227, "eval_f1_for_task769_qed_title_generation": 86.8134, "eval_f1_for_task827_copa_cause_effect_classification": 91.0, "eval_f1_for_task828_copa_cause_effect_classification": 63.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 78.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 37.0, "eval_f1_for_task890_gwsd_textual_entailment": 46.0, "eval_f1_for_task891_gap_coreference_resolution": 60.5333, "eval_f1_for_task892_gap_coreference_resolution": 44.0, "eval_f1_for_task893_gap_coreference_resolution": 70.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 62.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 72.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, "eval_f1_for_task957_e2e_data_to_text": 54.5378, "eval_f1_for_task970_sherliic_textual_entailment": 74.0, "eval_f1_for_textual_entailment": 45.375, "eval_f1_for_title_generation": 38.4865, "eval_f1_for_word_analogy": 46.3333, "eval_gen_len": 8.091, "eval_global_step": 4500, "eval_loss": 1.1735401153564453, "eval_rouge1": 55.5064, "eval_rouge1_for_answerability_classification": 65.4872, "eval_rouge1_for_cause_effect_classification": 68.9307, "eval_rouge1_for_coreference_resolution": 52.4189, "eval_rouge1_for_data_to_text": 54.1071, "eval_rouge1_for_dialogue_act_recognition": 63.2889, "eval_rouge1_for_grammar_error_correction": 71.0756, "eval_rouge1_for_keyword_tagging": 67.1289, "eval_rouge1_for_overlap_extraction": 35.7253, "eval_rouge1_for_question_rewriting": 72.0336, "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 61.6667, "eval_rouge1_for_task034_winogrande_question_rewriting": 88.3224, "eval_rouge1_for_task035_winogrande_question_rewriting": 90.8067, "eval_rouge1_for_task036_qasc_keyword_tagging": 81.9778, "eval_rouge1_for_task039_qasc_overlap_extraction": 53.8333, "eval_rouge1_for_task050_multirc_answerability_classification": 63.0, "eval_rouge1_for_task102_commongen_data_to_text": 68.0462, "eval_rouge1_for_task1152_bard_word_analogy": 26.0, "eval_rouge1_for_task1153_bard_word_analogy": 44.0, "eval_rouge1_for_task1154_bard_word_analogy": 21.0, "eval_rouge1_for_task1155_bard_word_analogy": 81.0, "eval_rouge1_for_task1156_bard_word_analogy": 60.6667, "eval_rouge1_for_task1157_bard_word_analogy": 62.0, "eval_rouge1_for_task1158_bard_word_analogy": 41.0, "eval_rouge1_for_task1159_bard_word_analogy": 36.0, "eval_rouge1_for_task1161_coda_19_title_generation": 42.7803, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 83.1642, "eval_rouge1_for_task121_atomic_question_rewriting": 51.3571, "eval_rouge1_for_task133_winowhy_coreference_resolution": 43.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.7246, "eval_rouge1_for_task1344_rte_textual_entailment": 54.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 42.2617, "eval_rouge1_for_task1356_xlsum_title_generation": 28.7221, "eval_rouge1_for_task1358_xlsum_title_generation": 40.1074, "eval_rouge1_for_task1385_anli_textual_entailment": 37.0, "eval_rouge1_for_task1386_anli_textual_entailment": 31.0, "eval_rouge1_for_task1387_anli_textual_entailment": 38.0, "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 63.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 84.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 68.1889, "eval_rouge1_for_task1407_dart_data_to_text": 34.5003, "eval_rouge1_for_task1409_dart_data_to_text": 48.3588, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 55.4622, "eval_rouge1_for_task1439_doqa_answerability_classification": 48.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 61.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 49.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 53.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 49.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, "eval_rouge1_for_task1540_peer_read_title_generation": 43.2584, "eval_rouge1_for_task1554_scitail_textual_entailment": 58.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.689, "eval_rouge1_for_task1562_zest_question_rewriting": 56.8503, "eval_rouge1_for_task1586_scifact_title_generation": 40.9799, "eval_rouge1_for_task1598_nyc_data_to_text": 52.3137, "eval_rouge1_for_task1612_sick_textual_entailment": 39.0, "eval_rouge1_for_task1615_sick_textual_entailment": 83.0, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.1078, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 69.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 87.3201, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 72.0, "eval_rouge1_for_task1659_billsum_title_generation": 39.4398, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 76.8048, "eval_rouge1_for_task1728_web_nlg_data_to_text": 65.1922, "eval_rouge1_for_task190_snli_textual_entailment": 50.0, "eval_rouge1_for_task199_multinli_textual_entailment": 46.0, "eval_rouge1_for_task200_multinli_textual_entailment": 84.0, "eval_rouge1_for_task201_multinli_textual_entailment": 14.0, "eval_rouge1_for_task202_multinli_textual_entailment": 19.0, "eval_rouge1_for_task219_rocstories_title_generation": 19.7121, "eval_rouge1_for_task220_rocstories_title_generation": 99.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, "eval_rouge1_for_task232_iirc_answerability_classification": 58.0, "eval_rouge1_for_task233_iirc_answerability_classification": 45.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 96.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 67.05, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 17.6173, "eval_rouge1_for_task288_gigaword_title_generation": 32.9165, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 4.3333, "eval_rouge1_for_task329_gap_coreference_resolution": 64.0, "eval_rouge1_for_task330_gap_coreference_resolution": 72.1429, "eval_rouge1_for_task349_squad2.0_answerability_classification": 59.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 86.5, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 86.3333, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.0, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.8707, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 41.6667, "eval_rouge1_for_task402_grailqa_question_rewriting": 81.2881, "eval_rouge1_for_task418_persent_title_generation": 32.686, "eval_rouge1_for_task442_com_qa_question_rewriting": 74.9407, "eval_rouge1_for_task500_scruples_title_generation": 20.8929, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 43.7199, "eval_rouge1_for_task520_aquamuse_answerability_classification": 91.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 41.9219, "eval_rouge1_for_task602_wikitext_title_generation": 16.5727, "eval_rouge1_for_task613_liar_keyword_tagging": 34.6333, "eval_rouge1_for_task614_glucose_cause_effect_classification": 40.3111, "eval_rouge1_for_task619_ohsumed_title_generation": 49.2196, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 48.4524, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 75.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 37.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 49.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 41.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.581, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 15.3333, "eval_rouge1_for_task670_ambigqa_question_rewriting": 79.87, "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.4006, "eval_rouge1_for_task677_ollie_data_to_text": 33.75, "eval_rouge1_for_task738_perspectrum_textual_entailment": 79.0, "eval_rouge1_for_task743_eurlex_title_generation": 37.0216, "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.2444, "eval_rouge1_for_task769_qed_title_generation": 86.8476, "eval_rouge1_for_task827_copa_cause_effect_classification": 91.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 63.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 78.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 56.3333, "eval_rouge1_for_task890_gwsd_textual_entailment": 46.0, "eval_rouge1_for_task891_gap_coreference_resolution": 60.8667, "eval_rouge1_for_task892_gap_coreference_resolution": 44.0, "eval_rouge1_for_task893_gap_coreference_resolution": 70.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 62.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 72.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, "eval_rouge1_for_task957_e2e_data_to_text": 56.5998, "eval_rouge1_for_task970_sherliic_textual_entailment": 74.0, "eval_rouge1_for_textual_entailment": 50.4583, "eval_rouge1_for_title_generation": 40.856, "eval_rouge1_for_word_analogy": 46.4583, "eval_rougeL": 54.0787, "eval_rougeL_for_answerability_classification": 65.4872, "eval_rougeL_for_cause_effect_classification": 68.4139, "eval_rougeL_for_coreference_resolution": 52.4189, "eval_rougeL_for_data_to_text": 45.9041, "eval_rougeL_for_dialogue_act_recognition": 63.2889, "eval_rougeL_for_grammar_error_correction": 69.9671, "eval_rougeL_for_keyword_tagging": 66.8089, "eval_rougeL_for_overlap_extraction": 35.5317, "eval_rougeL_for_question_rewriting": 68.2641, "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 61.6667, "eval_rougeL_for_task034_winogrande_question_rewriting": 86.9223, "eval_rougeL_for_task035_winogrande_question_rewriting": 90.6572, "eval_rougeL_for_task036_qasc_keyword_tagging": 81.7778, "eval_rougeL_for_task039_qasc_overlap_extraction": 53.8333, "eval_rougeL_for_task050_multirc_answerability_classification": 63.0, "eval_rougeL_for_task102_commongen_data_to_text": 54.2271, "eval_rougeL_for_task1152_bard_word_analogy": 26.0, "eval_rougeL_for_task1153_bard_word_analogy": 44.0, "eval_rougeL_for_task1154_bard_word_analogy": 21.0, "eval_rougeL_for_task1155_bard_word_analogy": 81.0, "eval_rougeL_for_task1156_bard_word_analogy": 60.6667, "eval_rougeL_for_task1157_bard_word_analogy": 62.0, "eval_rougeL_for_task1158_bard_word_analogy": 41.0, "eval_rougeL_for_task1159_bard_word_analogy": 36.0, "eval_rougeL_for_task1161_coda_19_title_generation": 36.5542, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 83.0689, "eval_rougeL_for_task121_atomic_question_rewriting": 45.45, "eval_rougeL_for_task133_winowhy_coreference_resolution": 43.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.7504, "eval_rougeL_for_task1344_rte_textual_entailment": 54.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 39.5862, "eval_rougeL_for_task1356_xlsum_title_generation": 25.3293, "eval_rougeL_for_task1358_xlsum_title_generation": 34.0338, "eval_rougeL_for_task1385_anli_textual_entailment": 37.0, "eval_rougeL_for_task1386_anli_textual_entailment": 31.0, "eval_rougeL_for_task1387_anli_textual_entailment": 38.0, "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 63.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 84.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 68.1889, "eval_rougeL_for_task1407_dart_data_to_text": 29.118, "eval_rougeL_for_task1409_dart_data_to_text": 42.9086, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 54.1666, "eval_rougeL_for_task1439_doqa_answerability_classification": 48.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 61.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 49.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 53.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 49.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 56.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, "eval_rougeL_for_task1540_peer_read_title_generation": 40.3608, "eval_rougeL_for_task1554_scitail_textual_entailment": 58.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.7676, "eval_rougeL_for_task1562_zest_question_rewriting": 49.7461, "eval_rougeL_for_task1586_scifact_title_generation": 35.1651, "eval_rougeL_for_task1598_nyc_data_to_text": 41.5292, "eval_rougeL_for_task1612_sick_textual_entailment": 39.0, "eval_rougeL_for_task1615_sick_textual_entailment": 83.0, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 79.0367, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 69.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 82.6198, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 72.0, "eval_rougeL_for_task1659_billsum_title_generation": 34.1951, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 76.8048, "eval_rougeL_for_task1728_web_nlg_data_to_text": 57.0907, "eval_rougeL_for_task190_snli_textual_entailment": 50.0, "eval_rougeL_for_task199_multinli_textual_entailment": 46.0, "eval_rougeL_for_task200_multinli_textual_entailment": 84.0, "eval_rougeL_for_task201_multinli_textual_entailment": 14.0, "eval_rougeL_for_task202_multinli_textual_entailment": 19.0, "eval_rougeL_for_task219_rocstories_title_generation": 19.7121, "eval_rougeL_for_task220_rocstories_title_generation": 99.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, "eval_rougeL_for_task232_iirc_answerability_classification": 58.0, "eval_rougeL_for_task233_iirc_answerability_classification": 45.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 96.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 67.05, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 17.2302, "eval_rougeL_for_task288_gigaword_title_generation": 29.3107, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 4.3333, "eval_rougeL_for_task329_gap_coreference_resolution": 64.0, "eval_rougeL_for_task330_gap_coreference_resolution": 72.1429, "eval_rougeL_for_task349_squad2.0_answerability_classification": 59.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 86.5, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 86.3333, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.0, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 33.2124, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 41.6667, "eval_rougeL_for_task402_grailqa_question_rewriting": 65.5059, "eval_rougeL_for_task418_persent_title_generation": 28.6191, "eval_rougeL_for_task442_com_qa_question_rewriting": 70.5296, "eval_rougeL_for_task500_scruples_title_generation": 19.7443, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.6749, "eval_rougeL_for_task520_aquamuse_answerability_classification": 91.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.4451, "eval_rougeL_for_task602_wikitext_title_generation": 16.5727, "eval_rougeL_for_task613_liar_keyword_tagging": 34.6333, "eval_rougeL_for_task614_glucose_cause_effect_classification": 37.3514, "eval_rougeL_for_task619_ohsumed_title_generation": 42.7701, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 47.0524, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 75.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 37.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 49.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 41.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.581, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 15.3333, "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.1171, "eval_rougeL_for_task671_ambigqa_question_rewriting": 61.285, "eval_rougeL_for_task677_ollie_data_to_text": 27.1729, "eval_rougeL_for_task738_perspectrum_textual_entailment": 79.0, "eval_rougeL_for_task743_eurlex_title_generation": 32.4427, "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.0257, "eval_rougeL_for_task769_qed_title_generation": 86.8476, "eval_rougeL_for_task827_copa_cause_effect_classification": 91.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 63.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 78.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 56.3333, "eval_rougeL_for_task890_gwsd_textual_entailment": 46.0, "eval_rougeL_for_task891_gap_coreference_resolution": 60.8667, "eval_rougeL_for_task892_gap_coreference_resolution": 44.0, "eval_rougeL_for_task893_gap_coreference_resolution": 70.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 62.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 72.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, "eval_rougeL_for_task957_e2e_data_to_text": 43.7148, "eval_rougeL_for_task970_sherliic_textual_entailment": 74.0, "eval_rougeL_for_textual_entailment": 50.4583, "eval_rougeL_for_title_generation": 37.8854, "eval_rougeL_for_word_analogy": 46.4583, "eval_runtime": 752.8471, "eval_samples_per_second": 15.82, "eval_steps_per_second": 0.99, "step": 4500 }, { "epoch": 1.09, "learning_rate": 5e-05, "loss": 0.7361, "step": 5000 }, { "epoch": 1.09, "eval_exact_match": 35.4156, "eval_exact_match_for_answerability_classification": 63.0769, "eval_exact_match_for_cause_effect_classification": 50.0, "eval_exact_match_for_coreference_resolution": 42.4286, "eval_exact_match_for_data_to_text": 6.2954, "eval_exact_match_for_dialogue_act_recognition": 54.7143, "eval_exact_match_for_grammar_error_correction": 5.5, "eval_exact_match_for_keyword_tagging": 47.8, "eval_exact_match_for_overlap_extraction": 18.0, "eval_exact_match_for_question_rewriting": 4.1818, "eval_exact_match_for_task020_mctaco_answerability_classification": 55.0, "eval_exact_match_for_task033_winogrande_coreference_resolution": 58.0, "eval_exact_match_for_task034_winogrande_question_rewriting": 10.0, "eval_exact_match_for_task035_winogrande_question_rewriting": 7.0, "eval_exact_match_for_task036_qasc_keyword_tagging": 43.0, "eval_exact_match_for_task039_qasc_overlap_extraction": 36.0, "eval_exact_match_for_task050_multirc_answerability_classification": 68.0, "eval_exact_match_for_task102_commongen_data_to_text": 0.0, "eval_exact_match_for_task1152_bard_word_analogy": 27.0, "eval_exact_match_for_task1153_bard_word_analogy": 40.0, "eval_exact_match_for_task1154_bard_word_analogy": 26.0, "eval_exact_match_for_task1155_bard_word_analogy": 77.0, "eval_exact_match_for_task1156_bard_word_analogy": 59.0, "eval_exact_match_for_task1157_bard_word_analogy": 63.0, "eval_exact_match_for_task1158_bard_word_analogy": 52.0, "eval_exact_match_for_task1159_bard_word_analogy": 37.0, "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 11.0, "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, "eval_exact_match_for_task133_winowhy_coreference_resolution": 0.0, "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 4.0, "eval_exact_match_for_task1344_rte_textual_entailment": 67.0, "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, "eval_exact_match_for_task1385_anli_textual_entailment": 10.0, "eval_exact_match_for_task1386_anli_textual_entailment": 20.0, "eval_exact_match_for_task1387_anli_textual_entailment": 23.0, "eval_exact_match_for_task1388_cb_textual_entailment": 35.0, "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_exact_match_for_task1391_winogrande_coreference_resolution": 64.0, "eval_exact_match_for_task1393_copa_cause_effect_classification": 81.0, "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 61.0, "eval_exact_match_for_task1407_dart_data_to_text": 0.0, "eval_exact_match_for_task1409_dart_data_to_text": 3.0, "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, "eval_exact_match_for_task1439_doqa_answerability_classification": 46.0, "eval_exact_match_for_task1442_doqa_answerability_classification": 59.0, "eval_exact_match_for_task1516_imppres_textual_entailment": 45.0, "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 58.0, "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 39.0, "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 56.0, "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, "eval_exact_match_for_task1554_scitail_textual_entailment": 67.0, "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 11.0, "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, "eval_exact_match_for_task1586_scifact_title_generation": 0.0, "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, "eval_exact_match_for_task1612_sick_textual_entailment": 53.0, "eval_exact_match_for_task1615_sick_textual_entailment": 49.0, "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 10.0, "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 65.0, "eval_exact_match_for_task1631_open_pi_data_to_text": 43.0, "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 78.0, "eval_exact_match_for_task1659_billsum_title_generation": 2.0, "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 42.0, "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, "eval_exact_match_for_task190_snli_textual_entailment": 8.0, "eval_exact_match_for_task199_multinli_textual_entailment": 45.0, "eval_exact_match_for_task200_multinli_textual_entailment": 90.0, "eval_exact_match_for_task201_multinli_textual_entailment": 10.0, "eval_exact_match_for_task202_multinli_textual_entailment": 71.0, "eval_exact_match_for_task219_rocstories_title_generation": 2.0, "eval_exact_match_for_task220_rocstories_title_generation": 99.0, "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, "eval_exact_match_for_task232_iirc_answerability_classification": 49.0, "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, "eval_exact_match_for_task242_tweetqa_answerability_classification": 96.0, "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 49.0, "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, "eval_exact_match_for_task288_gigaword_title_generation": 0.0, "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, "eval_exact_match_for_task329_gap_coreference_resolution": 58.0, "eval_exact_match_for_task330_gap_coreference_resolution": 64.0, "eval_exact_match_for_task349_squad2.0_answerability_classification": 59.0, "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 49.0, "eval_exact_match_for_task391_cod3s_cause_effect_classification": 55.0, "eval_exact_match_for_task392_cod3s_cause_effect_classification": 54.0, "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 31.0, "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, "eval_exact_match_for_task418_persent_title_generation": 2.0, "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, "eval_exact_match_for_task500_scruples_title_generation": 2.0, "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, "eval_exact_match_for_task520_aquamuse_answerability_classification": 89.0, "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, "eval_exact_match_for_task602_wikitext_title_generation": 5.9524, "eval_exact_match_for_task613_liar_keyword_tagging": 20.0, "eval_exact_match_for_task614_glucose_cause_effect_classification": 2.0, "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, "eval_exact_match_for_task620_ohsumed_keyword_tagging": 21.0, "eval_exact_match_for_task623_ohsumed_keyword_tagging": 68.0, "eval_exact_match_for_task640_e_snli_textual_entailment": 40.0, "eval_exact_match_for_task641_e_snli_textual_entailment": 30.0, "eval_exact_match_for_task642_e_snli_textual_entailment": 36.0, "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 87.0, "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 10.0, "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, "eval_exact_match_for_task677_ollie_data_to_text": 0.0, "eval_exact_match_for_task738_perspectrum_textual_entailment": 34.0, "eval_exact_match_for_task743_eurlex_title_generation": 2.0, "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, "eval_exact_match_for_task769_qed_title_generation": 69.0, "eval_exact_match_for_task827_copa_cause_effect_classification": 89.0, "eval_exact_match_for_task828_copa_cause_effect_classification": 69.0, "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 79.0, "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 50.0, "eval_exact_match_for_task890_gwsd_textual_entailment": 52.0, "eval_exact_match_for_task891_gap_coreference_resolution": 58.0, "eval_exact_match_for_task892_gap_coreference_resolution": 43.0, "eval_exact_match_for_task893_gap_coreference_resolution": 64.0, "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 76.0, "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, "eval_exact_match_for_task957_e2e_data_to_text": 0.0, "eval_exact_match_for_task970_sherliic_textual_entailment": 75.0, "eval_exact_match_for_textual_entailment": 46.2083, "eval_exact_match_for_title_generation": 11.0426, "eval_exact_match_for_word_analogy": 47.625, "eval_f1": 52.7092, "eval_f1_for_answerability_classification": 65.641, "eval_f1_for_cause_effect_classification": 69.3677, "eval_f1_for_coreference_resolution": 47.7905, "eval_f1_for_data_to_text": 50.6257, "eval_f1_for_dialogue_act_recognition": 58.2857, "eval_f1_for_grammar_error_correction": 68.2355, "eval_f1_for_keyword_tagging": 59.3878, "eval_f1_for_overlap_extraction": 30.1621, "eval_f1_for_question_rewriting": 69.371, "eval_f1_for_task020_mctaco_answerability_classification": 55.0, "eval_f1_for_task033_winogrande_coreference_resolution": 62.0, "eval_f1_for_task034_winogrande_question_rewriting": 92.6735, "eval_f1_for_task035_winogrande_question_rewriting": 88.4728, "eval_f1_for_task036_qasc_keyword_tagging": 72.5151, "eval_f1_for_task039_qasc_overlap_extraction": 44.5, "eval_f1_for_task050_multirc_answerability_classification": 68.0, "eval_f1_for_task102_commongen_data_to_text": 54.2971, "eval_f1_for_task1152_bard_word_analogy": 27.0, "eval_f1_for_task1153_bard_word_analogy": 40.0, "eval_f1_for_task1154_bard_word_analogy": 26.0, "eval_f1_for_task1155_bard_word_analogy": 77.0, "eval_f1_for_task1156_bard_word_analogy": 59.6667, "eval_f1_for_task1157_bard_word_analogy": 63.0, "eval_f1_for_task1158_bard_word_analogy": 52.0, "eval_f1_for_task1159_bard_word_analogy": 37.0, "eval_f1_for_task1161_coda_19_title_generation": 38.9281, "eval_f1_for_task1195_disfl_qa_question_rewriting": 81.8154, "eval_f1_for_task121_atomic_question_rewriting": 47.5297, "eval_f1_for_task133_winowhy_coreference_resolution": 0.0, "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.9718, "eval_f1_for_task1344_rte_textual_entailment": 67.0, "eval_f1_for_task1345_qqp_question_rewriting": 38.4384, "eval_f1_for_task1356_xlsum_title_generation": 24.1726, "eval_f1_for_task1358_xlsum_title_generation": 36.2993, "eval_f1_for_task1385_anli_textual_entailment": 10.0, "eval_f1_for_task1386_anli_textual_entailment": 20.0, "eval_f1_for_task1387_anli_textual_entailment": 23.0, "eval_f1_for_task1388_cb_textual_entailment": 35.0, "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_f1_for_task1391_winogrande_coreference_resolution": 64.0, "eval_f1_for_task1393_copa_cause_effect_classification": 81.0, "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 61.0, "eval_f1_for_task1407_dart_data_to_text": 22.9083, "eval_f1_for_task1409_dart_data_to_text": 49.3771, "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 53.2536, "eval_f1_for_task1439_doqa_answerability_classification": 46.0, "eval_f1_for_task1442_doqa_answerability_classification": 59.0, "eval_f1_for_task1516_imppres_textual_entailment": 45.0, "eval_f1_for_task1529_scitailv1.1_textual_entailment": 58.0, "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 39.0, "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, "eval_f1_for_task1540_peer_read_title_generation": 39.0895, "eval_f1_for_task1554_scitail_textual_entailment": 67.0, "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.2174, "eval_f1_for_task1562_zest_question_rewriting": 52.7794, "eval_f1_for_task1586_scifact_title_generation": 38.7743, "eval_f1_for_task1598_nyc_data_to_text": 50.0597, "eval_f1_for_task1612_sick_textual_entailment": 53.0, "eval_f1_for_task1615_sick_textual_entailment": 49.0, "eval_f1_for_task1622_disfl_qa_question_rewriting": 80.7327, "eval_f1_for_task1624_disfl_qa_answerability_classification": 65.0, "eval_f1_for_task1631_open_pi_data_to_text": 90.5779, "eval_f1_for_task1640_adverserial_qa_answerability_classification": 78.0, "eval_f1_for_task1659_billsum_title_generation": 35.4098, "eval_f1_for_task1664_wino_bias_coreference_resolution": 74.1429, "eval_f1_for_task1728_web_nlg_data_to_text": 66.6521, "eval_f1_for_task190_snli_textual_entailment": 8.0, "eval_f1_for_task199_multinli_textual_entailment": 45.0, "eval_f1_for_task200_multinli_textual_entailment": 90.0, "eval_f1_for_task201_multinli_textual_entailment": 10.0, "eval_f1_for_task202_multinli_textual_entailment": 71.0, "eval_f1_for_task219_rocstories_title_generation": 17.3932, "eval_f1_for_task220_rocstories_title_generation": 99.0, "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, "eval_f1_for_task232_iirc_answerability_classification": 49.0, "eval_f1_for_task233_iirc_answerability_classification": 49.0, "eval_f1_for_task242_tweetqa_answerability_classification": 96.0, "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 59.3667, "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 15.8242, "eval_f1_for_task288_gigaword_title_generation": 30.1704, "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 3.0, "eval_f1_for_task329_gap_coreference_resolution": 58.0, "eval_f1_for_task330_gap_coreference_resolution": 72.5238, "eval_f1_for_task349_squad2.0_answerability_classification": 59.0, "eval_f1_for_task362_spolin_dialogue_act_recognition": 74.0, "eval_f1_for_task391_cod3s_cause_effect_classification": 85.0, "eval_f1_for_task392_cod3s_cause_effect_classification": 84.6667, "eval_f1_for_task393_cod3s_cause_effect_classification": 31.0388, "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 32.3333, "eval_f1_for_task402_grailqa_question_rewriting": 72.0955, "eval_f1_for_task418_persent_title_generation": 30.3685, "eval_f1_for_task442_com_qa_question_rewriting": 72.3198, "eval_f1_for_task500_scruples_title_generation": 23.5154, "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 40.8212, "eval_f1_for_task520_aquamuse_answerability_classification": 89.0, "eval_f1_for_task569_recipe_nlg_title_generation": 41.0387, "eval_f1_for_task602_wikitext_title_generation": 15.2259, "eval_f1_for_task613_liar_keyword_tagging": 22.0, "eval_f1_for_task614_glucose_cause_effect_classification": 45.8685, "eval_f1_for_task619_ohsumed_title_generation": 45.3233, "eval_f1_for_task620_ohsumed_keyword_tagging": 40.0, "eval_f1_for_task623_ohsumed_keyword_tagging": 68.0, "eval_f1_for_task640_e_snli_textual_entailment": 40.0, "eval_f1_for_task641_e_snli_textual_entailment": 30.0, "eval_f1_for_task642_e_snli_textual_entailment": 36.0, "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.4238, "eval_f1_for_task648_winograd_wsc_coreference_resolution": 20.0, "eval_f1_for_task670_ambigqa_question_rewriting": 73.002, "eval_f1_for_task671_ambigqa_question_rewriting": 63.2219, "eval_f1_for_task677_ollie_data_to_text": 27.1842, "eval_f1_for_task738_perspectrum_textual_entailment": 34.0, "eval_f1_for_task743_eurlex_title_generation": 37.3277, "eval_f1_for_task760_msr_sqa_data_to_text": 5.4268, "eval_f1_for_task769_qed_title_generation": 89.6489, "eval_f1_for_task827_copa_cause_effect_classification": 89.0, "eval_f1_for_task828_copa_cause_effect_classification": 69.0, "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 79.0, "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 50.0, "eval_f1_for_task890_gwsd_textual_entailment": 52.0, "eval_f1_for_task891_gap_coreference_resolution": 66.7, "eval_f1_for_task892_gap_coreference_resolution": 43.0, "eval_f1_for_task893_gap_coreference_resolution": 64.0, "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 76.0, "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, "eval_f1_for_task957_e2e_data_to_text": 55.7006, "eval_f1_for_task970_sherliic_textual_entailment": 75.0, "eval_f1_for_textual_entailment": 46.2083, "eval_f1_for_title_generation": 39.0158, "eval_f1_for_word_analogy": 47.7083, "eval_gen_len": 8.6991, "eval_global_step": 5000, "eval_loss": 1.265062928199768, "eval_rouge1": 55.4027, "eval_rouge1_for_answerability_classification": 65.641, "eval_rouge1_for_cause_effect_classification": 70.0251, "eval_rouge1_for_coreference_resolution": 48.5418, "eval_rouge1_for_data_to_text": 53.8534, "eval_rouge1_for_dialogue_act_recognition": 61.0122, "eval_rouge1_for_grammar_error_correction": 71.4044, "eval_rouge1_for_keyword_tagging": 65.1656, "eval_rouge1_for_overlap_extraction": 34.3666, "eval_rouge1_for_question_rewriting": 70.9566, "eval_rouge1_for_task020_mctaco_answerability_classification": 55.0, "eval_rouge1_for_task033_winogrande_coreference_resolution": 62.0, "eval_rouge1_for_task034_winogrande_question_rewriting": 92.719, "eval_rouge1_for_task035_winogrande_question_rewriting": 89.1939, "eval_rouge1_for_task036_qasc_keyword_tagging": 80.504, "eval_rouge1_for_task039_qasc_overlap_extraction": 52.3333, "eval_rouge1_for_task050_multirc_answerability_classification": 68.0, "eval_rouge1_for_task102_commongen_data_to_text": 68.7507, "eval_rouge1_for_task1152_bard_word_analogy": 27.0, "eval_rouge1_for_task1153_bard_word_analogy": 40.0, "eval_rouge1_for_task1154_bard_word_analogy": 26.0, "eval_rouge1_for_task1155_bard_word_analogy": 77.0, "eval_rouge1_for_task1156_bard_word_analogy": 59.6667, "eval_rouge1_for_task1157_bard_word_analogy": 63.0, "eval_rouge1_for_task1158_bard_word_analogy": 52.0, "eval_rouge1_for_task1159_bard_word_analogy": 37.0, "eval_rouge1_for_task1161_coda_19_title_generation": 42.5347, "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 82.0698, "eval_rouge1_for_task121_atomic_question_rewriting": 49.8285, "eval_rouge1_for_task133_winowhy_coreference_resolution": 0.0, "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 16.5418, "eval_rouge1_for_task1344_rte_textual_entailment": 67.0, "eval_rouge1_for_task1345_qqp_question_rewriting": 41.652, "eval_rouge1_for_task1356_xlsum_title_generation": 28.3531, "eval_rouge1_for_task1358_xlsum_title_generation": 40.7117, "eval_rouge1_for_task1385_anli_textual_entailment": 29.0, "eval_rouge1_for_task1386_anli_textual_entailment": 35.0, "eval_rouge1_for_task1387_anli_textual_entailment": 41.0, "eval_rouge1_for_task1388_cb_textual_entailment": 56.0, "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rouge1_for_task1391_winogrande_coreference_resolution": 64.0, "eval_rouge1_for_task1393_copa_cause_effect_classification": 81.0, "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 64.0857, "eval_rouge1_for_task1407_dart_data_to_text": 24.5399, "eval_rouge1_for_task1409_dart_data_to_text": 50.5753, "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 56.6508, "eval_rouge1_for_task1439_doqa_answerability_classification": 46.0, "eval_rouge1_for_task1442_doqa_answerability_classification": 59.0, "eval_rouge1_for_task1516_imppres_textual_entailment": 45.0, "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 58.0, "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 47.0, "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, "eval_rouge1_for_task1540_peer_read_title_generation": 43.5161, "eval_rouge1_for_task1554_scitail_textual_entailment": 67.0, "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.158, "eval_rouge1_for_task1562_zest_question_rewriting": 55.2578, "eval_rouge1_for_task1586_scifact_title_generation": 43.1572, "eval_rouge1_for_task1598_nyc_data_to_text": 51.9724, "eval_rouge1_for_task1612_sick_textual_entailment": 53.0, "eval_rouge1_for_task1615_sick_textual_entailment": 83.0, "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 81.0896, "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 65.0, "eval_rouge1_for_task1631_open_pi_data_to_text": 90.6749, "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 78.0, "eval_rouge1_for_task1659_billsum_title_generation": 37.3231, "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 74.1429, "eval_rouge1_for_task1728_web_nlg_data_to_text": 68.5699, "eval_rouge1_for_task190_snli_textual_entailment": 8.0, "eval_rouge1_for_task199_multinli_textual_entailment": 45.0, "eval_rouge1_for_task200_multinli_textual_entailment": 90.0, "eval_rouge1_for_task201_multinli_textual_entailment": 10.0, "eval_rouge1_for_task202_multinli_textual_entailment": 71.0, "eval_rouge1_for_task219_rocstories_title_generation": 21.2085, "eval_rouge1_for_task220_rocstories_title_generation": 99.0, "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, "eval_rouge1_for_task232_iirc_answerability_classification": 49.0, "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, "eval_rouge1_for_task242_tweetqa_answerability_classification": 96.0, "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 60.0333, "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 16.3998, "eval_rouge1_for_task288_gigaword_title_generation": 34.175, "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 3.0, "eval_rouge1_for_task329_gap_coreference_resolution": 58.0, "eval_rouge1_for_task330_gap_coreference_resolution": 72.3429, "eval_rouge1_for_task349_squad2.0_answerability_classification": 59.0, "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 74.0, "eval_rouge1_for_task391_cod3s_cause_effect_classification": 85.0, "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.6667, "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.1457, "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 42.0, "eval_rouge1_for_task402_grailqa_question_rewriting": 74.3518, "eval_rouge1_for_task418_persent_title_generation": 33.623, "eval_rouge1_for_task442_com_qa_question_rewriting": 75.6693, "eval_rouge1_for_task500_scruples_title_generation": 25.3214, "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 41.5112, "eval_rouge1_for_task520_aquamuse_answerability_classification": 89.0, "eval_rouge1_for_task569_recipe_nlg_title_generation": 42.3788, "eval_rouge1_for_task602_wikitext_title_generation": 15.9984, "eval_rouge1_for_task613_liar_keyword_tagging": 34.4667, "eval_rouge1_for_task614_glucose_cause_effect_classification": 50.3633, "eval_rouge1_for_task619_ohsumed_title_generation": 49.2241, "eval_rouge1_for_task620_ohsumed_keyword_tagging": 47.9333, "eval_rouge1_for_task623_ohsumed_keyword_tagging": 68.0, "eval_rouge1_for_task640_e_snli_textual_entailment": 40.0, "eval_rouge1_for_task641_e_snli_textual_entailment": 30.0, "eval_rouge1_for_task642_e_snli_textual_entailment": 36.0, "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.9238, "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 20.1333, "eval_rouge1_for_task670_ambigqa_question_rewriting": 74.2134, "eval_rouge1_for_task671_ambigqa_question_rewriting": 64.4777, "eval_rouge1_for_task677_ollie_data_to_text": 30.3397, "eval_rouge1_for_task738_perspectrum_textual_entailment": 81.0, "eval_rouge1_for_task743_eurlex_title_generation": 39.2113, "eval_rouge1_for_task760_msr_sqa_data_to_text": 5.9086, "eval_rouge1_for_task769_qed_title_generation": 89.6629, "eval_rouge1_for_task827_copa_cause_effect_classification": 89.0, "eval_rouge1_for_task828_copa_cause_effect_classification": 69.0, "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 79.0, "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 58.0, "eval_rouge1_for_task890_gwsd_textual_entailment": 52.0, "eval_rouge1_for_task891_gap_coreference_resolution": 66.9333, "eval_rouge1_for_task892_gap_coreference_resolution": 43.0, "eval_rouge1_for_task893_gap_coreference_resolution": 64.0, "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 76.0, "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, "eval_rouge1_for_task957_e2e_data_to_text": 57.8698, "eval_rouge1_for_task970_sherliic_textual_entailment": 75.0, "eval_rouge1_for_textual_entailment": 52.625, "eval_rouge1_for_title_generation": 41.5298, "eval_rouge1_for_word_analogy": 47.7083, "eval_rougeL": 53.9418, "eval_rougeL_for_answerability_classification": 65.641, "eval_rougeL_for_cause_effect_classification": 69.6532, "eval_rougeL_for_coreference_resolution": 48.5418, "eval_rougeL_for_data_to_text": 45.6967, "eval_rougeL_for_dialogue_act_recognition": 61.0122, "eval_rougeL_for_grammar_error_correction": 70.3271, "eval_rougeL_for_keyword_tagging": 64.6611, "eval_rougeL_for_overlap_extraction": 34.2539, "eval_rougeL_for_question_rewriting": 67.404, "eval_rougeL_for_task020_mctaco_answerability_classification": 55.0, "eval_rougeL_for_task033_winogrande_coreference_resolution": 62.0, "eval_rougeL_for_task034_winogrande_question_rewriting": 91.9949, "eval_rougeL_for_task035_winogrande_question_rewriting": 88.5342, "eval_rougeL_for_task036_qasc_keyword_tagging": 79.1317, "eval_rougeL_for_task039_qasc_overlap_extraction": 52.3333, "eval_rougeL_for_task050_multirc_answerability_classification": 68.0, "eval_rougeL_for_task102_commongen_data_to_text": 58.1508, "eval_rougeL_for_task1152_bard_word_analogy": 27.0, "eval_rougeL_for_task1153_bard_word_analogy": 40.0, "eval_rougeL_for_task1154_bard_word_analogy": 26.0, "eval_rougeL_for_task1155_bard_word_analogy": 77.0, "eval_rougeL_for_task1156_bard_word_analogy": 59.6667, "eval_rougeL_for_task1157_bard_word_analogy": 63.0, "eval_rougeL_for_task1158_bard_word_analogy": 52.0, "eval_rougeL_for_task1159_bard_word_analogy": 37.0, "eval_rougeL_for_task1161_coda_19_title_generation": 35.1473, "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 81.4169, "eval_rougeL_for_task121_atomic_question_rewriting": 43.9282, "eval_rougeL_for_task133_winowhy_coreference_resolution": 0.0, "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.6105, "eval_rougeL_for_task1344_rte_textual_entailment": 67.0, "eval_rougeL_for_task1345_qqp_question_rewriting": 38.769, "eval_rougeL_for_task1356_xlsum_title_generation": 23.772, "eval_rougeL_for_task1358_xlsum_title_generation": 34.8033, "eval_rougeL_for_task1385_anli_textual_entailment": 29.0, "eval_rougeL_for_task1386_anli_textual_entailment": 35.0, "eval_rougeL_for_task1387_anli_textual_entailment": 41.0, "eval_rougeL_for_task1388_cb_textual_entailment": 56.0, "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, "eval_rougeL_for_task1391_winogrande_coreference_resolution": 64.0, "eval_rougeL_for_task1393_copa_cause_effect_classification": 81.0, "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 64.0857, "eval_rougeL_for_task1407_dart_data_to_text": 19.6133, "eval_rougeL_for_task1409_dart_data_to_text": 42.7796, "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 55.4176, "eval_rougeL_for_task1439_doqa_answerability_classification": 46.0, "eval_rougeL_for_task1442_doqa_answerability_classification": 59.0, "eval_rougeL_for_task1516_imppres_textual_entailment": 45.0, "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 58.0, "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 47.0, "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 56.0, "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, "eval_rougeL_for_task1540_peer_read_title_generation": 39.9862, "eval_rougeL_for_task1554_scitail_textual_entailment": 67.0, "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.2366, "eval_rougeL_for_task1562_zest_question_rewriting": 48.6638, "eval_rougeL_for_task1586_scifact_title_generation": 36.7045, "eval_rougeL_for_task1598_nyc_data_to_text": 40.84, "eval_rougeL_for_task1612_sick_textual_entailment": 53.0, "eval_rougeL_for_task1615_sick_textual_entailment": 83.0, "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 80.5018, "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 65.0, "eval_rougeL_for_task1631_open_pi_data_to_text": 87.0616, "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 78.0, "eval_rougeL_for_task1659_billsum_title_generation": 31.5371, "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 74.1429, "eval_rougeL_for_task1728_web_nlg_data_to_text": 59.472, "eval_rougeL_for_task190_snli_textual_entailment": 8.0, "eval_rougeL_for_task199_multinli_textual_entailment": 45.0, "eval_rougeL_for_task200_multinli_textual_entailment": 90.0, "eval_rougeL_for_task201_multinli_textual_entailment": 10.0, "eval_rougeL_for_task202_multinli_textual_entailment": 71.0, "eval_rougeL_for_task219_rocstories_title_generation": 21.2085, "eval_rougeL_for_task220_rocstories_title_generation": 99.0, "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 57.0, "eval_rougeL_for_task232_iirc_answerability_classification": 49.0, "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, "eval_rougeL_for_task242_tweetqa_answerability_classification": 96.0, "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 60.0333, "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 16.1745, "eval_rougeL_for_task288_gigaword_title_generation": 29.6762, "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 3.0, "eval_rougeL_for_task329_gap_coreference_resolution": 58.0, "eval_rougeL_for_task330_gap_coreference_resolution": 72.3429, "eval_rougeL_for_task349_squad2.0_answerability_classification": 59.0, "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 74.0, "eval_rougeL_for_task391_cod3s_cause_effect_classification": 85.0, "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.6667, "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.367, "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 42.0, "eval_rougeL_for_task402_grailqa_question_rewriting": 62.0545, "eval_rougeL_for_task418_persent_title_generation": 30.203, "eval_rougeL_for_task442_com_qa_question_rewriting": 70.2847, "eval_rougeL_for_task500_scruples_title_generation": 24.3047, "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 40.1692, "eval_rougeL_for_task520_aquamuse_answerability_classification": 89.0, "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.7701, "eval_rougeL_for_task602_wikitext_title_generation": 15.9984, "eval_rougeL_for_task613_liar_keyword_tagging": 34.4667, "eval_rougeL_for_task614_glucose_cause_effect_classification": 48.5391, "eval_rougeL_for_task619_ohsumed_title_generation": 40.7169, "eval_rougeL_for_task620_ohsumed_keyword_tagging": 46.7833, "eval_rougeL_for_task623_ohsumed_keyword_tagging": 68.0, "eval_rougeL_for_task640_e_snli_textual_entailment": 40.0, "eval_rougeL_for_task641_e_snli_textual_entailment": 30.0, "eval_rougeL_for_task642_e_snli_textual_entailment": 36.0, "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.9238, "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 20.1333, "eval_rougeL_for_task670_ambigqa_question_rewriting": 72.3223, "eval_rougeL_for_task671_ambigqa_question_rewriting": 62.9739, "eval_rougeL_for_task677_ollie_data_to_text": 24.7007, "eval_rougeL_for_task738_perspectrum_textual_entailment": 81.0, "eval_rougeL_for_task743_eurlex_title_generation": 34.1434, "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.387, "eval_rougeL_for_task769_qed_title_generation": 89.6629, "eval_rougeL_for_task827_copa_cause_effect_classification": 89.0, "eval_rougeL_for_task828_copa_cause_effect_classification": 69.0, "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 79.0, "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 58.0, "eval_rougeL_for_task890_gwsd_textual_entailment": 52.0, "eval_rougeL_for_task891_gap_coreference_resolution": 66.9333, "eval_rougeL_for_task892_gap_coreference_resolution": 43.0, "eval_rougeL_for_task893_gap_coreference_resolution": 64.0, "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 76.0, "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 52.0, "eval_rougeL_for_task957_e2e_data_to_text": 43.4362, "eval_rougeL_for_task970_sherliic_textual_entailment": 75.0, "eval_rougeL_for_textual_entailment": 52.625, "eval_rougeL_for_title_generation": 38.1645, "eval_rougeL_for_word_analogy": 47.7083, "eval_runtime": 788.0222, "eval_samples_per_second": 15.114, "eval_steps_per_second": 0.945, "step": 5000 }, { "epoch": 1.09, "step": 5000, "total_flos": 4.731573510382551e+17, "train_loss": 0.9779035568237304, "train_runtime": 33930.9714, "train_samples_per_second": 2.358, "train_steps_per_second": 0.147 } ], "max_steps": 5000, "num_train_epochs": 2, "total_flos": 4.731573510382551e+17, "trial_name": null, "trial_params": null }