diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7697 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0926472194908774, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 6.5283, + "step": 1 + }, + { + "epoch": 0.0, + "eval_exact_match": 10.5038, + "eval_exact_match_for_answerability_classification": 14.0769, + "eval_exact_match_for_cause_effect_classification": 4.2857, + "eval_exact_match_for_coreference_resolution": 11.5, + "eval_exact_match_for_data_to_text": 1.6949, + "eval_exact_match_for_dialogue_act_recognition": 10.4286, + "eval_exact_match_for_grammar_error_correction": 5.0, + "eval_exact_match_for_keyword_tagging": 12.6, + "eval_exact_match_for_overlap_extraction": 5.5, + "eval_exact_match_for_question_rewriting": 0.5455, + "eval_exact_match_for_task020_mctaco_answerability_classification": 47.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 6.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 2.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 11.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 15.0, + "eval_exact_match_for_task102_commongen_data_to_text": 4.0, + "eval_exact_match_for_task1152_bard_word_analogy": 0.0, + "eval_exact_match_for_task1153_bard_word_analogy": 2.0, + "eval_exact_match_for_task1154_bard_word_analogy": 0.0, + "eval_exact_match_for_task1155_bard_word_analogy": 0.0, + "eval_exact_match_for_task1156_bard_word_analogy": 5.0, + "eval_exact_match_for_task1157_bard_word_analogy": 0.0, + "eval_exact_match_for_task1158_bard_word_analogy": 0.0, + "eval_exact_match_for_task1159_bard_word_analogy": 5.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 0.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 18.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 25.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 24.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 0.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 0.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 0.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 0.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 0.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 24.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 10.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 2.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 6.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 3.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 6.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 2.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 13.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 18.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 22.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 74.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 19.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 1.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 27.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 17.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 49.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 11.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 8.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 5.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 2.0, + "eval_exact_match_for_task602_wikitext_title_generation": 0.0, + "eval_exact_match_for_task613_liar_keyword_tagging": 8.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 6.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 1.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 37.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 46.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 4.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 21.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 40.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 30.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 0.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 21.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 33.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 28.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 27.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 1.0, + "eval_exact_match_for_textual_entailment": 23.5833, + "eval_exact_match_for_title_generation": 6.8386, + "eval_exact_match_for_word_analogy": 1.5, + "eval_f1": 27.989, + "eval_f1_for_answerability_classification": 17.9624, + "eval_f1_for_cause_effect_classification": 28.7059, + "eval_f1_for_coreference_resolution": 19.4848, + "eval_f1_for_data_to_text": 48.9389, + "eval_f1_for_dialogue_act_recognition": 16.1415, + "eval_f1_for_grammar_error_correction": 53.8623, + "eval_f1_for_keyword_tagging": 28.4442, + "eval_f1_for_overlap_extraction": 30.9782, + "eval_f1_for_question_rewriting": 62.5492, + "eval_f1_for_task020_mctaco_answerability_classification": 47.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 6.4, + "eval_f1_for_task034_winogrande_question_rewriting": 72.4667, + "eval_f1_for_task035_winogrande_question_rewriting": 72.2701, + "eval_f1_for_task036_qasc_keyword_tagging": 45.1648, + "eval_f1_for_task039_qasc_overlap_extraction": 19.0277, + "eval_f1_for_task050_multirc_answerability_classification": 15.0, + "eval_f1_for_task102_commongen_data_to_text": 60.7628, + "eval_f1_for_task1152_bard_word_analogy": 0.0, + "eval_f1_for_task1153_bard_word_analogy": 2.0, + "eval_f1_for_task1154_bard_word_analogy": 0.0, + "eval_f1_for_task1155_bard_word_analogy": 0.0, + "eval_f1_for_task1156_bard_word_analogy": 5.0, + "eval_f1_for_task1157_bard_word_analogy": 0.0, + "eval_f1_for_task1158_bard_word_analogy": 0.0, + "eval_f1_for_task1159_bard_word_analogy": 5.0, + "eval_f1_for_task1161_coda_19_title_generation": 16.4142, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 74.8215, + "eval_f1_for_task121_atomic_question_rewriting": 48.4703, + "eval_f1_for_task133_winowhy_coreference_resolution": 0.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 7.7603, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 32.4443, + "eval_f1_for_task1356_xlsum_title_generation": 7.4302, + "eval_f1_for_task1358_xlsum_title_generation": 31.0646, + "eval_f1_for_task1385_anli_textual_entailment": 30.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 34.3333, + "eval_f1_for_task1388_cb_textual_entailment": 37.4378, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 26.6667, + "eval_f1_for_task1391_winogrande_coreference_resolution": 0.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 0.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_f1_for_task1407_dart_data_to_text": 37.7855, + "eval_f1_for_task1409_dart_data_to_text": 46.0403, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.5918, + "eval_f1_for_task1439_doqa_answerability_classification": 0.4037, + "eval_f1_for_task1442_doqa_answerability_classification": 6.0932, + "eval_f1_for_task1516_imppres_textual_entailment": 24.4576, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 15.0667, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 0.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.027, + "eval_f1_for_task1540_peer_read_title_generation": 9.5232, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 77.1327, + "eval_f1_for_task1562_zest_question_rewriting": 57.0052, + "eval_f1_for_task1586_scifact_title_generation": 19.2318, + "eval_f1_for_task1598_nyc_data_to_text": 52.1068, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 73.622, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 6.7801, + "eval_f1_for_task1631_open_pi_data_to_text": 84.5931, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 6.0, + "eval_f1_for_task1659_billsum_title_generation": 17.3742, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 37.0139, + "eval_f1_for_task1728_web_nlg_data_to_text": 52.7169, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 14.3333, + "eval_f1_for_task200_multinli_textual_entailment": 18.0, + "eval_f1_for_task201_multinli_textual_entailment": 22.113, + "eval_f1_for_task202_multinli_textual_entailment": 31.0, + "eval_f1_for_task219_rocstories_title_generation": 16.7894, + "eval_f1_for_task220_rocstories_title_generation": 74.0286, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 19.5369, + "eval_f1_for_task232_iirc_answerability_classification": 3.015, + "eval_f1_for_task233_iirc_answerability_classification": 1.2118, + "eval_f1_for_task242_tweetqa_answerability_classification": 33.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 24.0595, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 42.9288, + "eval_f1_for_task288_gigaword_title_generation": 30.4103, + "eval_f1_for_task290_tellmewhy_answerability_classification": 82.3553, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 3.2217, + "eval_f1_for_task329_gap_coreference_resolution": 34.2857, + "eval_f1_for_task330_gap_coreference_resolution": 13.2, + "eval_f1_for_task349_squad2.0_answerability_classification": 8.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 24.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 40.7, + "eval_f1_for_task392_cod3s_cause_effect_classification": 42.7, + "eval_f1_for_task393_cod3s_cause_effect_classification": 23.5578, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 8.4206, + "eval_f1_for_task402_grailqa_question_rewriting": 61.6897, + "eval_f1_for_task418_persent_title_generation": 8.4219, + "eval_f1_for_task442_com_qa_question_rewriting": 57.8311, + "eval_f1_for_task500_scruples_title_generation": 7.1147, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 19.1609, + "eval_f1_for_task520_aquamuse_answerability_classification": 5.1149, + "eval_f1_for_task569_recipe_nlg_title_generation": 21.517, + "eval_f1_for_task602_wikitext_title_generation": 3.9581, + "eval_f1_for_task613_liar_keyword_tagging": 9.8524, + "eval_f1_for_task614_glucose_cause_effect_classification": 35.4838, + "eval_f1_for_task619_ohsumed_title_generation": 21.1131, + "eval_f1_for_task620_ohsumed_keyword_tagging": 14.7359, + "eval_f1_for_task623_ohsumed_keyword_tagging": 2.4392, + "eval_f1_for_task640_e_snli_textual_entailment": 37.0, + "eval_f1_for_task641_e_snli_textual_entailment": 0.0, + "eval_f1_for_task642_e_snli_textual_entailment": 14.6667, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 70.0289, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 23.9524, + "eval_f1_for_task670_ambigqa_question_rewriting": 72.0359, + "eval_f1_for_task671_ambigqa_question_rewriting": 65.3845, + "eval_f1_for_task677_ollie_data_to_text": 25.1239, + "eval_f1_for_task738_perspectrum_textual_entailment": 21.0, + "eval_f1_for_task743_eurlex_title_generation": 18.8349, + "eval_f1_for_task760_msr_sqa_data_to_text": 5.1048, + "eval_f1_for_task769_qed_title_generation": 56.2227, + "eval_f1_for_task827_copa_cause_effect_classification": 30.0, + "eval_f1_for_task828_copa_cause_effect_classification": 28.5, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 21.397, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 0.0, + "eval_f1_for_task890_gwsd_textual_entailment": 33.0, + "eval_f1_for_task891_gap_coreference_resolution": 37.567, + "eval_f1_for_task892_gap_coreference_resolution": 27.0, + "eval_f1_for_task893_gap_coreference_resolution": 31.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 33.3333, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 33.3333, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 27.3333, + "eval_f1_for_task957_e2e_data_to_text": 43.779, + "eval_f1_for_task970_sherliic_textual_entailment": 1.0, + "eval_f1_for_textual_entailment": 29.6392, + "eval_f1_for_title_generation": 21.622, + "eval_f1_for_word_analogy": 1.5, + "eval_gen_len": 30.1854, + "eval_global_step": 1, + "eval_loss": 5.5749592781066895, + "eval_rouge1": 29.9558, + "eval_rouge1_for_answerability_classification": 17.9497, + "eval_rouge1_for_cause_effect_classification": 33.3612, + "eval_rouge1_for_coreference_resolution": 19.8449, + "eval_rouge1_for_data_to_text": 52.0004, + "eval_rouge1_for_dialogue_act_recognition": 19.0611, + "eval_rouge1_for_grammar_error_correction": 58.94, + "eval_rouge1_for_keyword_tagging": 32.4846, + "eval_rouge1_for_overlap_extraction": 33.1494, + "eval_rouge1_for_question_rewriting": 64.1514, + "eval_rouge1_for_task020_mctaco_answerability_classification": 47.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 6.4, + "eval_rouge1_for_task034_winogrande_question_rewriting": 72.6289, + "eval_rouge1_for_task035_winogrande_question_rewriting": 72.9824, + "eval_rouge1_for_task036_qasc_keyword_tagging": 51.4348, + "eval_rouge1_for_task039_qasc_overlap_extraction": 22.8221, + "eval_rouge1_for_task050_multirc_answerability_classification": 15.0, + "eval_rouge1_for_task102_commongen_data_to_text": 72.3889, + "eval_rouge1_for_task1152_bard_word_analogy": 0.0, + "eval_rouge1_for_task1153_bard_word_analogy": 2.0, + "eval_rouge1_for_task1154_bard_word_analogy": 0.0, + "eval_rouge1_for_task1155_bard_word_analogy": 0.0, + "eval_rouge1_for_task1156_bard_word_analogy": 5.0, + "eval_rouge1_for_task1157_bard_word_analogy": 0.0, + "eval_rouge1_for_task1158_bard_word_analogy": 0.0, + "eval_rouge1_for_task1159_bard_word_analogy": 5.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 18.4126, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 75.689, + "eval_rouge1_for_task121_atomic_question_rewriting": 50.465, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 0.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 8.1078, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 35.0816, + "eval_rouge1_for_task1356_xlsum_title_generation": 8.8327, + "eval_rouge1_for_task1358_xlsum_title_generation": 35.1252, + "eval_rouge1_for_task1385_anli_textual_entailment": 30.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 34.3333, + "eval_rouge1_for_task1388_cb_textual_entailment": 37.4278, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 26.6667, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 0.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 0.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 22.1381, + "eval_rouge1_for_task1407_dart_data_to_text": 38.769, + "eval_rouge1_for_task1409_dart_data_to_text": 48.6748, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.1498, + "eval_rouge1_for_task1439_doqa_answerability_classification": 0.3913, + "eval_rouge1_for_task1442_doqa_answerability_classification": 6.09, + "eval_rouge1_for_task1516_imppres_textual_entailment": 24.4289, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 15.0667, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 0.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0263, + "eval_rouge1_for_task1540_peer_read_title_generation": 10.6897, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 80.7302, + "eval_rouge1_for_task1562_zest_question_rewriting": 59.8508, + "eval_rouge1_for_task1586_scifact_title_generation": 21.4429, + "eval_rouge1_for_task1598_nyc_data_to_text": 53.4922, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 75.1253, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 6.7708, + "eval_rouge1_for_task1631_open_pi_data_to_text": 85.3783, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 6.0, + "eval_rouge1_for_task1659_billsum_title_generation": 18.5233, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 37.0139, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 57.4578, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 14.3333, + "eval_rouge1_for_task200_multinli_textual_entailment": 28.6667, + "eval_rouge1_for_task201_multinli_textual_entailment": 30.7728, + "eval_rouge1_for_task202_multinli_textual_entailment": 31.6667, + "eval_rouge1_for_task219_rocstories_title_generation": 20.3045, + "eval_rouge1_for_task220_rocstories_title_generation": 74.0286, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 19.5253, + "eval_rouge1_for_task232_iirc_answerability_classification": 2.9558, + "eval_rouge1_for_task233_iirc_answerability_classification": 1.1464, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 33.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 24.1548, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 43.4767, + "eval_rouge1_for_task288_gigaword_title_generation": 32.6496, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 82.3553, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 5.2201, + "eval_rouge1_for_task329_gap_coreference_resolution": 34.2857, + "eval_rouge1_for_task330_gap_coreference_resolution": 13.2, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 8.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 24.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 40.7, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 42.7, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 24.09, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 8.4738, + "eval_rouge1_for_task402_grailqa_question_rewriting": 63.1777, + "eval_rouge1_for_task418_persent_title_generation": 10.1026, + "eval_rouge1_for_task442_com_qa_question_rewriting": 61.7275, + "eval_rouge1_for_task500_scruples_title_generation": 7.7155, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 19.2642, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 5.1108, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 21.7381, + "eval_rouge1_for_task602_wikitext_title_generation": 4.0531, + "eval_rouge1_for_task613_liar_keyword_tagging": 19.4833, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 42.2048, + "eval_rouge1_for_task619_ohsumed_title_generation": 22.7931, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 18.2085, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 2.4104, + "eval_rouge1_for_task640_e_snli_textual_entailment": 37.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 0.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 14.6667, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 70.8861, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 25.8527, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 72.9144, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 66.0224, + "eval_rouge1_for_task677_ollie_data_to_text": 26.8592, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 21.0, + "eval_rouge1_for_task743_eurlex_title_generation": 19.8758, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 5.6319, + "eval_rouge1_for_task769_qed_title_generation": 56.8403, + "eval_rouge1_for_task827_copa_cause_effect_classification": 55.3333, + "eval_rouge1_for_task828_copa_cause_effect_classification": 28.5, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 21.397, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 1.3, + "eval_rouge1_for_task890_gwsd_textual_entailment": 33.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 38.5609, + "eval_rouge1_for_task892_gap_coreference_resolution": 27.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 31.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 33.3333, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 33.3333, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 37.3333, + "eval_rouge1_for_task957_e2e_data_to_text": 45.0388, + "eval_rouge1_for_task970_sherliic_textual_entailment": 1.0, + "eval_rouge1_for_textual_entailment": 32.7485, + "eval_rouge1_for_title_generation": 22.9737, + "eval_rouge1_for_word_analogy": 1.5, + "eval_rougeL": 28.621, + "eval_rougeL_for_answerability_classification": 17.9497, + "eval_rougeL_for_cause_effect_classification": 32.3876, + "eval_rougeL_for_coreference_resolution": 19.6287, + "eval_rougeL_for_data_to_text": 45.0528, + "eval_rougeL_for_dialogue_act_recognition": 18.9326, + "eval_rougeL_for_grammar_error_correction": 58.3655, + "eval_rougeL_for_keyword_tagging": 31.2702, + "eval_rougeL_for_overlap_extraction": 32.446, + "eval_rougeL_for_question_rewriting": 60.1842, + "eval_rougeL_for_task020_mctaco_answerability_classification": 47.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 6.4, + "eval_rougeL_for_task034_winogrande_question_rewriting": 71.7346, + "eval_rougeL_for_task035_winogrande_question_rewriting": 71.1545, + "eval_rougeL_for_task036_qasc_keyword_tagging": 48.2078, + "eval_rougeL_for_task039_qasc_overlap_extraction": 22.8221, + "eval_rougeL_for_task050_multirc_answerability_classification": 15.0, + "eval_rougeL_for_task102_commongen_data_to_text": 63.8732, + "eval_rougeL_for_task1152_bard_word_analogy": 0.0, + "eval_rougeL_for_task1153_bard_word_analogy": 2.0, + "eval_rougeL_for_task1154_bard_word_analogy": 0.0, + "eval_rougeL_for_task1155_bard_word_analogy": 0.0, + "eval_rougeL_for_task1156_bard_word_analogy": 5.0, + "eval_rougeL_for_task1157_bard_word_analogy": 0.0, + "eval_rougeL_for_task1158_bard_word_analogy": 0.0, + "eval_rougeL_for_task1159_bard_word_analogy": 5.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 14.7541, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 73.8652, + "eval_rougeL_for_task121_atomic_question_rewriting": 45.9438, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 0.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 7.7735, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 31.2014, + "eval_rougeL_for_task1356_xlsum_title_generation": 7.0766, + "eval_rougeL_for_task1358_xlsum_title_generation": 29.1131, + "eval_rougeL_for_task1385_anli_textual_entailment": 30.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 34.3333, + "eval_rougeL_for_task1388_cb_textual_entailment": 37.4278, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 26.6667, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 0.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 0.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 21.2381, + "eval_rougeL_for_task1407_dart_data_to_text": 35.2918, + "eval_rougeL_for_task1409_dart_data_to_text": 39.3799, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.836, + "eval_rougeL_for_task1439_doqa_answerability_classification": 0.3913, + "eval_rougeL_for_task1442_doqa_answerability_classification": 6.09, + "eval_rougeL_for_task1516_imppres_textual_entailment": 24.4289, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 15.0667, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 0.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0263, + "eval_rougeL_for_task1540_peer_read_title_generation": 9.0134, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 79.8951, + "eval_rougeL_for_task1562_zest_question_rewriting": 52.6904, + "eval_rougeL_for_task1586_scifact_title_generation": 17.0878, + "eval_rougeL_for_task1598_nyc_data_to_text": 42.1665, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 76.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 73.2583, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 6.7708, + "eval_rougeL_for_task1631_open_pi_data_to_text": 82.9109, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 6.0, + "eval_rougeL_for_task1659_billsum_title_generation": 14.579, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 34.4193, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 49.6968, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 14.3333, + "eval_rougeL_for_task200_multinli_textual_entailment": 28.6667, + "eval_rougeL_for_task201_multinli_textual_entailment": 30.7728, + "eval_rougeL_for_task202_multinli_textual_entailment": 31.6667, + "eval_rougeL_for_task219_rocstories_title_generation": 20.3045, + "eval_rougeL_for_task220_rocstories_title_generation": 74.0286, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 19.5253, + "eval_rougeL_for_task232_iirc_answerability_classification": 2.9558, + "eval_rougeL_for_task233_iirc_answerability_classification": 1.1464, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 33.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 23.869, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 42.0699, + "eval_rougeL_for_task288_gigaword_title_generation": 29.0656, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 82.3553, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 5.2201, + "eval_rougeL_for_task329_gap_coreference_resolution": 34.2857, + "eval_rougeL_for_task330_gap_coreference_resolution": 13.2, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 8.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 24.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 40.7, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 42.7, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 22.3159, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 8.4738, + "eval_rougeL_for_task402_grailqa_question_rewriting": 54.6266, + "eval_rougeL_for_task418_persent_title_generation": 8.1648, + "eval_rougeL_for_task442_com_qa_question_rewriting": 54.2312, + "eval_rougeL_for_task500_scruples_title_generation": 6.7747, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 18.7117, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 5.1108, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 20.356, + "eval_rougeL_for_task602_wikitext_title_generation": 3.994, + "eval_rougeL_for_task613_liar_keyword_tagging": 19.4833, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 37.1639, + "eval_rougeL_for_task619_ohsumed_title_generation": 19.1208, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 17.5635, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 2.4104, + "eval_rougeL_for_task640_e_snli_textual_entailment": 37.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 0.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 14.6667, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 68.6861, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 25.7066, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 69.4679, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 63.8523, + "eval_rougeL_for_task677_ollie_data_to_text": 22.7132, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 21.0, + "eval_rougeL_for_task743_eurlex_title_generation": 16.5067, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.1553, + "eval_rougeL_for_task769_qed_title_generation": 56.8173, + "eval_rougeL_for_task827_copa_cause_effect_classification": 55.3333, + "eval_rougeL_for_task828_copa_cause_effect_classification": 28.5, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 21.397, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 1.3, + "eval_rougeL_for_task890_gwsd_textual_entailment": 33.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 38.5609, + "eval_rougeL_for_task892_gap_coreference_resolution": 27.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 31.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 33.3333, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 33.3333, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 37.3333, + "eval_rougeL_for_task957_e2e_data_to_text": 34.7638, + "eval_rougeL_for_task970_sherliic_textual_entailment": 1.0, + "eval_rougeL_for_textual_entailment": 32.6929, + "eval_rougeL_for_title_generation": 20.8858, + "eval_rougeL_for_word_analogy": 1.5, + "eval_runtime": 2379.1305, + "eval_samples_per_second": 5.006, + "eval_steps_per_second": 0.313, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 1.8785, + "step": 50 + }, + { + "epoch": 0.01, + "eval_exact_match": 29.0848, + "eval_exact_match_for_answerability_classification": 50.4615, + "eval_exact_match_for_cause_effect_classification": 36.2857, + "eval_exact_match_for_coreference_resolution": 39.4286, + "eval_exact_match_for_data_to_text": 8.1114, + "eval_exact_match_for_dialogue_act_recognition": 35.1429, + "eval_exact_match_for_grammar_error_correction": 8.0, + "eval_exact_match_for_keyword_tagging": 38.6, + "eval_exact_match_for_overlap_extraction": 7.0, + "eval_exact_match_for_question_rewriting": 2.9091, + "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 44.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 2.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 34.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 14.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 49.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 25.0, + "eval_exact_match_for_task1153_bard_word_analogy": 26.0, + "eval_exact_match_for_task1154_bard_word_analogy": 29.0, + "eval_exact_match_for_task1155_bard_word_analogy": 51.0, + "eval_exact_match_for_task1156_bard_word_analogy": 38.0, + "eval_exact_match_for_task1157_bard_word_analogy": 53.0, + "eval_exact_match_for_task1158_bard_word_analogy": 20.0, + "eval_exact_match_for_task1159_bard_word_analogy": 27.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 11.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 35.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 54.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 52.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 6.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 46.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 3.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 2.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 35.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 61.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 32.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task219_rocstories_title_generation": 7.0, + "eval_exact_match_for_task220_rocstories_title_generation": 83.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 51.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 53.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 63.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 31.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 9.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 6.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 10.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 20.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 21.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 6.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 68.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 10.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 60.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 52.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 39.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 17.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 41.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 36.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 47.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 41.125, + "eval_exact_match_for_title_generation": 9.9776, + "eval_exact_match_for_word_analogy": 33.625, + "eval_f1": 45.9144, + "eval_f1_for_answerability_classification": 53.0256, + "eval_f1_for_cause_effect_classification": 54.5318, + "eval_f1_for_coreference_resolution": 47.5571, + "eval_f1_for_data_to_text": 52.6318, + "eval_f1_for_dialogue_act_recognition": 38.6429, + "eval_f1_for_grammar_error_correction": 56.4122, + "eval_f1_for_keyword_tagging": 51.141, + "eval_f1_for_overlap_extraction": 33.1044, + "eval_f1_for_question_rewriting": 67.5912, + "eval_f1_for_task020_mctaco_answerability_classification": 53.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 45.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 76.783, + "eval_f1_for_task035_winogrande_question_rewriting": 86.4344, + "eval_f1_for_task036_qasc_keyword_tagging": 66.2716, + "eval_f1_for_task039_qasc_overlap_extraction": 16.6667, + "eval_f1_for_task050_multirc_answerability_classification": 49.0, + "eval_f1_for_task102_commongen_data_to_text": 57.6619, + "eval_f1_for_task1152_bard_word_analogy": 25.0, + "eval_f1_for_task1153_bard_word_analogy": 26.0, + "eval_f1_for_task1154_bard_word_analogy": 29.0, + "eval_f1_for_task1155_bard_word_analogy": 51.0, + "eval_f1_for_task1156_bard_word_analogy": 38.0, + "eval_f1_for_task1157_bard_word_analogy": 53.0, + "eval_f1_for_task1158_bard_word_analogy": 20.0, + "eval_f1_for_task1159_bard_word_analogy": 27.0, + "eval_f1_for_task1161_coda_19_title_generation": 30.2132, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 78.0007, + "eval_f1_for_task121_atomic_question_rewriting": 44.4196, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 8.2729, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 38.7789, + "eval_f1_for_task1356_xlsum_title_generation": 12.8732, + "eval_f1_for_task1358_xlsum_title_generation": 36.4834, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 35.0, + "eval_f1_for_task1388_cb_textual_entailment": 54.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 52.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 6.0, + "eval_f1_for_task1407_dart_data_to_text": 39.2178, + "eval_f1_for_task1409_dart_data_to_text": 48.4169, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.4438, + "eval_f1_for_task1439_doqa_answerability_classification": 49.0, + "eval_f1_for_task1442_doqa_answerability_classification": 49.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 46.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 39.1344, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.3806, + "eval_f1_for_task1562_zest_question_rewriting": 47.9247, + "eval_f1_for_task1586_scifact_title_generation": 29.9875, + "eval_f1_for_task1598_nyc_data_to_text": 44.4989, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 35.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.4762, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_f1_for_task1631_open_pi_data_to_text": 95.8613, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 38.4354, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 69.2381, + "eval_f1_for_task1728_web_nlg_data_to_text": 56.7558, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 34.0, + "eval_f1_for_task219_rocstories_title_generation": 18.4224, + "eval_f1_for_task220_rocstories_title_generation": 83.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 51.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 64.8048, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 49.542, + "eval_f1_for_task288_gigaword_title_generation": 31.0145, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 17.3704, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 70.6968, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 27.3656, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 32.3333, + "eval_f1_for_task402_grailqa_question_rewriting": 81.1728, + "eval_f1_for_task418_persent_title_generation": 23.6287, + "eval_f1_for_task442_com_qa_question_rewriting": 70.1711, + "eval_f1_for_task500_scruples_title_generation": 20.4678, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 41.6619, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 43.3049, + "eval_f1_for_task602_wikitext_title_generation": 8.2486, + "eval_f1_for_task613_liar_keyword_tagging": 21.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 33.6903, + "eval_f1_for_task619_ohsumed_title_generation": 30.2367, + "eval_f1_for_task620_ohsumed_keyword_tagging": 43.2524, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 6.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 74.8476, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 36.5222, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.15, + "eval_f1_for_task671_ambigqa_question_rewriting": 62.1915, + "eval_f1_for_task677_ollie_data_to_text": 34.5943, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 30.1507, + "eval_f1_for_task760_msr_sqa_data_to_text": 8.4277, + "eval_f1_for_task769_qed_title_generation": 67.8571, + "eval_f1_for_task827_copa_cause_effect_classification": 52.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 39.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 17.0, + "eval_f1_for_task890_gwsd_textual_entailment": 41.0, + "eval_f1_for_task891_gap_coreference_resolution": 49.1667, + "eval_f1_for_task892_gap_coreference_resolution": 47.0, + "eval_f1_for_task893_gap_coreference_resolution": 50.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task957_e2e_data_to_text": 55.541, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 41.125, + "eval_f1_for_title_generation": 33.188, + "eval_f1_for_word_analogy": 33.625, + "eval_gen_len": 9.9882, + "eval_global_step": 50, + "eval_loss": 1.2373502254486084, + "eval_rouge1": 47.8866, + "eval_rouge1_for_answerability_classification": 53.0256, + "eval_rouge1_for_cause_effect_classification": 55.7043, + "eval_rouge1_for_coreference_resolution": 49.0409, + "eval_rouge1_for_data_to_text": 55.3995, + "eval_rouge1_for_dialogue_act_recognition": 44.5508, + "eval_rouge1_for_grammar_error_correction": 61.4345, + "eval_rouge1_for_keyword_tagging": 56.4676, + "eval_rouge1_for_overlap_extraction": 35.9811, + "eval_rouge1_for_question_rewriting": 69.2332, + "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 50.1667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 76.8185, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.1705, + "eval_rouge1_for_task036_qasc_keyword_tagging": 70.5905, + "eval_rouge1_for_task039_qasc_overlap_extraction": 21.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 49.0, + "eval_rouge1_for_task102_commongen_data_to_text": 71.8523, + "eval_rouge1_for_task1152_bard_word_analogy": 25.0, + "eval_rouge1_for_task1153_bard_word_analogy": 26.0, + "eval_rouge1_for_task1154_bard_word_analogy": 29.0, + "eval_rouge1_for_task1155_bard_word_analogy": 51.0, + "eval_rouge1_for_task1156_bard_word_analogy": 38.0, + "eval_rouge1_for_task1157_bard_word_analogy": 53.0, + "eval_rouge1_for_task1158_bard_word_analogy": 20.0, + "eval_rouge1_for_task1159_bard_word_analogy": 27.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 34.0932, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 78.2269, + "eval_rouge1_for_task121_atomic_question_rewriting": 46.5821, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 8.6748, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.265, + "eval_rouge1_for_task1356_xlsum_title_generation": 14.9445, + "eval_rouge1_for_task1358_xlsum_title_generation": 40.9191, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 54.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 52.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 38.0222, + "eval_rouge1_for_task1407_dart_data_to_text": 40.1839, + "eval_rouge1_for_task1409_dart_data_to_text": 49.3084, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.2463, + "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 41.9431, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.6227, + "eval_rouge1_for_task1562_zest_question_rewriting": 51.1728, + "eval_rouge1_for_task1586_scifact_title_generation": 33.7192, + "eval_rouge1_for_task1598_nyc_data_to_text": 47.2004, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.898, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 95.9442, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 39.8225, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 69.2381, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 57.8106, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task219_rocstories_title_generation": 23.9605, + "eval_rouge1_for_task220_rocstories_title_generation": 83.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 51.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 65.65, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 50.2956, + "eval_rouge1_for_task288_gigaword_title_generation": 33.8599, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 19.4235, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 70.6524, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 27.6175, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 46.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 83.7924, + "eval_rouge1_for_task418_persent_title_generation": 26.4186, + "eval_rouge1_for_task442_com_qa_question_rewriting": 73.4793, + "eval_rouge1_for_task500_scruples_title_generation": 22.053, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 42.4702, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 44.1798, + "eval_rouge1_for_task602_wikitext_title_generation": 8.9236, + "eval_rouge1_for_task613_liar_keyword_tagging": 36.8333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 41.6456, + "eval_rouge1_for_task619_ohsumed_title_generation": 32.376, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 50.0667, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 6.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 74.8476, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 35.9889, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 79.8185, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.3409, + "eval_rouge1_for_task677_ollie_data_to_text": 37.2347, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 31.9817, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 9.1818, + "eval_rouge1_for_task769_qed_title_generation": 68.0286, + "eval_rouge1_for_task827_copa_cause_effect_classification": 52.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 39.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 41.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 49.119, + "eval_rouge1_for_task892_gap_coreference_resolution": 47.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 50.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task957_e2e_data_to_text": 55.6777, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 42.9306, + "eval_rouge1_for_title_generation": 35.3106, + "eval_rouge1_for_word_analogy": 33.625, + "eval_rougeL": 46.5556, + "eval_rougeL_for_answerability_classification": 53.0256, + "eval_rougeL_for_cause_effect_classification": 55.0257, + "eval_rougeL_for_coreference_resolution": 49.0409, + "eval_rougeL_for_data_to_text": 47.7147, + "eval_rougeL_for_dialogue_act_recognition": 44.1937, + "eval_rougeL_for_grammar_error_correction": 60.8147, + "eval_rougeL_for_keyword_tagging": 55.9487, + "eval_rougeL_for_overlap_extraction": 34.9081, + "eval_rougeL_for_question_rewriting": 65.7065, + "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 50.1667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 76.6852, + "eval_rougeL_for_task035_winogrande_question_rewriting": 85.9364, + "eval_rougeL_for_task036_qasc_keyword_tagging": 69.3959, + "eval_rougeL_for_task039_qasc_overlap_extraction": 21.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 49.0, + "eval_rougeL_for_task102_commongen_data_to_text": 62.2135, + "eval_rougeL_for_task1152_bard_word_analogy": 25.0, + "eval_rougeL_for_task1153_bard_word_analogy": 26.0, + "eval_rougeL_for_task1154_bard_word_analogy": 29.0, + "eval_rougeL_for_task1155_bard_word_analogy": 51.0, + "eval_rougeL_for_task1156_bard_word_analogy": 38.0, + "eval_rougeL_for_task1157_bard_word_analogy": 53.0, + "eval_rougeL_for_task1158_bard_word_analogy": 20.0, + "eval_rougeL_for_task1159_bard_word_analogy": 27.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 27.8321, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 77.4553, + "eval_rougeL_for_task121_atomic_question_rewriting": 40.3518, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 8.3263, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.1294, + "eval_rougeL_for_task1356_xlsum_title_generation": 13.0485, + "eval_rougeL_for_task1358_xlsum_title_generation": 35.0523, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 54.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 52.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 35.5222, + "eval_rougeL_for_task1407_dart_data_to_text": 32.8614, + "eval_rougeL_for_task1409_dart_data_to_text": 40.6716, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.8716, + "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 39.1555, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.7578, + "eval_rougeL_for_task1562_zest_question_rewriting": 45.0873, + "eval_rougeL_for_task1586_scifact_title_generation": 27.3758, + "eval_rougeL_for_task1598_nyc_data_to_text": 34.1274, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.779, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 95.5198, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 35.2299, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 69.2381, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 52.0496, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task219_rocstories_title_generation": 23.9605, + "eval_rougeL_for_task220_rocstories_title_generation": 83.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 51.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 65.65, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 48.1496, + "eval_rougeL_for_task288_gigaword_title_generation": 29.8464, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 19.4235, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 70.6524, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 27.2222, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 46.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 71.0587, + "eval_rougeL_for_task418_persent_title_generation": 23.118, + "eval_rougeL_for_task442_com_qa_question_rewriting": 69.5433, + "eval_rougeL_for_task500_scruples_title_generation": 20.8349, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 42.1097, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 43.2204, + "eval_rougeL_for_task602_wikitext_title_generation": 8.9236, + "eval_rougeL_for_task613_liar_keyword_tagging": 36.8333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 37.2911, + "eval_rougeL_for_task619_ohsumed_title_generation": 30.6954, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 48.6667, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 6.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 74.8476, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 35.9889, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 78.7097, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 61.0352, + "eval_rougeL_for_task677_ollie_data_to_text": 30.1042, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 28.5829, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 6.8942, + "eval_rougeL_for_task769_qed_title_generation": 68.0286, + "eval_rougeL_for_task827_copa_cause_effect_classification": 52.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 39.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 41.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 49.119, + "eval_rougeL_for_task892_gap_coreference_resolution": 47.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 50.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task957_e2e_data_to_text": 44.7835, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 42.9306, + "eval_rougeL_for_title_generation": 32.8987, + "eval_rougeL_for_word_analogy": 33.625, + "eval_runtime": 1009.5361, + "eval_samples_per_second": 11.797, + "eval_steps_per_second": 0.738, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 1.4629, + "step": 100 + }, + { + "epoch": 0.02, + "eval_exact_match": 27.7834, + "eval_exact_match_for_answerability_classification": 49.6154, + "eval_exact_match_for_cause_effect_classification": 40.4286, + "eval_exact_match_for_coreference_resolution": 36.6429, + "eval_exact_match_for_data_to_text": 7.385, + "eval_exact_match_for_dialogue_act_recognition": 35.2857, + "eval_exact_match_for_grammar_error_correction": 8.0, + "eval_exact_match_for_keyword_tagging": 36.8, + "eval_exact_match_for_overlap_extraction": 7.5, + "eval_exact_match_for_question_rewriting": 2.4545, + "eval_exact_match_for_task020_mctaco_answerability_classification": 55.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 36.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 2.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 28.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 15.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 21.0, + "eval_exact_match_for_task1153_bard_word_analogy": 21.0, + "eval_exact_match_for_task1154_bard_word_analogy": 20.0, + "eval_exact_match_for_task1155_bard_word_analogy": 66.0, + "eval_exact_match_for_task1156_bard_word_analogy": 32.0, + "eval_exact_match_for_task1157_bard_word_analogy": 48.0, + "eval_exact_match_for_task1158_bard_word_analogy": 11.0, + "eval_exact_match_for_task1159_bard_word_analogy": 30.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 28.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 38.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 5.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 44.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 46.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 49.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 2.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 38.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 57.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 27.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 4.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 36.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 30.0, + "eval_exact_match_for_task219_rocstories_title_generation": 8.0, + "eval_exact_match_for_task220_rocstories_title_generation": 54.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 50.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 3.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 47.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 52.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 52.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 62.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 31.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 8.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 5.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 12.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 11.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 23.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 53.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 26.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 46.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 69.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 6.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 52.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 69.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 52.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 67.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 42.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 11.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 36.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 27.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 55.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 52.0, + "eval_exact_match_for_textual_entailment": 38.0417, + "eval_exact_match_for_title_generation": 8.7444, + "eval_exact_match_for_word_analogy": 31.125, + "eval_f1": 44.5687, + "eval_f1_for_answerability_classification": 52.1795, + "eval_f1_for_cause_effect_classification": 58.234, + "eval_f1_for_coreference_resolution": 44.8103, + "eval_f1_for_data_to_text": 52.7752, + "eval_f1_for_dialogue_act_recognition": 38.7857, + "eval_f1_for_grammar_error_correction": 57.5892, + "eval_f1_for_keyword_tagging": 48.962, + "eval_f1_for_overlap_extraction": 31.7432, + "eval_f1_for_question_rewriting": 67.8773, + "eval_f1_for_task020_mctaco_answerability_classification": 55.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 38.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 71.0607, + "eval_f1_for_task035_winogrande_question_rewriting": 88.1562, + "eval_f1_for_task036_qasc_keyword_tagging": 62.7434, + "eval_f1_for_task039_qasc_overlap_extraction": 22.1667, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 55.2, + "eval_f1_for_task1152_bard_word_analogy": 21.0, + "eval_f1_for_task1153_bard_word_analogy": 21.0, + "eval_f1_for_task1154_bard_word_analogy": 20.0, + "eval_f1_for_task1155_bard_word_analogy": 66.0, + "eval_f1_for_task1156_bard_word_analogy": 32.0, + "eval_f1_for_task1157_bard_word_analogy": 48.0, + "eval_f1_for_task1158_bard_word_analogy": 11.0, + "eval_f1_for_task1159_bard_word_analogy": 30.0, + "eval_f1_for_task1161_coda_19_title_generation": 28.5045, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.557, + "eval_f1_for_task121_atomic_question_rewriting": 46.2165, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 10.6395, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 37.9609, + "eval_f1_for_task1356_xlsum_title_generation": 11.1124, + "eval_f1_for_task1358_xlsum_title_generation": 33.54, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 28.0, + "eval_f1_for_task1388_cb_textual_entailment": 38.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 5.0, + "eval_f1_for_task1407_dart_data_to_text": 38.7718, + "eval_f1_for_task1409_dart_data_to_text": 50.7991, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.4148, + "eval_f1_for_task1439_doqa_answerability_classification": 44.0, + "eval_f1_for_task1442_doqa_answerability_classification": 49.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 46.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 34.5069, + "eval_f1_for_task1554_scitail_textual_entailment": 49.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.7635, + "eval_f1_for_task1562_zest_question_rewriting": 48.6821, + "eval_f1_for_task1586_scifact_title_generation": 24.5186, + "eval_f1_for_task1598_nyc_data_to_text": 48.8128, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 38.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.9337, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_f1_for_task1631_open_pi_data_to_text": 94.1178, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 35.6982, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 64.4261, + "eval_f1_for_task1728_web_nlg_data_to_text": 57.0139, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 36.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 30.0, + "eval_f1_for_task219_rocstories_title_generation": 22.1394, + "eval_f1_for_task220_rocstories_title_generation": 54.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 46.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 63.3714, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 41.3197, + "eval_f1_for_task288_gigaword_title_generation": 28.0963, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 12.3667, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 55.3635, + "eval_f1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 84.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 32.6136, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 32.3333, + "eval_f1_for_task402_grailqa_question_rewriting": 80.9831, + "eval_f1_for_task418_persent_title_generation": 21.1366, + "eval_f1_for_task442_com_qa_question_rewriting": 70.7563, + "eval_f1_for_task500_scruples_title_generation": 17.5933, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 39.0911, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 45.2293, + "eval_f1_for_task602_wikitext_title_generation": 8.8433, + "eval_f1_for_task613_liar_keyword_tagging": 11.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 34.691, + "eval_f1_for_task619_ohsumed_title_generation": 34.0425, + "eval_f1_for_task620_ohsumed_keyword_tagging": 44.4333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 53.0, + "eval_f1_for_task640_e_snli_textual_entailment": 26.0, + "eval_f1_for_task641_e_snli_textual_entailment": 0.0, + "eval_f1_for_task642_e_snli_textual_entailment": 46.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 72.9667, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 33.9827, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.9409, + "eval_f1_for_task671_ambigqa_question_rewriting": 63.4028, + "eval_f1_for_task677_ollie_data_to_text": 34.9613, + "eval_f1_for_task738_perspectrum_textual_entailment": 52.0, + "eval_f1_for_task743_eurlex_title_generation": 28.0611, + "eval_f1_for_task760_msr_sqa_data_to_text": 0.4327, + "eval_f1_for_task769_qed_title_generation": 79.9351, + "eval_f1_for_task827_copa_cause_effect_classification": 52.0, + "eval_f1_for_task828_copa_cause_effect_classification": 67.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 42.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 11.0, + "eval_f1_for_task890_gwsd_textual_entailment": 36.0, + "eval_f1_for_task891_gap_coreference_resolution": 41.1667, + "eval_f1_for_task892_gap_coreference_resolution": 49.0, + "eval_f1_for_task893_gap_coreference_resolution": 55.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 56.1344, + "eval_f1_for_task970_sherliic_textual_entailment": 52.0, + "eval_f1_for_textual_entailment": 38.0417, + "eval_f1_for_title_generation": 31.1252, + "eval_f1_for_word_analogy": 31.125, + "eval_gen_len": 10.5372, + "eval_global_step": 100, + "eval_loss": 1.2449158430099487, + "eval_rouge1": 46.4084, + "eval_rouge1_for_answerability_classification": 52.1795, + "eval_rouge1_for_cause_effect_classification": 59.3357, + "eval_rouge1_for_coreference_resolution": 45.8288, + "eval_rouge1_for_data_to_text": 55.8299, + "eval_rouge1_for_dialogue_act_recognition": 43.6683, + "eval_rouge1_for_grammar_error_correction": 62.3693, + "eval_rouge1_for_keyword_tagging": 54.1176, + "eval_rouge1_for_overlap_extraction": 35.0578, + "eval_rouge1_for_question_rewriting": 69.65, + "eval_rouge1_for_task020_mctaco_answerability_classification": 55.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 39.8333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 71.0826, + "eval_rouge1_for_task035_winogrande_question_rewriting": 88.8237, + "eval_rouge1_for_task036_qasc_keyword_tagging": 67.1545, + "eval_rouge1_for_task039_qasc_overlap_extraction": 28.0, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 70.7678, + "eval_rouge1_for_task1152_bard_word_analogy": 21.0, + "eval_rouge1_for_task1153_bard_word_analogy": 21.0, + "eval_rouge1_for_task1154_bard_word_analogy": 20.0, + "eval_rouge1_for_task1155_bard_word_analogy": 66.0, + "eval_rouge1_for_task1156_bard_word_analogy": 32.0, + "eval_rouge1_for_task1157_bard_word_analogy": 48.0, + "eval_rouge1_for_task1158_bard_word_analogy": 11.0, + "eval_rouge1_for_task1159_bard_word_analogy": 30.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 31.6936, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.8147, + "eval_rouge1_for_task121_atomic_question_rewriting": 48.5299, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 11.0793, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 41.7025, + "eval_rouge1_for_task1356_xlsum_title_generation": 13.204, + "eval_rouge1_for_task1358_xlsum_title_generation": 38.3129, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 28.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 38.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 34.5111, + "eval_rouge1_for_task1407_dart_data_to_text": 39.5124, + "eval_rouge1_for_task1409_dart_data_to_text": 51.7231, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.9794, + "eval_rouge1_for_task1439_doqa_answerability_classification": 44.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 37.1558, + "eval_rouge1_for_task1554_scitail_textual_entailment": 49.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.7593, + "eval_rouge1_for_task1562_zest_question_rewriting": 52.1992, + "eval_rouge1_for_task1586_scifact_title_generation": 27.7434, + "eval_rouge1_for_task1598_nyc_data_to_text": 51.7998, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 79.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.2098, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 94.2258, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 37.7507, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 64.4261, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 58.2853, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 36.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 30.0, + "eval_rouge1_for_task219_rocstories_title_generation": 26.8508, + "eval_rouge1_for_task220_rocstories_title_generation": 54.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 64.7167, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 42.1156, + "eval_rouge1_for_task288_gigaword_title_generation": 30.3318, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 13.6, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 55.319, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 84.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 32.9676, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 43.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 84.4897, + "eval_rouge1_for_task418_persent_title_generation": 23.3979, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.1781, + "eval_rouge1_for_task500_scruples_title_generation": 18.7202, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 39.6135, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 46.674, + "eval_rouge1_for_task602_wikitext_title_generation": 9.818, + "eval_rouge1_for_task613_liar_keyword_tagging": 28.8333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 42.0493, + "eval_rouge1_for_task619_ohsumed_title_generation": 35.9548, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 48.1333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 53.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 26.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 0.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 46.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 73.4667, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 33.4481, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.7424, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 64.3769, + "eval_rouge1_for_task677_ollie_data_to_text": 37.3264, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 52.0, + "eval_rouge1_for_task743_eurlex_title_generation": 29.4864, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 0.4546, + "eval_rouge1_for_task769_qed_title_generation": 79.98, + "eval_rouge1_for_task827_copa_cause_effect_classification": 52.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 67.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 42.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 15.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 36.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 40.9267, + "eval_rouge1_for_task892_gap_coreference_resolution": 49.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 55.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 57.3961, + "eval_rouge1_for_task970_sherliic_textual_entailment": 52.0, + "eval_rouge1_for_textual_entailment": 39.7639, + "eval_rouge1_for_title_generation": 33.0828, + "eval_rouge1_for_word_analogy": 31.125, + "eval_rougeL": 45.0145, + "eval_rougeL_for_answerability_classification": 52.1795, + "eval_rougeL_for_cause_effect_classification": 58.6625, + "eval_rougeL_for_coreference_resolution": 45.8288, + "eval_rougeL_for_data_to_text": 47.5196, + "eval_rougeL_for_dialogue_act_recognition": 43.2397, + "eval_rougeL_for_grammar_error_correction": 61.7036, + "eval_rougeL_for_keyword_tagging": 53.7484, + "eval_rougeL_for_overlap_extraction": 34.7816, + "eval_rougeL_for_question_rewriting": 65.8812, + "eval_rougeL_for_task020_mctaco_answerability_classification": 55.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 39.8333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 70.9649, + "eval_rougeL_for_task035_winogrande_question_rewriting": 87.9257, + "eval_rougeL_for_task036_qasc_keyword_tagging": 66.1418, + "eval_rougeL_for_task039_qasc_overlap_extraction": 28.0, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 58.4342, + "eval_rougeL_for_task1152_bard_word_analogy": 21.0, + "eval_rougeL_for_task1153_bard_word_analogy": 21.0, + "eval_rougeL_for_task1154_bard_word_analogy": 20.0, + "eval_rougeL_for_task1155_bard_word_analogy": 66.0, + "eval_rougeL_for_task1156_bard_word_analogy": 32.0, + "eval_rougeL_for_task1157_bard_word_analogy": 48.0, + "eval_rougeL_for_task1158_bard_word_analogy": 11.0, + "eval_rougeL_for_task1159_bard_word_analogy": 30.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 25.6015, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.6991, + "eval_rougeL_for_task121_atomic_question_rewriting": 41.7555, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 10.611, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 38.3408, + "eval_rougeL_for_task1356_xlsum_title_generation": 11.2885, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.5588, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 28.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 38.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 31.5111, + "eval_rougeL_for_task1407_dart_data_to_text": 33.3075, + "eval_rougeL_for_task1409_dart_data_to_text": 41.7012, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.5717, + "eval_rougeL_for_task1439_doqa_answerability_classification": 44.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 33.7554, + "eval_rougeL_for_task1554_scitail_textual_entailment": 49.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.8355, + "eval_rougeL_for_task1562_zest_question_rewriting": 45.2204, + "eval_rougeL_for_task1586_scifact_title_generation": 22.2078, + "eval_rougeL_for_task1598_nyc_data_to_text": 37.9493, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 79.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.7591, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 93.9444, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 32.0369, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 64.4261, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 51.471, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 36.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 30.0, + "eval_rougeL_for_task219_rocstories_title_generation": 26.8508, + "eval_rougeL_for_task220_rocstories_title_generation": 54.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 64.7167, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 41.5633, + "eval_rougeL_for_task288_gigaword_title_generation": 26.531, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 13.6, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 55.319, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 84.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.9072, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 43.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 70.2102, + "eval_rougeL_for_task418_persent_title_generation": 20.2076, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.4642, + "eval_rougeL_for_task500_scruples_title_generation": 18.0045, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 39.0376, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 45.0956, + "eval_rougeL_for_task602_wikitext_title_generation": 9.818, + "eval_rougeL_for_task613_liar_keyword_tagging": 28.8333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 38.3969, + "eval_rougeL_for_task619_ohsumed_title_generation": 32.924, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 47.3, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 53.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 26.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 0.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 46.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 73.4667, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 33.4481, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.7427, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 62.6107, + "eval_rougeL_for_task677_ollie_data_to_text": 30.8224, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 52.0, + "eval_rougeL_for_task743_eurlex_title_generation": 26.7851, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 0.4265, + "eval_rougeL_for_task769_qed_title_generation": 79.98, + "eval_rougeL_for_task827_copa_cause_effect_classification": 52.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 67.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 42.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 15.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 36.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 40.9267, + "eval_rougeL_for_task892_gap_coreference_resolution": 49.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 55.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 44.7708, + "eval_rougeL_for_task970_sherliic_textual_entailment": 52.0, + "eval_rougeL_for_textual_entailment": 39.7639, + "eval_rougeL_for_title_generation": 30.5899, + "eval_rougeL_for_word_analogy": 31.125, + "eval_runtime": 1095.514, + "eval_samples_per_second": 10.872, + "eval_steps_per_second": 0.68, + "step": 100 + }, + { + "epoch": 0.04, + "learning_rate": 5e-05, + "loss": 1.3638, + "step": 200 + }, + { + "epoch": 0.04, + "eval_exact_match": 29.8321, + "eval_exact_match_for_answerability_classification": 49.5385, + "eval_exact_match_for_cause_effect_classification": 39.5714, + "eval_exact_match_for_coreference_resolution": 40.3571, + "eval_exact_match_for_data_to_text": 8.5956, + "eval_exact_match_for_dialogue_act_recognition": 39.0, + "eval_exact_match_for_grammar_error_correction": 6.5, + "eval_exact_match_for_keyword_tagging": 44.4, + "eval_exact_match_for_overlap_extraction": 9.5, + "eval_exact_match_for_question_rewriting": 2.6364, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 46.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 2.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 45.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 19.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 25.0, + "eval_exact_match_for_task1153_bard_word_analogy": 23.0, + "eval_exact_match_for_task1154_bard_word_analogy": 20.0, + "eval_exact_match_for_task1155_bard_word_analogy": 52.0, + "eval_exact_match_for_task1156_bard_word_analogy": 34.0, + "eval_exact_match_for_task1157_bard_word_analogy": 55.0, + "eval_exact_match_for_task1158_bard_word_analogy": 15.0, + "eval_exact_match_for_task1159_bard_word_analogy": 29.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 8.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 4.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 30.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 42.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 6.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 2.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 36.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 9.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 65.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 29.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 36.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 32.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task219_rocstories_title_generation": 6.0, + "eval_exact_match_for_task220_rocstories_title_generation": 82.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 47.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 56.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 52.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 12.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 61.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 53.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 54.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 32.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 5.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 5.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 10.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 15.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 24.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 30.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 48.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 88.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 4.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 57.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 66.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 70.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 29.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 48.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 47.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 49.0, + "eval_exact_match_for_textual_entailment": 41.9167, + "eval_exact_match_for_title_generation": 10.1457, + "eval_exact_match_for_word_analogy": 31.625, + "eval_f1": 46.5951, + "eval_f1_for_answerability_classification": 52.0, + "eval_f1_for_cause_effect_classification": 58.2998, + "eval_f1_for_coreference_resolution": 48.0896, + "eval_f1_for_data_to_text": 52.261, + "eval_f1_for_dialogue_act_recognition": 42.4286, + "eval_f1_for_grammar_error_correction": 54.7868, + "eval_f1_for_keyword_tagging": 56.306, + "eval_f1_for_overlap_extraction": 33.9217, + "eval_f1_for_question_rewriting": 66.7084, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 49.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 63.2969, + "eval_f1_for_task035_winogrande_question_rewriting": 88.167, + "eval_f1_for_task036_qasc_keyword_tagging": 73.1634, + "eval_f1_for_task039_qasc_overlap_extraction": 27.9, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 53.5129, + "eval_f1_for_task1152_bard_word_analogy": 25.0, + "eval_f1_for_task1153_bard_word_analogy": 23.0, + "eval_f1_for_task1154_bard_word_analogy": 20.0, + "eval_f1_for_task1155_bard_word_analogy": 52.0, + "eval_f1_for_task1156_bard_word_analogy": 34.0, + "eval_f1_for_task1157_bard_word_analogy": 55.0, + "eval_f1_for_task1158_bard_word_analogy": 15.0, + "eval_f1_for_task1159_bard_word_analogy": 29.0, + "eval_f1_for_task1161_coda_19_title_generation": 28.0072, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.5792, + "eval_f1_for_task121_atomic_question_rewriting": 45.9883, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.2227, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 38.538, + "eval_f1_for_task1356_xlsum_title_generation": 12.3209, + "eval_f1_for_task1358_xlsum_title_generation": 33.8797, + "eval_f1_for_task1385_anli_textual_entailment": 30.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 34.0, + "eval_f1_for_task1388_cb_textual_entailment": 42.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 6.0, + "eval_f1_for_task1407_dart_data_to_text": 38.7931, + "eval_f1_for_task1409_dart_data_to_text": 48.1216, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.0842, + "eval_f1_for_task1439_doqa_answerability_classification": 48.0, + "eval_f1_for_task1442_doqa_answerability_classification": 50.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 37.8311, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 79.4894, + "eval_f1_for_task1562_zest_question_rewriting": 49.8297, + "eval_f1_for_task1586_scifact_title_generation": 29.9366, + "eval_f1_for_task1598_nyc_data_to_text": 48.018, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 36.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 80.3493, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 95.6712, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 33.1969, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 66.1968, + "eval_f1_for_task1728_web_nlg_data_to_text": 58.1473, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 36.0, + "eval_f1_for_task201_multinli_textual_entailment": 32.0, + "eval_f1_for_task202_multinli_textual_entailment": 34.0, + "eval_f1_for_task219_rocstories_title_generation": 20.1469, + "eval_f1_for_task220_rocstories_title_generation": 82.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 47.0, + "eval_f1_for_task233_iirc_answerability_classification": 49.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 66.3714, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 39.9434, + "eval_f1_for_task288_gigaword_title_generation": 28.7653, + "eval_f1_for_task290_tellmewhy_answerability_classification": 84.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 44.8, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 67.7198, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 76.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 84.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.6927, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 35.8333, + "eval_f1_for_task402_grailqa_question_rewriting": 81.3977, + "eval_f1_for_task418_persent_title_generation": 21.9189, + "eval_f1_for_task442_com_qa_question_rewriting": 70.7163, + "eval_f1_for_task500_scruples_title_generation": 22.9244, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 39.4479, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 44.7931, + "eval_f1_for_task602_wikitext_title_generation": 12.2909, + "eval_f1_for_task613_liar_keyword_tagging": 19.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 37.406, + "eval_f1_for_task619_ohsumed_title_generation": 38.5934, + "eval_f1_for_task620_ohsumed_keyword_tagging": 46.2762, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 30.0, + "eval_f1_for_task641_e_snli_textual_entailment": 0.0, + "eval_f1_for_task642_e_snli_textual_entailment": 48.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.0905, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 4.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 74.4032, + "eval_f1_for_task671_ambigqa_question_rewriting": 60.5271, + "eval_f1_for_task677_ollie_data_to_text": 33.2717, + "eval_f1_for_task738_perspectrum_textual_entailment": 57.0, + "eval_f1_for_task743_eurlex_title_generation": 29.6586, + "eval_f1_for_task760_msr_sqa_data_to_text": 4.6787, + "eval_f1_for_task769_qed_title_generation": 83.7414, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 70.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 29.0, + "eval_f1_for_task890_gwsd_textual_entailment": 48.0, + "eval_f1_for_task891_gap_coreference_resolution": 58.6667, + "eval_f1_for_task892_gap_coreference_resolution": 50.0, + "eval_f1_for_task893_gap_coreference_resolution": 47.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_f1_for_task957_e2e_data_to_text": 54.9236, + "eval_f1_for_task970_sherliic_textual_entailment": 49.0, + "eval_f1_for_textual_entailment": 41.9167, + "eval_f1_for_title_generation": 34.2886, + "eval_f1_for_word_analogy": 31.625, + "eval_gen_len": 10.0882, + "eval_global_step": 200, + "eval_loss": 1.2204065322875977, + "eval_rouge1": 48.3661, + "eval_rouge1_for_answerability_classification": 52.0, + "eval_rouge1_for_cause_effect_classification": 58.8454, + "eval_rouge1_for_coreference_resolution": 48.7815, + "eval_rouge1_for_data_to_text": 55.1198, + "eval_rouge1_for_dialogue_act_recognition": 47.873, + "eval_rouge1_for_grammar_error_correction": 59.831, + "eval_rouge1_for_keyword_tagging": 60.6703, + "eval_rouge1_for_overlap_extraction": 36.9193, + "eval_rouge1_for_question_rewriting": 68.3024, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 49.5667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 63.3199, + "eval_rouge1_for_task035_winogrande_question_rewriting": 88.7253, + "eval_rouge1_for_task036_qasc_keyword_tagging": 78.0028, + "eval_rouge1_for_task039_qasc_overlap_extraction": 32.7333, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 68.0327, + "eval_rouge1_for_task1152_bard_word_analogy": 25.0, + "eval_rouge1_for_task1153_bard_word_analogy": 23.0, + "eval_rouge1_for_task1154_bard_word_analogy": 20.0, + "eval_rouge1_for_task1155_bard_word_analogy": 52.0, + "eval_rouge1_for_task1156_bard_word_analogy": 34.0, + "eval_rouge1_for_task1157_bard_word_analogy": 55.0, + "eval_rouge1_for_task1158_bard_word_analogy": 15.0, + "eval_rouge1_for_task1159_bard_word_analogy": 29.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 31.7725, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.831, + "eval_rouge1_for_task121_atomic_question_rewriting": 48.0986, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.6383, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.2088, + "eval_rouge1_for_task1356_xlsum_title_generation": 14.3155, + "eval_rouge1_for_task1358_xlsum_title_generation": 39.0528, + "eval_rouge1_for_task1385_anli_textual_entailment": 30.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 42.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 30.7778, + "eval_rouge1_for_task1407_dart_data_to_text": 39.6818, + "eval_rouge1_for_task1409_dart_data_to_text": 48.9542, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.0757, + "eval_rouge1_for_task1439_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 40.966, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 82.5863, + "eval_rouge1_for_task1562_zest_question_rewriting": 52.3352, + "eval_rouge1_for_task1586_scifact_title_generation": 33.4511, + "eval_rouge1_for_task1598_nyc_data_to_text": 51.0029, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.7572, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 95.7822, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 34.8833, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 66.1968, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 59.2231, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 36.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 32.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task219_rocstories_title_generation": 24.6707, + "eval_rouge1_for_task220_rocstories_title_generation": 82.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 47.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 67.7167, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 41.1054, + "eval_rouge1_for_task288_gigaword_title_generation": 30.9601, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 84.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 47.7667, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 67.6754, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 76.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 84.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 32.1648, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 41.5, + "eval_rouge1_for_task402_grailqa_question_rewriting": 84.0324, + "eval_rouge1_for_task418_persent_title_generation": 24.6223, + "eval_rouge1_for_task442_com_qa_question_rewriting": 73.9391, + "eval_rouge1_for_task500_scruples_title_generation": 24.5937, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 39.9921, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 46.1665, + "eval_rouge1_for_task602_wikitext_title_generation": 13.3871, + "eval_rouge1_for_task613_liar_keyword_tagging": 31.6333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 40.7528, + "eval_rouge1_for_task619_ohsumed_title_generation": 41.2666, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 50.125, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 30.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 0.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 48.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 93.5905, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 4.0, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 75.6608, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 61.4185, + "eval_rouge1_for_task677_ollie_data_to_text": 35.8461, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 57.0, + "eval_rouge1_for_task743_eurlex_title_generation": 30.991, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.8469, + "eval_rouge1_for_task769_qed_title_generation": 83.7082, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 70.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 42.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 48.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 58.519, + "eval_rouge1_for_task892_gap_coreference_resolution": 50.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 47.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rouge1_for_task957_e2e_data_to_text": 55.5062, + "eval_rouge1_for_task970_sherliic_textual_entailment": 49.0, + "eval_rouge1_for_textual_entailment": 43.6944, + "eval_rouge1_for_title_generation": 36.3955, + "eval_rouge1_for_word_analogy": 31.625, + "eval_rougeL": 46.9846, + "eval_rougeL_for_answerability_classification": 52.0, + "eval_rougeL_for_cause_effect_classification": 58.1364, + "eval_rougeL_for_coreference_resolution": 48.7815, + "eval_rougeL_for_data_to_text": 47.7453, + "eval_rougeL_for_dialogue_act_recognition": 47.7302, + "eval_rougeL_for_grammar_error_correction": 58.9811, + "eval_rougeL_for_keyword_tagging": 60.2503, + "eval_rougeL_for_overlap_extraction": 36.3505, + "eval_rougeL_for_question_rewriting": 64.3388, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 49.5667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 63.1866, + "eval_rougeL_for_task035_winogrande_question_rewriting": 87.8173, + "eval_rougeL_for_task036_qasc_keyword_tagging": 77.2362, + "eval_rougeL_for_task039_qasc_overlap_extraction": 32.7333, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 57.9949, + "eval_rougeL_for_task1152_bard_word_analogy": 25.0, + "eval_rougeL_for_task1153_bard_word_analogy": 23.0, + "eval_rougeL_for_task1154_bard_word_analogy": 20.0, + "eval_rougeL_for_task1155_bard_word_analogy": 52.0, + "eval_rougeL_for_task1156_bard_word_analogy": 34.0, + "eval_rougeL_for_task1157_bard_word_analogy": 55.0, + "eval_rougeL_for_task1158_bard_word_analogy": 15.0, + "eval_rougeL_for_task1159_bard_word_analogy": 29.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 26.27, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.8105, + "eval_rougeL_for_task121_atomic_question_rewriting": 41.3231, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.7572, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 38.8402, + "eval_rougeL_for_task1356_xlsum_title_generation": 12.91, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.2021, + "eval_rougeL_for_task1385_anli_textual_entailment": 30.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 42.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 29.7778, + "eval_rougeL_for_task1407_dart_data_to_text": 33.2596, + "eval_rougeL_for_task1409_dart_data_to_text": 40.9637, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.5292, + "eval_rougeL_for_task1439_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 36.8799, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 81.433, + "eval_rougeL_for_task1562_zest_question_rewriting": 45.9058, + "eval_rougeL_for_task1586_scifact_title_generation": 28.0891, + "eval_rougeL_for_task1598_nyc_data_to_text": 38.5047, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 79.4603, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 95.6004, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 30.0634, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 66.1968, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.2936, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 36.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 32.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task219_rocstories_title_generation": 24.6707, + "eval_rougeL_for_task220_rocstories_title_generation": 82.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 47.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 67.7167, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 39.9677, + "eval_rougeL_for_task288_gigaword_title_generation": 26.7711, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 84.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 47.7667, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 67.6754, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 76.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 84.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.7657, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 41.5, + "eval_rougeL_for_task402_grailqa_question_rewriting": 68.1824, + "eval_rougeL_for_task418_persent_title_generation": 21.4689, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.1049, + "eval_rougeL_for_task500_scruples_title_generation": 23.2996, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 39.5958, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 44.0602, + "eval_rougeL_for_task602_wikitext_title_generation": 13.149, + "eval_rougeL_for_task613_liar_keyword_tagging": 31.6333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 37.1894, + "eval_rougeL_for_task619_ohsumed_title_generation": 35.7632, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 48.7917, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 30.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 0.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 48.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 93.5905, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 4.0, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 73.99, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 59.1055, + "eval_rougeL_for_task677_ollie_data_to_text": 29.3365, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 57.0, + "eval_rougeL_for_task743_eurlex_title_generation": 27.6161, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.4197, + "eval_rougeL_for_task769_qed_title_generation": 83.7082, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 70.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 42.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 48.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 58.519, + "eval_rougeL_for_task892_gap_coreference_resolution": 50.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 47.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 55.0, + "eval_rougeL_for_task957_e2e_data_to_text": 44.2734, + "eval_rougeL_for_task970_sherliic_textual_entailment": 49.0, + "eval_rougeL_for_textual_entailment": 43.6944, + "eval_rougeL_for_title_generation": 33.6419, + "eval_rougeL_for_word_analogy": 31.625, + "eval_runtime": 1062.9084, + "eval_samples_per_second": 11.205, + "eval_steps_per_second": 0.701, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 1.2523, + "step": 500 + }, + { + "epoch": 0.11, + "eval_exact_match": 29.3535, + "eval_exact_match_for_answerability_classification": 47.3846, + "eval_exact_match_for_cause_effect_classification": 39.8571, + "eval_exact_match_for_coreference_resolution": 40.5, + "eval_exact_match_for_data_to_text": 6.0533, + "eval_exact_match_for_dialogue_act_recognition": 38.7143, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 40.0, + "eval_exact_match_for_overlap_extraction": 6.5, + "eval_exact_match_for_question_rewriting": 2.8182, + "eval_exact_match_for_task020_mctaco_answerability_classification": 48.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 46.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 8.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 27.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 13.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 52.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 25.0, + "eval_exact_match_for_task1153_bard_word_analogy": 27.0, + "eval_exact_match_for_task1154_bard_word_analogy": 25.0, + "eval_exact_match_for_task1155_bard_word_analogy": 88.0, + "eval_exact_match_for_task1156_bard_word_analogy": 35.0, + "eval_exact_match_for_task1157_bard_word_analogy": 61.0, + "eval_exact_match_for_task1158_bard_word_analogy": 30.0, + "eval_exact_match_for_task1159_bard_word_analogy": 27.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 35.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 31.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 52.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 5.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 26.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 41.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 42.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 47.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 45.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 36.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 5.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 49.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 24.0, + "eval_exact_match_for_task219_rocstories_title_generation": 6.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 44.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 20.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 48.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 51.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 8.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 39.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 59.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 62.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 58.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 29.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 8.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 2.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 6.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 10.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 15.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 23.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 29.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 41.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 85.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 17.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 52.0, + "eval_exact_match_for_task743_eurlex_title_generation": 2.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 71.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 59.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 57.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 48.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 54.0, + "eval_exact_match_for_textual_entailment": 40.8333, + "eval_exact_match_for_title_generation": 8.8004, + "eval_exact_match_for_word_analogy": 39.75, + "eval_f1": 46.7539, + "eval_f1_for_answerability_classification": 49.8974, + "eval_f1_for_cause_effect_classification": 57.6793, + "eval_f1_for_coreference_resolution": 47.4614, + "eval_f1_for_data_to_text": 52.7679, + "eval_f1_for_dialogue_act_recognition": 42.1429, + "eval_f1_for_grammar_error_correction": 56.3359, + "eval_f1_for_keyword_tagging": 53.2105, + "eval_f1_for_overlap_extraction": 34.626, + "eval_f1_for_question_rewriting": 68.9985, + "eval_f1_for_task020_mctaco_answerability_classification": 48.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 51.0, + "eval_f1_for_task034_winogrande_question_rewriting": 79.234, + "eval_f1_for_task035_winogrande_question_rewriting": 89.0256, + "eval_f1_for_task036_qasc_keyword_tagging": 63.4955, + "eval_f1_for_task039_qasc_overlap_extraction": 22.5, + "eval_f1_for_task050_multirc_answerability_classification": 52.0, + "eval_f1_for_task102_commongen_data_to_text": 53.7168, + "eval_f1_for_task1152_bard_word_analogy": 25.0, + "eval_f1_for_task1153_bard_word_analogy": 27.0, + "eval_f1_for_task1154_bard_word_analogy": 25.0, + "eval_f1_for_task1155_bard_word_analogy": 88.0, + "eval_f1_for_task1156_bard_word_analogy": 35.0, + "eval_f1_for_task1157_bard_word_analogy": 61.0, + "eval_f1_for_task1158_bard_word_analogy": 30.0, + "eval_f1_for_task1159_bard_word_analogy": 27.0, + "eval_f1_for_task1161_coda_19_title_generation": 34.1261, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.8172, + "eval_f1_for_task121_atomic_question_rewriting": 45.9842, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.794, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.9495, + "eval_f1_for_task1356_xlsum_title_generation": 15.0887, + "eval_f1_for_task1358_xlsum_title_generation": 35.4495, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 35.0, + "eval_f1_for_task1387_anli_textual_entailment": 31.0, + "eval_f1_for_task1388_cb_textual_entailment": 52.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 5.0, + "eval_f1_for_task1407_dart_data_to_text": 41.7798, + "eval_f1_for_task1409_dart_data_to_text": 48.1173, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.3477, + "eval_f1_for_task1439_doqa_answerability_classification": 51.0, + "eval_f1_for_task1442_doqa_answerability_classification": 53.0, + "eval_f1_for_task1516_imppres_textual_entailment": 26.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 41.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 41.2892, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.3242, + "eval_f1_for_task1562_zest_question_rewriting": 50.2534, + "eval_f1_for_task1586_scifact_title_generation": 35.2023, + "eval_f1_for_task1598_nyc_data_to_text": 48.5719, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 42.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.5811, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 47.0, + "eval_f1_for_task1631_open_pi_data_to_text": 93.6859, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 33.6116, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 70.9346, + "eval_f1_for_task1728_web_nlg_data_to_text": 58.147, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 49.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 24.0, + "eval_f1_for_task219_rocstories_title_generation": 20.0437, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 44.0, + "eval_f1_for_task232_iirc_answerability_classification": 20.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 61.1714, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 46.7519, + "eval_f1_for_task288_gigaword_title_generation": 32.554, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 16.2333, + "eval_f1_for_task329_gap_coreference_resolution": 39.0, + "eval_f1_for_task330_gap_coreference_resolution": 66.0111, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 76.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 87.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 86.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 30.5789, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 32.5667, + "eval_f1_for_task402_grailqa_question_rewriting": 81.7142, + "eval_f1_for_task418_persent_title_generation": 25.2376, + "eval_f1_for_task442_com_qa_question_rewriting": 70.923, + "eval_f1_for_task500_scruples_title_generation": 20.6945, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 43.8097, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 40.9927, + "eval_f1_for_task602_wikitext_title_generation": 14.3099, + "eval_f1_for_task613_liar_keyword_tagging": 17.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 40.8427, + "eval_f1_for_task619_ohsumed_title_generation": 44.441, + "eval_f1_for_task620_ohsumed_keyword_tagging": 42.9667, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 29.0, + "eval_f1_for_task641_e_snli_textual_entailment": 0.0, + "eval_f1_for_task642_e_snli_textual_entailment": 41.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 92.5905, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 28.2905, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.0978, + "eval_f1_for_task671_ambigqa_question_rewriting": 63.4035, + "eval_f1_for_task677_ollie_data_to_text": 35.3977, + "eval_f1_for_task738_perspectrum_textual_entailment": 52.0, + "eval_f1_for_task743_eurlex_title_generation": 30.5287, + "eval_f1_for_task760_msr_sqa_data_to_text": 2.716, + "eval_f1_for_task769_qed_title_generation": 86.805, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 59.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_f1_for_task890_gwsd_textual_entailment": 57.0, + "eval_f1_for_task891_gap_coreference_resolution": 60.2524, + "eval_f1_for_task892_gap_coreference_resolution": 48.0, + "eval_f1_for_task893_gap_coreference_resolution": 44.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_f1_for_task957_e2e_data_to_text": 55.7399, + "eval_f1_for_task970_sherliic_textual_entailment": 54.0, + "eval_f1_for_textual_entailment": 40.8333, + "eval_f1_for_title_generation": 34.5117, + "eval_f1_for_word_analogy": 39.75, + "eval_gen_len": 9.0317, + "eval_global_step": 500, + "eval_loss": 1.2152013778686523, + "eval_rouge1": 48.5698, + "eval_rouge1_for_answerability_classification": 49.8974, + "eval_rouge1_for_cause_effect_classification": 58.4602, + "eval_rouge1_for_coreference_resolution": 48.2224, + "eval_rouge1_for_data_to_text": 55.6764, + "eval_rouge1_for_dialogue_act_recognition": 47.5206, + "eval_rouge1_for_grammar_error_correction": 61.4689, + "eval_rouge1_for_keyword_tagging": 58.2223, + "eval_rouge1_for_overlap_extraction": 36.5151, + "eval_rouge1_for_question_rewriting": 70.5893, + "eval_rouge1_for_task020_mctaco_answerability_classification": 48.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 50.9, + "eval_rouge1_for_task034_winogrande_question_rewriting": 79.2512, + "eval_rouge1_for_task035_winogrande_question_rewriting": 89.5791, + "eval_rouge1_for_task036_qasc_keyword_tagging": 69.1354, + "eval_rouge1_for_task039_qasc_overlap_extraction": 25.3333, + "eval_rouge1_for_task050_multirc_answerability_classification": 52.0, + "eval_rouge1_for_task102_commongen_data_to_text": 68.212, + "eval_rouge1_for_task1152_bard_word_analogy": 25.0, + "eval_rouge1_for_task1153_bard_word_analogy": 27.0, + "eval_rouge1_for_task1154_bard_word_analogy": 25.0, + "eval_rouge1_for_task1155_bard_word_analogy": 88.0, + "eval_rouge1_for_task1156_bard_word_analogy": 35.0, + "eval_rouge1_for_task1157_bard_word_analogy": 61.0, + "eval_rouge1_for_task1158_bard_word_analogy": 30.0, + "eval_rouge1_for_task1159_bard_word_analogy": 27.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 38.3144, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.0892, + "eval_rouge1_for_task121_atomic_question_rewriting": 48.4081, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.2762, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.9648, + "eval_rouge1_for_task1356_xlsum_title_generation": 18.5307, + "eval_rouge1_for_task1358_xlsum_title_generation": 39.5054, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 31.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 52.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 32.6444, + "eval_rouge1_for_task1407_dart_data_to_text": 42.6706, + "eval_rouge1_for_task1409_dart_data_to_text": 48.5046, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.8284, + "eval_rouge1_for_task1439_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 26.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 41.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 44.9325, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.1093, + "eval_rouge1_for_task1562_zest_question_rewriting": 53.5952, + "eval_rouge1_for_task1586_scifact_title_generation": 39.5109, + "eval_rouge1_for_task1598_nyc_data_to_text": 51.6062, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 80.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.877, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 47.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 93.9367, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 35.8079, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 70.9346, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 59.2135, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 49.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 24.0, + "eval_rouge1_for_task219_rocstories_title_generation": 24.6375, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 44.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 20.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 62.0167, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 47.6969, + "eval_rouge1_for_task288_gigaword_title_generation": 35.2834, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 18.6, + "eval_rouge1_for_task329_gap_coreference_resolution": 39.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 65.9667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 76.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 87.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 86.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.6131, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 39.2333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 84.2393, + "eval_rouge1_for_task418_persent_title_generation": 28.3216, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.2807, + "eval_rouge1_for_task500_scruples_title_generation": 22.7317, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 44.5544, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 42.1729, + "eval_rouge1_for_task602_wikitext_title_generation": 15.7701, + "eval_rouge1_for_task613_liar_keyword_tagging": 31.8, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 46.2751, + "eval_rouge1_for_task619_ohsumed_title_generation": 48.1665, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 46.8, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 29.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 0.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 41.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 93.3762, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 28.9238, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.0139, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 64.1842, + "eval_rouge1_for_task677_ollie_data_to_text": 38.0061, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 52.0, + "eval_rouge1_for_task743_eurlex_title_generation": 32.1413, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 2.7936, + "eval_rouge1_for_task769_qed_title_generation": 86.7726, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 59.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 57.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 60.5381, + "eval_rouge1_for_task892_gap_coreference_resolution": 48.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 44.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_rouge1_for_task957_e2e_data_to_text": 57.011, + "eval_rouge1_for_task970_sherliic_textual_entailment": 54.0, + "eval_rouge1_for_textual_entailment": 42.4444, + "eval_rouge1_for_title_generation": 36.9342, + "eval_rouge1_for_word_analogy": 39.75, + "eval_rougeL": 47.0718, + "eval_rougeL_for_answerability_classification": 49.8974, + "eval_rougeL_for_cause_effect_classification": 57.9658, + "eval_rougeL_for_coreference_resolution": 48.1986, + "eval_rougeL_for_data_to_text": 47.3663, + "eval_rougeL_for_dialogue_act_recognition": 47.3302, + "eval_rougeL_for_grammar_error_correction": 60.8197, + "eval_rougeL_for_keyword_tagging": 57.6452, + "eval_rougeL_for_overlap_extraction": 35.588, + "eval_rougeL_for_question_rewriting": 66.8679, + "eval_rougeL_for_task020_mctaco_answerability_classification": 48.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 50.9, + "eval_rougeL_for_task034_winogrande_question_rewriting": 77.8231, + "eval_rougeL_for_task035_winogrande_question_rewriting": 89.0259, + "eval_rougeL_for_task036_qasc_keyword_tagging": 67.5499, + "eval_rougeL_for_task039_qasc_overlap_extraction": 25.3333, + "eval_rougeL_for_task050_multirc_answerability_classification": 52.0, + "eval_rougeL_for_task102_commongen_data_to_text": 56.6534, + "eval_rougeL_for_task1152_bard_word_analogy": 25.0, + "eval_rougeL_for_task1153_bard_word_analogy": 27.0, + "eval_rougeL_for_task1154_bard_word_analogy": 25.0, + "eval_rougeL_for_task1155_bard_word_analogy": 88.0, + "eval_rougeL_for_task1156_bard_word_analogy": 35.0, + "eval_rougeL_for_task1157_bard_word_analogy": 61.0, + "eval_rougeL_for_task1158_bard_word_analogy": 30.0, + "eval_rougeL_for_task1159_bard_word_analogy": 27.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 31.462, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.0644, + "eval_rougeL_for_task121_atomic_question_rewriting": 42.2822, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.458, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.841, + "eval_rougeL_for_task1356_xlsum_title_generation": 15.6238, + "eval_rougeL_for_task1358_xlsum_title_generation": 33.4081, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 31.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 52.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 31.3111, + "eval_rougeL_for_task1407_dart_data_to_text": 35.1477, + "eval_rougeL_for_task1409_dart_data_to_text": 39.7837, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.4385, + "eval_rougeL_for_task1439_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 26.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 41.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 38.7122, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.2009, + "eval_rougeL_for_task1562_zest_question_rewriting": 46.933, + "eval_rougeL_for_task1586_scifact_title_generation": 33.0245, + "eval_rougeL_for_task1598_nyc_data_to_text": 39.2222, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 80.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.6447, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 47.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 92.7922, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 29.9704, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 70.9346, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 51.8087, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 49.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 24.0, + "eval_rougeL_for_task219_rocstories_title_generation": 24.6375, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 44.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 20.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 62.0167, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 45.8427, + "eval_rougeL_for_task288_gigaword_title_generation": 30.7266, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 18.6, + "eval_rougeL_for_task329_gap_coreference_resolution": 39.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 65.9667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 76.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 87.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 86.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.262, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 39.2333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 70.8242, + "eval_rougeL_for_task418_persent_title_generation": 24.5914, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.074, + "eval_rougeL_for_task500_scruples_title_generation": 21.5081, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 44.3411, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.7205, + "eval_rougeL_for_task602_wikitext_title_generation": 15.532, + "eval_rougeL_for_task613_liar_keyword_tagging": 31.8, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 43.1654, + "eval_rougeL_for_task619_ohsumed_title_generation": 40.5115, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 45.9, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 29.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 0.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 41.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 92.9762, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 28.5905, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 78.7549, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 62.2796, + "eval_rougeL_for_task677_ollie_data_to_text": 30.5937, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 52.0, + "eval_rougeL_for_task743_eurlex_title_generation": 28.7216, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 2.6284, + "eval_rougeL_for_task769_qed_title_generation": 86.7726, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 59.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 57.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 60.5381, + "eval_rougeL_for_task892_gap_coreference_resolution": 48.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 44.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 53.0, + "eval_rougeL_for_task957_e2e_data_to_text": 44.5609, + "eval_rougeL_for_task970_sherliic_textual_entailment": 54.0, + "eval_rougeL_for_textual_entailment": 42.4444, + "eval_rougeL_for_title_generation": 33.7016, + "eval_rougeL_for_word_analogy": 39.75, + "eval_runtime": 849.1643, + "eval_samples_per_second": 14.026, + "eval_steps_per_second": 0.877, + "step": 500 + }, + { + "epoch": 0.22, + "learning_rate": 5e-05, + "loss": 1.1771, + "step": 1000 + }, + { + "epoch": 0.22, + "eval_exact_match": 30.806, + "eval_exact_match_for_answerability_classification": 49.3846, + "eval_exact_match_for_cause_effect_classification": 38.8571, + "eval_exact_match_for_coreference_resolution": 43.0714, + "eval_exact_match_for_data_to_text": 6.9007, + "eval_exact_match_for_dialogue_act_recognition": 37.7143, + "eval_exact_match_for_grammar_error_correction": 6.5, + "eval_exact_match_for_keyword_tagging": 40.6, + "eval_exact_match_for_overlap_extraction": 11.5, + "eval_exact_match_for_question_rewriting": 2.3636, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 45.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 2.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 24.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 52.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 25.0, + "eval_exact_match_for_task1153_bard_word_analogy": 32.0, + "eval_exact_match_for_task1154_bard_word_analogy": 27.0, + "eval_exact_match_for_task1155_bard_word_analogy": 77.0, + "eval_exact_match_for_task1156_bard_word_analogy": 42.0, + "eval_exact_match_for_task1157_bard_word_analogy": 72.0, + "eval_exact_match_for_task1158_bard_word_analogy": 26.0, + "eval_exact_match_for_task1159_bard_word_analogy": 31.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 51.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 4.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 43.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 13.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 2.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 51.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 32.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 5.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 49.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 64.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 32.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 5.0, + "eval_exact_match_for_task220_rocstories_title_generation": 76.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 38.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 52.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 50.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 13.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 68.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 61.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 54.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 43.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 9.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 17.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 26.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 28.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 86.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 5.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 53.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 61.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 57.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 55.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 44.5833, + "eval_exact_match_for_title_generation": 9.1928, + "eval_exact_match_for_word_analogy": 41.5, + "eval_f1": 48.051, + "eval_f1_for_answerability_classification": 51.9487, + "eval_f1_for_cause_effect_classification": 55.9209, + "eval_f1_for_coreference_resolution": 52.2147, + "eval_f1_for_data_to_text": 51.733, + "eval_f1_for_dialogue_act_recognition": 41.2857, + "eval_f1_for_grammar_error_correction": 55.8833, + "eval_f1_for_keyword_tagging": 55.5927, + "eval_f1_for_overlap_extraction": 38.5445, + "eval_f1_for_question_rewriting": 65.3469, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 48.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 80.0031, + "eval_f1_for_task035_winogrande_question_rewriting": 89.2241, + "eval_f1_for_task036_qasc_keyword_tagging": 67.7445, + "eval_f1_for_task039_qasc_overlap_extraction": 31.4, + "eval_f1_for_task050_multirc_answerability_classification": 52.0, + "eval_f1_for_task102_commongen_data_to_text": 52.7153, + "eval_f1_for_task1152_bard_word_analogy": 25.0, + "eval_f1_for_task1153_bard_word_analogy": 32.0, + "eval_f1_for_task1154_bard_word_analogy": 27.0, + "eval_f1_for_task1155_bard_word_analogy": 77.0, + "eval_f1_for_task1156_bard_word_analogy": 42.6667, + "eval_f1_for_task1157_bard_word_analogy": 72.0, + "eval_f1_for_task1158_bard_word_analogy": 26.0, + "eval_f1_for_task1159_bard_word_analogy": 31.0, + "eval_f1_for_task1161_coda_19_title_generation": 32.3975, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 78.8419, + "eval_f1_for_task121_atomic_question_rewriting": 43.5692, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 11.4621, + "eval_f1_for_task1344_rte_textual_entailment": 51.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.8169, + "eval_f1_for_task1356_xlsum_title_generation": 15.4068, + "eval_f1_for_task1358_xlsum_title_generation": 36.0335, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 4.0, + "eval_f1_for_task1407_dart_data_to_text": 33.1835, + "eval_f1_for_task1409_dart_data_to_text": 49.4862, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.4498, + "eval_f1_for_task1439_doqa_answerability_classification": 49.0, + "eval_f1_for_task1442_doqa_answerability_classification": 50.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 43.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 41.7721, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 81.3168, + "eval_f1_for_task1562_zest_question_rewriting": 50.277, + "eval_f1_for_task1586_scifact_title_generation": 34.1165, + "eval_f1_for_task1598_nyc_data_to_text": 51.4331, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 34.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.9823, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_f1_for_task1631_open_pi_data_to_text": 94.6228, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 34.5918, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 71.6667, + "eval_f1_for_task1728_web_nlg_data_to_text": 55.7178, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 49.0, + "eval_f1_for_task200_multinli_textual_entailment": 64.0, + "eval_f1_for_task201_multinli_textual_entailment": 32.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 18.2791, + "eval_f1_for_task220_rocstories_title_generation": 76.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 38.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 52.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 62.3714, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 45.689, + "eval_f1_for_task288_gigaword_title_generation": 31.4141, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 56.6667, + "eval_f1_for_task329_gap_coreference_resolution": 45.0, + "eval_f1_for_task330_gap_coreference_resolution": 75.8905, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 87.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_f1_for_task393_cod3s_cause_effect_classification": 28.0573, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 44.8333, + "eval_f1_for_task402_grailqa_question_rewriting": 80.8825, + "eval_f1_for_task418_persent_title_generation": 24.9969, + "eval_f1_for_task442_com_qa_question_rewriting": 71.5136, + "eval_f1_for_task500_scruples_title_generation": 19.174, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 37.2684, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 42.0952, + "eval_f1_for_task602_wikitext_title_generation": 13.9717, + "eval_f1_for_task613_liar_keyword_tagging": 18.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 34.7223, + "eval_f1_for_task619_ohsumed_title_generation": 45.5832, + "eval_f1_for_task620_ohsumed_keyword_tagging": 47.9333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 28.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.9524, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 13.6667, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.8861, + "eval_f1_for_task671_ambigqa_question_rewriting": 25.8191, + "eval_f1_for_task677_ollie_data_to_text": 34.6141, + "eval_f1_for_task738_perspectrum_textual_entailment": 53.0, + "eval_f1_for_task743_eurlex_title_generation": 26.2803, + "eval_f1_for_task760_msr_sqa_data_to_text": 0.6548, + "eval_f1_for_task769_qed_title_generation": 81.861, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 57.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 55.0, + "eval_f1_for_task891_gap_coreference_resolution": 60.2444, + "eval_f1_for_task892_gap_coreference_resolution": 52.0, + "eval_f1_for_task893_gap_coreference_resolution": 50.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_f1_for_task957_e2e_data_to_text": 55.3715, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 44.5833, + "eval_f1_for_title_generation": 34.7796, + "eval_f1_for_word_analogy": 41.5833, + "eval_gen_len": 9.0165, + "eval_global_step": 1000, + "eval_loss": 1.2505172491073608, + "eval_rouge1": 49.8501, + "eval_rouge1_for_answerability_classification": 51.9487, + "eval_rouge1_for_cause_effect_classification": 56.6144, + "eval_rouge1_for_coreference_resolution": 52.8049, + "eval_rouge1_for_data_to_text": 54.3958, + "eval_rouge1_for_dialogue_act_recognition": 47.0683, + "eval_rouge1_for_grammar_error_correction": 60.8458, + "eval_rouge1_for_keyword_tagging": 60.5022, + "eval_rouge1_for_overlap_extraction": 40.7605, + "eval_rouge1_for_question_rewriting": 66.9215, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 48.5667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 80.0492, + "eval_rouge1_for_task035_winogrande_question_rewriting": 89.8763, + "eval_rouge1_for_task036_qasc_keyword_tagging": 71.6398, + "eval_rouge1_for_task039_qasc_overlap_extraction": 35.2333, + "eval_rouge1_for_task050_multirc_answerability_classification": 52.0, + "eval_rouge1_for_task102_commongen_data_to_text": 67.1488, + "eval_rouge1_for_task1152_bard_word_analogy": 25.0, + "eval_rouge1_for_task1153_bard_word_analogy": 32.0, + "eval_rouge1_for_task1154_bard_word_analogy": 27.0, + "eval_rouge1_for_task1155_bard_word_analogy": 77.0, + "eval_rouge1_for_task1156_bard_word_analogy": 42.6667, + "eval_rouge1_for_task1157_bard_word_analogy": 72.0, + "eval_rouge1_for_task1158_bard_word_analogy": 26.0, + "eval_rouge1_for_task1159_bard_word_analogy": 31.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 36.9706, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.3588, + "eval_rouge1_for_task121_atomic_question_rewriting": 46.1919, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 11.8385, + "eval_rouge1_for_task1344_rte_textual_entailment": 51.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.5613, + "eval_rouge1_for_task1356_xlsum_title_generation": 18.2898, + "eval_rouge1_for_task1358_xlsum_title_generation": 40.217, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 31.1444, + "eval_rouge1_for_task1407_dart_data_to_text": 33.478, + "eval_rouge1_for_task1409_dart_data_to_text": 49.1404, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.4523, + "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 43.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 44.3855, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 84.2394, + "eval_rouge1_for_task1562_zest_question_rewriting": 53.3597, + "eval_rouge1_for_task1586_scifact_title_generation": 37.6427, + "eval_rouge1_for_task1598_nyc_data_to_text": 53.7921, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.2937, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 94.7515, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 36.9242, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 71.6667, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 57.2402, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 49.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 64.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 32.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 23.284, + "eval_rouge1_for_task220_rocstories_title_generation": 76.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 38.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 52.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 63.2167, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 46.2877, + "eval_rouge1_for_task288_gigaword_title_generation": 34.528, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 56.9667, + "eval_rouge1_for_task329_gap_coreference_resolution": 45.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 75.819, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 87.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 28.2479, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 51.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 82.9905, + "eval_rouge1_for_task418_persent_title_generation": 27.7023, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.8506, + "eval_rouge1_for_task500_scruples_title_generation": 20.2075, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.5267, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 44.2524, + "eval_rouge1_for_task602_wikitext_title_generation": 14.3605, + "eval_rouge1_for_task613_liar_keyword_tagging": 34.1667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 39.3866, + "eval_rouge1_for_task619_ohsumed_title_generation": 48.6266, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 51.9667, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 28.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.7381, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 13.6667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.7328, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 26.872, + "eval_rouge1_for_task677_ollie_data_to_text": 36.9037, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 53.0, + "eval_rouge1_for_task743_eurlex_title_generation": 27.2669, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 0.7081, + "eval_rouge1_for_task769_qed_title_generation": 81.206, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 57.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 55.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 60.5333, + "eval_rouge1_for_task892_gap_coreference_resolution": 52.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 50.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_rouge1_for_task957_e2e_data_to_text": 56.6708, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 46.4167, + "eval_rouge1_for_title_generation": 36.9356, + "eval_rouge1_for_word_analogy": 41.5833, + "eval_rougeL": 48.4694, + "eval_rougeL_for_answerability_classification": 51.9487, + "eval_rougeL_for_cause_effect_classification": 56.0458, + "eval_rougeL_for_coreference_resolution": 52.8049, + "eval_rougeL_for_data_to_text": 47.3638, + "eval_rougeL_for_dialogue_act_recognition": 46.9968, + "eval_rougeL_for_grammar_error_correction": 60.1374, + "eval_rougeL_for_keyword_tagging": 59.7063, + "eval_rougeL_for_overlap_extraction": 39.6709, + "eval_rougeL_for_question_rewriting": 63.1418, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 48.5667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 79.8141, + "eval_rougeL_for_task035_winogrande_question_rewriting": 89.7369, + "eval_rougeL_for_task036_qasc_keyword_tagging": 69.5434, + "eval_rougeL_for_task039_qasc_overlap_extraction": 35.2333, + "eval_rougeL_for_task050_multirc_answerability_classification": 52.0, + "eval_rougeL_for_task102_commongen_data_to_text": 57.1395, + "eval_rougeL_for_task1152_bard_word_analogy": 25.0, + "eval_rougeL_for_task1153_bard_word_analogy": 32.0, + "eval_rougeL_for_task1154_bard_word_analogy": 27.0, + "eval_rougeL_for_task1155_bard_word_analogy": 77.0, + "eval_rougeL_for_task1156_bard_word_analogy": 42.6667, + "eval_rougeL_for_task1157_bard_word_analogy": 72.0, + "eval_rougeL_for_task1158_bard_word_analogy": 26.0, + "eval_rougeL_for_task1159_bard_word_analogy": 31.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 29.5831, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.2208, + "eval_rougeL_for_task121_atomic_question_rewriting": 39.2191, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 11.6182, + "eval_rougeL_for_task1344_rte_textual_entailment": 51.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.8098, + "eval_rougeL_for_task1356_xlsum_title_generation": 16.4491, + "eval_rougeL_for_task1358_xlsum_title_generation": 34.3383, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 30.6444, + "eval_rougeL_for_task1407_dart_data_to_text": 29.5328, + "eval_rougeL_for_task1409_dart_data_to_text": 42.1723, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.9544, + "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 43.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 39.734, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 83.3204, + "eval_rougeL_for_task1562_zest_question_rewriting": 46.8578, + "eval_rougeL_for_task1586_scifact_title_generation": 30.895, + "eval_rougeL_for_task1598_nyc_data_to_text": 41.8781, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.1354, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 94.5404, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 31.1276, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 71.6667, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 51.2817, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 49.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 64.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 32.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 23.284, + "eval_rougeL_for_task220_rocstories_title_generation": 76.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 38.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 52.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 63.2167, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 44.1085, + "eval_rougeL_for_task288_gigaword_title_generation": 30.3159, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 56.9667, + "eval_rougeL_for_task329_gap_coreference_resolution": 45.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 75.819, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 87.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.6667, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 27.2828, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 51.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 69.2056, + "eval_rougeL_for_task418_persent_title_generation": 23.7733, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.0151, + "eval_rougeL_for_task500_scruples_title_generation": 19.4232, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 37.2081, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 42.6044, + "eval_rougeL_for_task602_wikitext_title_generation": 14.3605, + "eval_rougeL_for_task613_liar_keyword_tagging": 34.1667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 36.3713, + "eval_rougeL_for_task619_ohsumed_title_generation": 41.3212, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 50.4833, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 28.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.3381, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 13.6667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.5761, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 23.9693, + "eval_rougeL_for_task677_ollie_data_to_text": 30.5566, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 53.0, + "eval_rougeL_for_task743_eurlex_title_generation": 25.2574, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 0.689, + "eval_rougeL_for_task769_qed_title_generation": 81.206, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 57.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 55.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 60.5333, + "eval_rougeL_for_task892_gap_coreference_resolution": 52.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 50.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_rougeL_for_task957_e2e_data_to_text": 43.944, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 46.4167, + "eval_rougeL_for_title_generation": 33.9799, + "eval_rougeL_for_word_analogy": 41.5833, + "eval_runtime": 826.7462, + "eval_samples_per_second": 14.406, + "eval_steps_per_second": 0.901, + "step": 1000 + }, + { + "epoch": 0.33, + "learning_rate": 5e-05, + "loss": 1.098, + "step": 1500 + }, + { + "epoch": 0.33, + "eval_exact_match": 31.1419, + "eval_exact_match_for_answerability_classification": 53.5385, + "eval_exact_match_for_cause_effect_classification": 42.4286, + "eval_exact_match_for_coreference_resolution": 38.7143, + "eval_exact_match_for_data_to_text": 8.2324, + "eval_exact_match_for_dialogue_act_recognition": 36.7143, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 48.0, + "eval_exact_match_for_overlap_extraction": 11.0, + "eval_exact_match_for_question_rewriting": 2.8182, + "eval_exact_match_for_task020_mctaco_answerability_classification": 56.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 48.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 6.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 39.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 22.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 70.0, + "eval_exact_match_for_task102_commongen_data_to_text": 1.0, + "eval_exact_match_for_task1152_bard_word_analogy": 26.0, + "eval_exact_match_for_task1153_bard_word_analogy": 42.0, + "eval_exact_match_for_task1154_bard_word_analogy": 30.0, + "eval_exact_match_for_task1155_bard_word_analogy": 75.0, + "eval_exact_match_for_task1156_bard_word_analogy": 46.0, + "eval_exact_match_for_task1157_bard_word_analogy": 68.0, + "eval_exact_match_for_task1158_bard_word_analogy": 36.0, + "eval_exact_match_for_task1159_bard_word_analogy": 33.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 35.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 49.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 48.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 54.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 47.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 25.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 23.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 45.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 59.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 34.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 48.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 69.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 25.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 6.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 52.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 52.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 63.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 43.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 4.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 57.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 52.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 55.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 57.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 65.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 38.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 7.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 5.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 4.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 8.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 19.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 19.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 76.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 20.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 47.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 87.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 13.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 65.0, + "eval_exact_match_for_task743_eurlex_title_generation": 2.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 67.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 71.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 22.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 50.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 42.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 57.0, + "eval_exact_match_for_textual_entailment": 43.375, + "eval_exact_match_for_title_generation": 8.0717, + "eval_exact_match_for_word_analogy": 44.5, + "eval_f1": 47.8745, + "eval_f1_for_answerability_classification": 56.1026, + "eval_f1_for_cause_effect_classification": 60.0741, + "eval_f1_for_coreference_resolution": 45.9163, + "eval_f1_for_data_to_text": 51.6785, + "eval_f1_for_dialogue_act_recognition": 39.9286, + "eval_f1_for_grammar_error_correction": 56.0342, + "eval_f1_for_keyword_tagging": 60.1236, + "eval_f1_for_overlap_extraction": 34.05, + "eval_f1_for_question_rewriting": 67.0085, + "eval_f1_for_task020_mctaco_answerability_classification": 56.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 51.1667, + "eval_f1_for_task034_winogrande_question_rewriting": 92.1269, + "eval_f1_for_task035_winogrande_question_rewriting": 90.979, + "eval_f1_for_task036_qasc_keyword_tagging": 69.5654, + "eval_f1_for_task039_qasc_overlap_extraction": 32.3333, + "eval_f1_for_task050_multirc_answerability_classification": 70.0, + "eval_f1_for_task102_commongen_data_to_text": 54.332, + "eval_f1_for_task1152_bard_word_analogy": 26.0, + "eval_f1_for_task1153_bard_word_analogy": 42.0, + "eval_f1_for_task1154_bard_word_analogy": 30.0, + "eval_f1_for_task1155_bard_word_analogy": 75.0, + "eval_f1_for_task1156_bard_word_analogy": 46.0, + "eval_f1_for_task1157_bard_word_analogy": 68.0, + "eval_f1_for_task1158_bard_word_analogy": 36.0, + "eval_f1_for_task1159_bard_word_analogy": 33.0, + "eval_f1_for_task1161_coda_19_title_generation": 34.1584, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 78.6609, + "eval_f1_for_task121_atomic_question_rewriting": 44.0927, + "eval_f1_for_task133_winowhy_coreference_resolution": 35.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 14.4448, + "eval_f1_for_task1344_rte_textual_entailment": 49.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.5733, + "eval_f1_for_task1356_xlsum_title_generation": 13.3923, + "eval_f1_for_task1358_xlsum_title_generation": 34.5216, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 48.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 54.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_f1_for_task1407_dart_data_to_text": 33.7306, + "eval_f1_for_task1409_dart_data_to_text": 47.4443, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 27.6555, + "eval_f1_for_task1439_doqa_answerability_classification": 47.0, + "eval_f1_for_task1442_doqa_answerability_classification": 53.0, + "eval_f1_for_task1516_imppres_textual_entailment": 25.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 47.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 48.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 23.0, + "eval_f1_for_task1540_peer_read_title_generation": 38.2817, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.4128, + "eval_f1_for_task1562_zest_question_rewriting": 49.0764, + "eval_f1_for_task1586_scifact_title_generation": 33.6765, + "eval_f1_for_task1598_nyc_data_to_text": 50.5645, + "eval_f1_for_task1612_sick_textual_entailment": 45.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.0475, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_f1_for_task1631_open_pi_data_to_text": 94.3481, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_f1_for_task1659_billsum_title_generation": 33.2718, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 73.619, + "eval_f1_for_task1728_web_nlg_data_to_text": 57.9991, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 48.0, + "eval_f1_for_task200_multinli_textual_entailment": 69.0, + "eval_f1_for_task201_multinli_textual_entailment": 25.0, + "eval_f1_for_task202_multinli_textual_entailment": 6.0, + "eval_f1_for_task219_rocstories_title_generation": 14.9749, + "eval_f1_for_task220_rocstories_title_generation": 52.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 49.0, + "eval_f1_for_task233_iirc_answerability_classification": 52.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 63.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 55.9381, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 35.7666, + "eval_f1_for_task288_gigaword_title_generation": 28.5238, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 16.9333, + "eval_f1_for_task329_gap_coreference_resolution": 45.0, + "eval_f1_for_task330_gap_coreference_resolution": 63.3524, + "eval_f1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 77.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 85.6667, + "eval_f1_for_task392_cod3s_cause_effect_classification": 88.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 30.5908, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 40.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 80.3458, + "eval_f1_for_task418_persent_title_generation": 24.5996, + "eval_f1_for_task442_com_qa_question_rewriting": 71.6853, + "eval_f1_for_task500_scruples_title_generation": 23.0702, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 37.8069, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 40.5758, + "eval_f1_for_task602_wikitext_title_generation": 12.1701, + "eval_f1_for_task613_liar_keyword_tagging": 19.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 40.9282, + "eval_f1_for_task619_ohsumed_title_generation": 42.5207, + "eval_f1_for_task620_ohsumed_keyword_tagging": 41.1667, + "eval_f1_for_task623_ohsumed_keyword_tagging": 76.0, + "eval_f1_for_task640_e_snli_textual_entailment": 20.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 47.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.219, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 26.3333, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.8644, + "eval_f1_for_task671_ambigqa_question_rewriting": 32.6418, + "eval_f1_for_task677_ollie_data_to_text": 33.8527, + "eval_f1_for_task738_perspectrum_textual_entailment": 65.0, + "eval_f1_for_task743_eurlex_title_generation": 26.6433, + "eval_f1_for_task760_msr_sqa_data_to_text": 0.2133, + "eval_f1_for_task769_qed_title_generation": 85.242, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 71.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 22.0, + "eval_f1_for_task890_gwsd_textual_entailment": 50.0, + "eval_f1_for_task891_gap_coreference_resolution": 51.819, + "eval_f1_for_task892_gap_coreference_resolution": 51.0, + "eval_f1_for_task893_gap_coreference_resolution": 34.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_f1_for_task957_e2e_data_to_text": 54.5375, + "eval_f1_for_task970_sherliic_textual_entailment": 57.0, + "eval_f1_for_textual_entailment": 43.375, + "eval_f1_for_title_generation": 32.9555, + "eval_f1_for_word_analogy": 44.5, + "eval_gen_len": 8.8776, + "eval_global_step": 1500, + "eval_loss": 1.284918189048767, + "eval_rouge1": 49.6276, + "eval_rouge1_for_answerability_classification": 56.1026, + "eval_rouge1_for_cause_effect_classification": 60.7916, + "eval_rouge1_for_coreference_resolution": 46.4675, + "eval_rouge1_for_data_to_text": 54.4453, + "eval_rouge1_for_dialogue_act_recognition": 44.5635, + "eval_rouge1_for_grammar_error_correction": 60.8793, + "eval_rouge1_for_keyword_tagging": 65.8399, + "eval_rouge1_for_overlap_extraction": 35.6145, + "eval_rouge1_for_question_rewriting": 68.5279, + "eval_rouge1_for_task020_mctaco_answerability_classification": 56.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 51.0667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.2016, + "eval_rouge1_for_task035_winogrande_question_rewriting": 91.4925, + "eval_rouge1_for_task036_qasc_keyword_tagging": 75.014, + "eval_rouge1_for_task039_qasc_overlap_extraction": 35.1667, + "eval_rouge1_for_task050_multirc_answerability_classification": 70.0, + "eval_rouge1_for_task102_commongen_data_to_text": 67.3596, + "eval_rouge1_for_task1152_bard_word_analogy": 26.0, + "eval_rouge1_for_task1153_bard_word_analogy": 42.0, + "eval_rouge1_for_task1154_bard_word_analogy": 30.0, + "eval_rouge1_for_task1155_bard_word_analogy": 75.0, + "eval_rouge1_for_task1156_bard_word_analogy": 46.0, + "eval_rouge1_for_task1157_bard_word_analogy": 68.0, + "eval_rouge1_for_task1158_bard_word_analogy": 36.0, + "eval_rouge1_for_task1159_bard_word_analogy": 33.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 38.0923, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 78.7579, + "eval_rouge1_for_task121_atomic_question_rewriting": 46.6342, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 35.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.7817, + "eval_rouge1_for_task1344_rte_textual_entailment": 49.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.5039, + "eval_rouge1_for_task1356_xlsum_title_generation": 15.5386, + "eval_rouge1_for_task1358_xlsum_title_generation": 39.2023, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 48.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 54.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 22.1111, + "eval_rouge1_for_task1407_dart_data_to_text": 34.3079, + "eval_rouge1_for_task1409_dart_data_to_text": 48.1177, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 34.1079, + "eval_rouge1_for_task1439_doqa_answerability_classification": 47.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 25.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 23.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 41.029, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.6506, + "eval_rouge1_for_task1562_zest_question_rewriting": 51.5496, + "eval_rouge1_for_task1586_scifact_title_generation": 37.7448, + "eval_rouge1_for_task1598_nyc_data_to_text": 53.4827, + "eval_rouge1_for_task1612_sick_textual_entailment": 45.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.2496, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 94.3517, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rouge1_for_task1659_billsum_title_generation": 35.6313, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 73.619, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 59.5782, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 48.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 69.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 25.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 6.0, + "eval_rouge1_for_task219_rocstories_title_generation": 19.2709, + "eval_rouge1_for_task220_rocstories_title_generation": 52.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 52.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 63.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 56.7833, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 36.0623, + "eval_rouge1_for_task288_gigaword_title_generation": 31.2631, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 17.5, + "eval_rouge1_for_task329_gap_coreference_resolution": 45.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 63.2571, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 77.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 85.6667, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 88.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.8362, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 46.6667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 82.8817, + "eval_rouge1_for_task418_persent_title_generation": 27.2452, + "eval_rouge1_for_task442_com_qa_question_rewriting": 75.024, + "eval_rouge1_for_task500_scruples_title_generation": 23.9538, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 38.3026, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 42.7674, + "eval_rouge1_for_task602_wikitext_title_generation": 12.3661, + "eval_rouge1_for_task613_liar_keyword_tagging": 34.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 45.705, + "eval_rouge1_for_task619_ohsumed_title_generation": 45.7218, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 49.4667, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 76.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 20.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 47.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.719, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 26.5, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.8156, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 33.6963, + "eval_rouge1_for_task677_ollie_data_to_text": 36.667, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 65.0, + "eval_rouge1_for_task743_eurlex_title_generation": 27.6956, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 0.2242, + "eval_rouge1_for_task769_qed_title_generation": 85.2762, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 71.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 50.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 52.1524, + "eval_rouge1_for_task892_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 34.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_rouge1_for_task957_e2e_data_to_text": 55.7948, + "eval_rouge1_for_task970_sherliic_textual_entailment": 57.0, + "eval_rouge1_for_textual_entailment": 45.2361, + "eval_rouge1_for_title_generation": 35.0843, + "eval_rouge1_for_word_analogy": 44.5, + "eval_rougeL": 48.2775, + "eval_rougeL_for_answerability_classification": 56.1026, + "eval_rougeL_for_cause_effect_classification": 60.1712, + "eval_rougeL_for_coreference_resolution": 46.4675, + "eval_rougeL_for_data_to_text": 47.2359, + "eval_rougeL_for_dialogue_act_recognition": 44.4349, + "eval_rougeL_for_grammar_error_correction": 60.0734, + "eval_rougeL_for_keyword_tagging": 65.3725, + "eval_rougeL_for_overlap_extraction": 34.7898, + "eval_rougeL_for_question_rewriting": 64.9625, + "eval_rougeL_for_task020_mctaco_answerability_classification": 56.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 51.0667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 91.9572, + "eval_rougeL_for_task035_winogrande_question_rewriting": 91.43, + "eval_rougeL_for_task036_qasc_keyword_tagging": 74.6602, + "eval_rougeL_for_task039_qasc_overlap_extraction": 35.1667, + "eval_rougeL_for_task050_multirc_answerability_classification": 70.0, + "eval_rougeL_for_task102_commongen_data_to_text": 58.6786, + "eval_rougeL_for_task1152_bard_word_analogy": 26.0, + "eval_rougeL_for_task1153_bard_word_analogy": 42.0, + "eval_rougeL_for_task1154_bard_word_analogy": 30.0, + "eval_rougeL_for_task1155_bard_word_analogy": 75.0, + "eval_rougeL_for_task1156_bard_word_analogy": 46.0, + "eval_rougeL_for_task1157_bard_word_analogy": 68.0, + "eval_rougeL_for_task1158_bard_word_analogy": 36.0, + "eval_rougeL_for_task1159_bard_word_analogy": 33.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 31.3068, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 77.7375, + "eval_rougeL_for_task121_atomic_question_rewriting": 39.1582, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 35.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 14.109, + "eval_rougeL_for_task1344_rte_textual_entailment": 49.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.6956, + "eval_rougeL_for_task1356_xlsum_title_generation": 13.3155, + "eval_rougeL_for_task1358_xlsum_title_generation": 33.5955, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 48.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 54.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 21.2111, + "eval_rougeL_for_task1407_dart_data_to_text": 28.0965, + "eval_rougeL_for_task1409_dart_data_to_text": 40.3172, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 33.361, + "eval_rougeL_for_task1439_doqa_answerability_classification": 47.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 25.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 23.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 37.3688, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.7857, + "eval_rougeL_for_task1562_zest_question_rewriting": 44.5447, + "eval_rougeL_for_task1586_scifact_title_generation": 30.2962, + "eval_rougeL_for_task1598_nyc_data_to_text": 42.0852, + "eval_rougeL_for_task1612_sick_textual_entailment": 45.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 76.9802, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 94.2341, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rougeL_for_task1659_billsum_title_generation": 29.9046, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 73.619, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.0754, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 48.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 69.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 25.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 6.0, + "eval_rougeL_for_task219_rocstories_title_generation": 19.2709, + "eval_rougeL_for_task220_rocstories_title_generation": 52.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 52.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 63.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 56.7833, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 34.4129, + "eval_rougeL_for_task288_gigaword_title_generation": 27.3921, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 17.5, + "eval_rougeL_for_task329_gap_coreference_resolution": 45.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 63.2571, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 77.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 85.6667, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 88.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.1163, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 46.6667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 70.1687, + "eval_rougeL_for_task418_persent_title_generation": 23.2222, + "eval_rougeL_for_task442_com_qa_question_rewriting": 71.1031, + "eval_rougeL_for_task500_scruples_title_generation": 22.9861, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 37.9573, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.8014, + "eval_rougeL_for_task602_wikitext_title_generation": 12.3661, + "eval_rougeL_for_task613_liar_keyword_tagging": 34.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 42.0822, + "eval_rougeL_for_task619_ohsumed_title_generation": 39.5963, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 47.4833, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 76.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 20.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 47.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.719, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 26.5, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.404, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 32.4085, + "eval_rougeL_for_task677_ollie_data_to_text": 30.1159, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 65.0, + "eval_rougeL_for_task743_eurlex_title_generation": 25.9311, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 0.2035, + "eval_rougeL_for_task769_qed_title_generation": 85.2762, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 71.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 35.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 50.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 52.1524, + "eval_rougeL_for_task892_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 34.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_rougeL_for_task957_e2e_data_to_text": 43.5128, + "eval_rougeL_for_task970_sherliic_textual_entailment": 57.0, + "eval_rougeL_for_textual_entailment": 45.2361, + "eval_rougeL_for_title_generation": 32.2151, + "eval_rougeL_for_word_analogy": 44.5, + "eval_runtime": 864.6375, + "eval_samples_per_second": 13.775, + "eval_steps_per_second": 0.862, + "step": 1500 + }, + { + "epoch": 0.44, + "learning_rate": 5e-05, + "loss": 1.0612, + "step": 2000 + }, + { + "epoch": 0.44, + "eval_exact_match": 31.8304, + "eval_exact_match_for_answerability_classification": 52.7692, + "eval_exact_match_for_cause_effect_classification": 46.5714, + "eval_exact_match_for_coreference_resolution": 42.0, + "eval_exact_match_for_data_to_text": 6.9007, + "eval_exact_match_for_dialogue_act_recognition": 39.1429, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 44.0, + "eval_exact_match_for_overlap_extraction": 11.5, + "eval_exact_match_for_question_rewriting": 4.1818, + "eval_exact_match_for_task020_mctaco_answerability_classification": 56.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 4.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 14.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 64.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 28.0, + "eval_exact_match_for_task1153_bard_word_analogy": 37.0, + "eval_exact_match_for_task1154_bard_word_analogy": 29.0, + "eval_exact_match_for_task1155_bard_word_analogy": 86.0, + "eval_exact_match_for_task1156_bard_word_analogy": 48.0, + "eval_exact_match_for_task1157_bard_word_analogy": 65.0, + "eval_exact_match_for_task1158_bard_word_analogy": 18.0, + "eval_exact_match_for_task1159_bard_word_analogy": 33.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 12.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 45.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 70.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 46.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 52.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 45.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 19.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 50.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 45.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 38.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 86.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 13.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 4.0, + "eval_exact_match_for_task219_rocstories_title_generation": 5.0, + "eval_exact_match_for_task220_rocstories_title_generation": 97.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 54.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 42.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 71.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 62.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 49.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 56.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 65.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 61.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 61.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 31.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 5.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 23.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 16.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 83.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 25.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 36.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 84.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 6.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 3.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 55.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 66.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 72.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 62.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 21.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 36.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 55.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 69.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 73.0, + "eval_exact_match_for_textual_entailment": 42.6667, + "eval_exact_match_for_title_generation": 10.5381, + "eval_exact_match_for_word_analogy": 43.0, + "eval_f1": 48.3232, + "eval_f1_for_answerability_classification": 55.3846, + "eval_f1_for_cause_effect_classification": 64.7341, + "eval_f1_for_coreference_resolution": 47.7319, + "eval_f1_for_data_to_text": 49.0297, + "eval_f1_for_dialogue_act_recognition": 41.6429, + "eval_f1_for_grammar_error_correction": 56.6834, + "eval_f1_for_keyword_tagging": 59.7844, + "eval_f1_for_overlap_extraction": 26.7918, + "eval_f1_for_question_rewriting": 67.1918, + "eval_f1_for_task020_mctaco_answerability_classification": 56.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 53.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 91.1304, + "eval_f1_for_task035_winogrande_question_rewriting": 89.4786, + "eval_f1_for_task036_qasc_keyword_tagging": 59.3266, + "eval_f1_for_task039_qasc_overlap_extraction": 33.1524, + "eval_f1_for_task050_multirc_answerability_classification": 64.0, + "eval_f1_for_task102_commongen_data_to_text": 52.6033, + "eval_f1_for_task1152_bard_word_analogy": 28.0, + "eval_f1_for_task1153_bard_word_analogy": 37.0, + "eval_f1_for_task1154_bard_word_analogy": 29.0, + "eval_f1_for_task1155_bard_word_analogy": 86.0, + "eval_f1_for_task1156_bard_word_analogy": 48.6667, + "eval_f1_for_task1157_bard_word_analogy": 65.0, + "eval_f1_for_task1158_bard_word_analogy": 18.0, + "eval_f1_for_task1159_bard_word_analogy": 33.0, + "eval_f1_for_task1161_coda_19_title_generation": 33.7365, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 82.1885, + "eval_f1_for_task121_atomic_question_rewriting": 44.5551, + "eval_f1_for_task133_winowhy_coreference_resolution": 45.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.4809, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.0678, + "eval_f1_for_task1356_xlsum_title_generation": 18.8278, + "eval_f1_for_task1358_xlsum_title_generation": 33.9476, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 70.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_f1_for_task1407_dart_data_to_text": 32.369, + "eval_f1_for_task1409_dart_data_to_text": 47.045, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.0289, + "eval_f1_for_task1439_doqa_answerability_classification": 46.0, + "eval_f1_for_task1442_doqa_answerability_classification": 52.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 45.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1540_peer_read_title_generation": 37.1881, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.3379, + "eval_f1_for_task1562_zest_question_rewriting": 48.2367, + "eval_f1_for_task1586_scifact_title_generation": 35.192, + "eval_f1_for_task1598_nyc_data_to_text": 46.1104, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 82.6383, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_f1_for_task1631_open_pi_data_to_text": 89.9849, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 33.865, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 77.2381, + "eval_f1_for_task1728_web_nlg_data_to_text": 57.7769, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 38.0, + "eval_f1_for_task200_multinli_textual_entailment": 86.0, + "eval_f1_for_task201_multinli_textual_entailment": 13.0, + "eval_f1_for_task202_multinli_textual_entailment": 4.0, + "eval_f1_for_task219_rocstories_title_generation": 20.1086, + "eval_f1_for_task220_rocstories_title_generation": 97.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_f1_for_task232_iirc_answerability_classification": 54.0, + "eval_f1_for_task233_iirc_answerability_classification": 42.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 71.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 72.7333, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 20.4311, + "eval_f1_for_task288_gigaword_title_generation": 29.3664, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 8.0, + "eval_f1_for_task329_gap_coreference_resolution": 49.0, + "eval_f1_for_task330_gap_coreference_resolution": 62.1302, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 82.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 87.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 87.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 30.7226, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 36.0, + "eval_f1_for_task402_grailqa_question_rewriting": 76.2418, + "eval_f1_for_task418_persent_title_generation": 25.4868, + "eval_f1_for_task442_com_qa_question_rewriting": 70.2347, + "eval_f1_for_task500_scruples_title_generation": 24.2557, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 39.8678, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 39.4801, + "eval_f1_for_task602_wikitext_title_generation": 9.2816, + "eval_f1_for_task613_liar_keyword_tagging": 23.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 44.4158, + "eval_f1_for_task619_ohsumed_title_generation": 45.0541, + "eval_f1_for_task620_ohsumed_keyword_tagging": 39.8, + "eval_f1_for_task623_ohsumed_keyword_tagging": 83.0, + "eval_f1_for_task640_e_snli_textual_entailment": 25.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 36.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.1286, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 9.3333, + "eval_f1_for_task670_ambigqa_question_rewriting": 80.414, + "eval_f1_for_task671_ambigqa_question_rewriting": 34.9239, + "eval_f1_for_task677_ollie_data_to_text": 27.0527, + "eval_f1_for_task738_perspectrum_textual_entailment": 55.0, + "eval_f1_for_task743_eurlex_title_generation": 25.2875, + "eval_f1_for_task760_msr_sqa_data_to_text": 0.3421, + "eval_f1_for_task769_qed_title_generation": 81.8345, + "eval_f1_for_task827_copa_cause_effect_classification": 72.0, + "eval_f1_for_task828_copa_cause_effect_classification": 62.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 21.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 49.4778, + "eval_f1_for_task892_gap_coreference_resolution": 51.0, + "eval_f1_for_task893_gap_coreference_resolution": 55.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 69.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_f1_for_task957_e2e_data_to_text": 51.9544, + "eval_f1_for_task970_sherliic_textual_entailment": 73.0, + "eval_f1_for_textual_entailment": 42.6667, + "eval_f1_for_title_generation": 36.0861, + "eval_f1_for_word_analogy": 43.0833, + "eval_gen_len": 8.7186, + "eval_global_step": 2000, + "eval_loss": 1.3209514617919922, + "eval_rouge1": 50.1549, + "eval_rouge1_for_answerability_classification": 55.3846, + "eval_rouge1_for_cause_effect_classification": 65.4587, + "eval_rouge1_for_coreference_resolution": 48.334, + "eval_rouge1_for_data_to_text": 51.5161, + "eval_rouge1_for_dialogue_act_recognition": 46.9587, + "eval_rouge1_for_grammar_error_correction": 61.4571, + "eval_rouge1_for_keyword_tagging": 65.2109, + "eval_rouge1_for_overlap_extraction": 30.0863, + "eval_rouge1_for_question_rewriting": 68.842, + "eval_rouge1_for_task020_mctaco_answerability_classification": 56.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 53.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 91.1652, + "eval_rouge1_for_task035_winogrande_question_rewriting": 90.0785, + "eval_rouge1_for_task036_qasc_keyword_tagging": 65.1068, + "eval_rouge1_for_task039_qasc_overlap_extraction": 38.919, + "eval_rouge1_for_task050_multirc_answerability_classification": 64.0, + "eval_rouge1_for_task102_commongen_data_to_text": 64.379, + "eval_rouge1_for_task1152_bard_word_analogy": 28.0, + "eval_rouge1_for_task1153_bard_word_analogy": 37.0, + "eval_rouge1_for_task1154_bard_word_analogy": 29.0, + "eval_rouge1_for_task1155_bard_word_analogy": 86.0, + "eval_rouge1_for_task1156_bard_word_analogy": 48.6667, + "eval_rouge1_for_task1157_bard_word_analogy": 65.0, + "eval_rouge1_for_task1158_bard_word_analogy": 18.0, + "eval_rouge1_for_task1159_bard_word_analogy": 33.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 37.8134, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 82.4052, + "eval_rouge1_for_task121_atomic_question_rewriting": 46.7251, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 45.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 15.9198, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.1968, + "eval_rouge1_for_task1356_xlsum_title_generation": 21.8385, + "eval_rouge1_for_task1358_xlsum_title_generation": 37.7211, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 70.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 28.2111, + "eval_rouge1_for_task1407_dart_data_to_text": 33.1158, + "eval_rouge1_for_task1409_dart_data_to_text": 47.9286, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.472, + "eval_rouge1_for_task1439_doqa_answerability_classification": 46.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 52.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 45.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 40.2964, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.4423, + "eval_rouge1_for_task1562_zest_question_rewriting": 51.0013, + "eval_rouge1_for_task1586_scifact_title_generation": 39.0752, + "eval_rouge1_for_task1598_nyc_data_to_text": 48.6766, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 82.92, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 90.1823, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 36.0428, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 77.2381, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 59.569, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 38.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 86.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 13.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 4.0, + "eval_rouge1_for_task219_rocstories_title_generation": 24.099, + "eval_rouge1_for_task220_rocstories_title_generation": 97.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 54.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 42.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 71.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 73.4, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 21.2535, + "eval_rouge1_for_task288_gigaword_title_generation": 32.3574, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 9.2333, + "eval_rouge1_for_task329_gap_coreference_resolution": 49.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 61.919, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 82.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 87.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 87.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.9823, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 42.0, + "eval_rouge1_for_task402_grailqa_question_rewriting": 79.6509, + "eval_rouge1_for_task418_persent_title_generation": 28.4493, + "eval_rouge1_for_task442_com_qa_question_rewriting": 73.6176, + "eval_rouge1_for_task500_scruples_title_generation": 26.44, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 40.7212, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 41.5054, + "eval_rouge1_for_task602_wikitext_title_generation": 10.6746, + "eval_rouge1_for_task613_liar_keyword_tagging": 38.5, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 49.2286, + "eval_rouge1_for_task619_ohsumed_title_generation": 48.7317, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 45.819, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 83.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 25.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 36.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 93.6286, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 9.8333, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 81.4176, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 36.0842, + "eval_rouge1_for_task677_ollie_data_to_text": 29.9655, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 55.0, + "eval_rouge1_for_task743_eurlex_title_generation": 25.7638, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 0.3376, + "eval_rouge1_for_task769_qed_title_generation": 81.8547, + "eval_rouge1_for_task827_copa_cause_effect_classification": 72.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 62.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 49.719, + "eval_rouge1_for_task892_gap_coreference_resolution": 51.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 55.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 69.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rouge1_for_task957_e2e_data_to_text": 51.6187, + "eval_rouge1_for_task970_sherliic_textual_entailment": 73.0, + "eval_rouge1_for_textual_entailment": 44.5278, + "eval_rouge1_for_title_generation": 38.3742, + "eval_rouge1_for_word_analogy": 43.0833, + "eval_rougeL": 48.8103, + "eval_rougeL_for_answerability_classification": 55.3846, + "eval_rougeL_for_cause_effect_classification": 64.7017, + "eval_rougeL_for_coreference_resolution": 48.334, + "eval_rougeL_for_data_to_text": 44.8413, + "eval_rougeL_for_dialogue_act_recognition": 46.7683, + "eval_rougeL_for_grammar_error_correction": 60.7099, + "eval_rougeL_for_keyword_tagging": 64.6182, + "eval_rougeL_for_overlap_extraction": 29.6446, + "eval_rougeL_for_question_rewriting": 65.4479, + "eval_rougeL_for_task020_mctaco_answerability_classification": 56.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 53.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 90.7254, + "eval_rougeL_for_task035_winogrande_question_rewriting": 89.7498, + "eval_rougeL_for_task036_qasc_keyword_tagging": 63.4769, + "eval_rougeL_for_task039_qasc_overlap_extraction": 38.919, + "eval_rougeL_for_task050_multirc_answerability_classification": 64.0, + "eval_rougeL_for_task102_commongen_data_to_text": 53.0019, + "eval_rougeL_for_task1152_bard_word_analogy": 28.0, + "eval_rougeL_for_task1153_bard_word_analogy": 37.0, + "eval_rougeL_for_task1154_bard_word_analogy": 29.0, + "eval_rougeL_for_task1155_bard_word_analogy": 86.0, + "eval_rougeL_for_task1156_bard_word_analogy": 48.6667, + "eval_rougeL_for_task1157_bard_word_analogy": 65.0, + "eval_rougeL_for_task1158_bard_word_analogy": 18.0, + "eval_rougeL_for_task1159_bard_word_analogy": 33.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 31.6372, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 81.3877, + "eval_rougeL_for_task121_atomic_question_rewriting": 40.8826, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 45.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.0617, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.409, + "eval_rougeL_for_task1356_xlsum_title_generation": 18.6114, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.6578, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 70.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 26.8778, + "eval_rougeL_for_task1407_dart_data_to_text": 28.0529, + "eval_rougeL_for_task1409_dart_data_to_text": 41.9738, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 34.8832, + "eval_rougeL_for_task1439_doqa_answerability_classification": 46.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 52.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 40.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 45.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 36.7229, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.5366, + "eval_rougeL_for_task1562_zest_question_rewriting": 44.2307, + "eval_rougeL_for_task1586_scifact_title_generation": 32.2915, + "eval_rougeL_for_task1598_nyc_data_to_text": 38.6484, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 81.5379, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 53.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 88.4767, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 29.262, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 77.2381, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 52.884, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 38.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 86.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 13.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 4.0, + "eval_rougeL_for_task219_rocstories_title_generation": 23.8768, + "eval_rougeL_for_task220_rocstories_title_generation": 97.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 49.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 54.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 42.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 71.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 73.4, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 20.3702, + "eval_rougeL_for_task288_gigaword_title_generation": 28.2591, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 9.2333, + "eval_rougeL_for_task329_gap_coreference_resolution": 49.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 61.919, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 82.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 87.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 87.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 29.6676, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 42.0, + "eval_rougeL_for_task402_grailqa_question_rewriting": 68.2173, + "eval_rougeL_for_task418_persent_title_generation": 24.3115, + "eval_rougeL_for_task442_com_qa_question_rewriting": 69.2232, + "eval_rougeL_for_task500_scruples_title_generation": 25.0009, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 40.1126, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 39.903, + "eval_rougeL_for_task602_wikitext_title_generation": 10.6746, + "eval_rougeL_for_task613_liar_keyword_tagging": 38.5, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 45.2442, + "eval_rougeL_for_task619_ohsumed_title_generation": 39.3671, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 44.4857, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 83.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 25.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 36.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 93.6286, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 9.8333, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 80.1751, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 34.3877, + "eval_rougeL_for_task677_ollie_data_to_text": 24.7525, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 55.0, + "eval_rougeL_for_task743_eurlex_title_generation": 23.9973, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 0.3376, + "eval_rougeL_for_task769_qed_title_generation": 81.8547, + "eval_rougeL_for_task827_copa_cause_effect_classification": 72.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 62.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 49.719, + "eval_rougeL_for_task892_gap_coreference_resolution": 51.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 55.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 69.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rougeL_for_task957_e2e_data_to_text": 42.5115, + "eval_rougeL_for_task970_sherliic_textual_entailment": 73.0, + "eval_rougeL_for_textual_entailment": 44.5278, + "eval_rougeL_for_title_generation": 35.2519, + "eval_rougeL_for_word_analogy": 43.0833, + "eval_runtime": 826.2621, + "eval_samples_per_second": 14.414, + "eval_steps_per_second": 0.902, + "step": 2000 + }, + { + "epoch": 0.55, + "learning_rate": 5e-05, + "loss": 1.0167, + "step": 2500 + }, + { + "epoch": 0.55, + "eval_exact_match": 31.3938, + "eval_exact_match_for_answerability_classification": 53.5385, + "eval_exact_match_for_cause_effect_classification": 47.4286, + "eval_exact_match_for_coreference_resolution": 43.0714, + "eval_exact_match_for_data_to_text": 8.5956, + "eval_exact_match_for_dialogue_act_recognition": 37.0, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 37.0, + "eval_exact_match_for_overlap_extraction": 14.0, + "eval_exact_match_for_question_rewriting": 3.7273, + "eval_exact_match_for_task020_mctaco_answerability_classification": 51.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 52.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 7.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 14.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 28.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 53.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 26.0, + "eval_exact_match_for_task1153_bard_word_analogy": 37.0, + "eval_exact_match_for_task1154_bard_word_analogy": 28.0, + "eval_exact_match_for_task1155_bard_word_analogy": 63.0, + "eval_exact_match_for_task1156_bard_word_analogy": 41.0, + "eval_exact_match_for_task1157_bard_word_analogy": 60.0, + "eval_exact_match_for_task1158_bard_word_analogy": 22.0, + "eval_exact_match_for_task1159_bard_word_analogy": 26.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 11.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 39.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 67.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 49.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 32.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 37.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 9.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 64.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 52.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 37.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 40.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 85.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 16.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 1.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 96.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 51.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 86.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 65.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 49.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 53.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 64.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 51.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 72.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 68.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 62.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 41.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 7.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 5.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 12.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 18.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 52.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 24.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 8.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 89.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 1.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 70.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 73.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 64.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 71.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 38.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 52.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 48.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 52.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 67.0, + "eval_exact_match_for_textual_entailment": 42.625, + "eval_exact_match_for_title_generation": 10.3139, + "eval_exact_match_for_word_analogy": 37.875, + "eval_f1": 47.5477, + "eval_f1_for_answerability_classification": 56.1538, + "eval_f1_for_cause_effect_classification": 63.6117, + "eval_f1_for_coreference_resolution": 48.0163, + "eval_f1_for_data_to_text": 50.3583, + "eval_f1_for_dialogue_act_recognition": 39.0, + "eval_f1_for_grammar_error_correction": 57.221, + "eval_f1_for_keyword_tagging": 51.8912, + "eval_f1_for_overlap_extraction": 34.7492, + "eval_f1_for_question_rewriting": 66.5737, + "eval_f1_for_task020_mctaco_answerability_classification": 51.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 54.5, + "eval_f1_for_task034_winogrande_question_rewriting": 91.1151, + "eval_f1_for_task035_winogrande_question_rewriting": 89.3639, + "eval_f1_for_task036_qasc_keyword_tagging": 57.1036, + "eval_f1_for_task039_qasc_overlap_extraction": 35.6667, + "eval_f1_for_task050_multirc_answerability_classification": 53.0, + "eval_f1_for_task102_commongen_data_to_text": 52.6638, + "eval_f1_for_task1152_bard_word_analogy": 26.0, + "eval_f1_for_task1153_bard_word_analogy": 37.6667, + "eval_f1_for_task1154_bard_word_analogy": 28.0, + "eval_f1_for_task1155_bard_word_analogy": 63.0, + "eval_f1_for_task1156_bard_word_analogy": 41.0, + "eval_f1_for_task1157_bard_word_analogy": 60.0, + "eval_f1_for_task1158_bard_word_analogy": 22.0, + "eval_f1_for_task1159_bard_word_analogy": 26.0, + "eval_f1_for_task1161_coda_19_title_generation": 31.1807, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.0403, + "eval_f1_for_task121_atomic_question_rewriting": 42.4772, + "eval_f1_for_task133_winowhy_coreference_resolution": 39.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 13.7004, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.5716, + "eval_f1_for_task1356_xlsum_title_generation": 15.707, + "eval_f1_for_task1358_xlsum_title_generation": 32.2552, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 67.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_f1_for_task1407_dart_data_to_text": 30.9416, + "eval_f1_for_task1409_dart_data_to_text": 49.0957, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.4778, + "eval_f1_for_task1439_doqa_answerability_classification": 49.0, + "eval_f1_for_task1442_doqa_answerability_classification": 53.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 32.0, + "eval_f1_for_task1540_peer_read_title_generation": 36.5052, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.9641, + "eval_f1_for_task1562_zest_question_rewriting": 47.258, + "eval_f1_for_task1586_scifact_title_generation": 34.0493, + "eval_f1_for_task1598_nyc_data_to_text": 51.4134, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 37.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 80.7137, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 96.2565, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 52.0, + "eval_f1_for_task1659_billsum_title_generation": 33.5688, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 72.2381, + "eval_f1_for_task1728_web_nlg_data_to_text": 56.6428, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 40.0, + "eval_f1_for_task200_multinli_textual_entailment": 85.0, + "eval_f1_for_task201_multinli_textual_entailment": 16.0, + "eval_f1_for_task202_multinli_textual_entailment": 1.0, + "eval_f1_for_task219_rocstories_title_generation": 13.0944, + "eval_f1_for_task220_rocstories_title_generation": 96.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_f1_for_task232_iirc_answerability_classification": 51.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 86.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 74.4548, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 33.8316, + "eval_f1_for_task288_gigaword_title_generation": 29.2639, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 2.1667, + "eval_f1_for_task329_gap_coreference_resolution": 53.0, + "eval_f1_for_task330_gap_coreference_resolution": 71.7238, + "eval_f1_for_task349_squad2.0_answerability_classification": 51.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 86.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 89.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 29.8663, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 42.3333, + "eval_f1_for_task402_grailqa_question_rewriting": 81.6073, + "eval_f1_for_task418_persent_title_generation": 24.6295, + "eval_f1_for_task442_com_qa_question_rewriting": 71.2306, + "eval_f1_for_task500_scruples_title_generation": 25.7016, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 37.199, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 38.9832, + "eval_f1_for_task602_wikitext_title_generation": 8.0261, + "eval_f1_for_task613_liar_keyword_tagging": 12.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 36.7493, + "eval_f1_for_task619_ohsumed_title_generation": 43.7239, + "eval_f1_for_task620_ohsumed_keyword_tagging": 41.8, + "eval_f1_for_task623_ohsumed_keyword_tagging": 52.0, + "eval_f1_for_task640_e_snli_textual_entailment": 24.0, + "eval_f1_for_task641_e_snli_textual_entailment": 8.0, + "eval_f1_for_task642_e_snli_textual_entailment": 33.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 95.8857, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 3.5, + "eval_f1_for_task670_ambigqa_question_rewriting": 81.3018, + "eval_f1_for_task671_ambigqa_question_rewriting": 26.6307, + "eval_f1_for_task677_ollie_data_to_text": 25.9982, + "eval_f1_for_task738_perspectrum_textual_entailment": 70.0, + "eval_f1_for_task743_eurlex_title_generation": 25.699, + "eval_f1_for_task760_msr_sqa_data_to_text": 0.2523, + "eval_f1_for_task769_qed_title_generation": 86.0905, + "eval_f1_for_task827_copa_cause_effect_classification": 64.0, + "eval_f1_for_task828_copa_cause_effect_classification": 71.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 38.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_f1_for_task890_gwsd_textual_entailment": 52.0, + "eval_f1_for_task891_gap_coreference_resolution": 57.3111, + "eval_f1_for_task892_gap_coreference_resolution": 50.0, + "eval_f1_for_task893_gap_coreference_resolution": 52.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_f1_for_task957_e2e_data_to_text": 52.8816, + "eval_f1_for_task970_sherliic_textual_entailment": 67.0, + "eval_f1_for_textual_entailment": 42.625, + "eval_f1_for_title_generation": 34.9828, + "eval_f1_for_word_analogy": 37.9583, + "eval_gen_len": 8.7019, + "eval_global_step": 2500, + "eval_loss": 1.296701192855835, + "eval_rouge1": 49.3022, + "eval_rouge1_for_answerability_classification": 56.1538, + "eval_rouge1_for_cause_effect_classification": 64.2503, + "eval_rouge1_for_coreference_resolution": 48.7007, + "eval_rouge1_for_data_to_text": 53.2714, + "eval_rouge1_for_dialogue_act_recognition": 43.4011, + "eval_rouge1_for_grammar_error_correction": 62.3629, + "eval_rouge1_for_keyword_tagging": 56.5336, + "eval_rouge1_for_overlap_extraction": 37.005, + "eval_rouge1_for_question_rewriting": 68.3025, + "eval_rouge1_for_task020_mctaco_answerability_classification": 51.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 54.5, + "eval_rouge1_for_task034_winogrande_question_rewriting": 91.1629, + "eval_rouge1_for_task035_winogrande_question_rewriting": 89.8919, + "eval_rouge1_for_task036_qasc_keyword_tagging": 62.4157, + "eval_rouge1_for_task039_qasc_overlap_extraction": 39.5, + "eval_rouge1_for_task050_multirc_answerability_classification": 53.0, + "eval_rouge1_for_task102_commongen_data_to_text": 67.2395, + "eval_rouge1_for_task1152_bard_word_analogy": 26.0, + "eval_rouge1_for_task1153_bard_word_analogy": 37.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 28.0, + "eval_rouge1_for_task1155_bard_word_analogy": 63.0, + "eval_rouge1_for_task1156_bard_word_analogy": 41.0, + "eval_rouge1_for_task1157_bard_word_analogy": 60.0, + "eval_rouge1_for_task1158_bard_word_analogy": 22.0, + "eval_rouge1_for_task1159_bard_word_analogy": 26.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 35.1169, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.211, + "eval_rouge1_for_task121_atomic_question_rewriting": 45.0245, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 39.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 14.3342, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.6159, + "eval_rouge1_for_task1356_xlsum_title_generation": 18.8089, + "eval_rouge1_for_task1358_xlsum_title_generation": 36.3438, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 67.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 20.4746, + "eval_rouge1_for_task1407_dart_data_to_text": 31.2216, + "eval_rouge1_for_task1409_dart_data_to_text": 49.9774, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.5339, + "eval_rouge1_for_task1439_doqa_answerability_classification": 49.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 32.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 40.176, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.192, + "eval_rouge1_for_task1562_zest_question_rewriting": 51.1964, + "eval_rouge1_for_task1586_scifact_title_generation": 37.9862, + "eval_rouge1_for_task1598_nyc_data_to_text": 54.3771, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 79.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 80.9575, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 96.2951, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 52.0, + "eval_rouge1_for_task1659_billsum_title_generation": 35.5152, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 72.2381, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 57.734, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 40.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 85.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 16.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 1.0, + "eval_rouge1_for_task219_rocstories_title_generation": 17.277, + "eval_rouge1_for_task220_rocstories_title_generation": 96.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 51.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 86.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 75.3, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 34.51, + "eval_rouge1_for_task288_gigaword_title_generation": 32.0405, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 2.8333, + "eval_rouge1_for_task329_gap_coreference_resolution": 53.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 71.6524, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 51.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 86.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 89.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.3032, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 50.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 83.8513, + "eval_rouge1_for_task418_persent_title_generation": 27.7461, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.7257, + "eval_rouge1_for_task500_scruples_title_generation": 26.7269, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 38.1953, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 41.1838, + "eval_rouge1_for_task602_wikitext_title_generation": 9.3192, + "eval_rouge1_for_task613_liar_keyword_tagging": 24.6667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 40.7825, + "eval_rouge1_for_task619_ohsumed_title_generation": 47.5366, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 47.2, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 52.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 24.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 8.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 96.3857, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 3.4, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.5473, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 28.143, + "eval_rouge1_for_task677_ollie_data_to_text": 28.8116, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 70.0, + "eval_rouge1_for_task743_eurlex_title_generation": 26.18, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 0.2751, + "eval_rouge1_for_task769_qed_title_generation": 86.1107, + "eval_rouge1_for_task827_copa_cause_effect_classification": 64.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 71.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 38.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 52.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 57.5524, + "eval_rouge1_for_task892_gap_coreference_resolution": 50.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 52.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_rouge1_for_task957_e2e_data_to_text": 54.2942, + "eval_rouge1_for_task970_sherliic_textual_entailment": 67.0, + "eval_rouge1_for_textual_entailment": 44.375, + "eval_rouge1_for_title_generation": 37.2817, + "eval_rouge1_for_word_analogy": 37.9583, + "eval_rougeL": 47.9093, + "eval_rougeL_for_answerability_classification": 56.1538, + "eval_rougeL_for_cause_effect_classification": 63.7048, + "eval_rougeL_for_coreference_resolution": 48.7007, + "eval_rougeL_for_data_to_text": 45.9251, + "eval_rougeL_for_dialogue_act_recognition": 43.2726, + "eval_rougeL_for_grammar_error_correction": 61.6299, + "eval_rougeL_for_keyword_tagging": 55.707, + "eval_rougeL_for_overlap_extraction": 36.6852, + "eval_rougeL_for_question_rewriting": 64.496, + "eval_rougeL_for_task020_mctaco_answerability_classification": 51.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 54.5, + "eval_rougeL_for_task034_winogrande_question_rewriting": 89.9332, + "eval_rougeL_for_task035_winogrande_question_rewriting": 89.084, + "eval_rougeL_for_task036_qasc_keyword_tagging": 60.0324, + "eval_rougeL_for_task039_qasc_overlap_extraction": 39.5, + "eval_rougeL_for_task050_multirc_answerability_classification": 53.0, + "eval_rougeL_for_task102_commongen_data_to_text": 55.1287, + "eval_rougeL_for_task1152_bard_word_analogy": 26.0, + "eval_rougeL_for_task1153_bard_word_analogy": 37.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 28.0, + "eval_rougeL_for_task1155_bard_word_analogy": 63.0, + "eval_rougeL_for_task1156_bard_word_analogy": 41.0, + "eval_rougeL_for_task1157_bard_word_analogy": 60.0, + "eval_rougeL_for_task1158_bard_word_analogy": 22.0, + "eval_rougeL_for_task1159_bard_word_analogy": 26.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 29.6676, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.404, + "eval_rougeL_for_task121_atomic_question_rewriting": 37.7288, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 39.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 13.7256, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.6702, + "eval_rougeL_for_task1356_xlsum_title_generation": 16.3699, + "eval_rougeL_for_task1358_xlsum_title_generation": 30.9464, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 67.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 19.5746, + "eval_rougeL_for_task1407_dart_data_to_text": 26.2285, + "eval_rougeL_for_task1409_dart_data_to_text": 41.9053, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.0408, + "eval_rougeL_for_task1439_doqa_answerability_classification": 49.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 42.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 32.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 36.8625, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.219, + "eval_rougeL_for_task1562_zest_question_rewriting": 44.9764, + "eval_rougeL_for_task1586_scifact_title_generation": 31.2588, + "eval_rougeL_for_task1598_nyc_data_to_text": 42.6655, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 79.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 79.6753, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 95.1491, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 52.0, + "eval_rougeL_for_task1659_billsum_title_generation": 28.2843, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 72.2381, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 51.5247, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 40.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 85.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 16.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 1.0, + "eval_rougeL_for_task219_rocstories_title_generation": 17.277, + "eval_rougeL_for_task220_rocstories_title_generation": 96.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 51.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 86.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 75.3, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 33.8703, + "eval_rougeL_for_task288_gigaword_title_generation": 28.5356, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 2.8333, + "eval_rougeL_for_task329_gap_coreference_resolution": 53.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 71.6524, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 51.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 86.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 89.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 87.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 29.5204, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 50.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 69.8065, + "eval_rougeL_for_task418_persent_title_generation": 22.9357, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.6478, + "eval_rougeL_for_task500_scruples_title_generation": 25.3543, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 37.6819, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 40.4187, + "eval_rougeL_for_task602_wikitext_title_generation": 8.8863, + "eval_rougeL_for_task613_liar_keyword_tagging": 24.6667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 37.7465, + "eval_rougeL_for_task619_ohsumed_title_generation": 39.8334, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 45.45, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 52.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 24.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 8.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 96.3857, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 3.4, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 82.1447, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 25.3854, + "eval_rougeL_for_task677_ollie_data_to_text": 24.4254, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 70.0, + "eval_rougeL_for_task743_eurlex_title_generation": 23.9862, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 0.2643, + "eval_rougeL_for_task769_qed_title_generation": 86.1107, + "eval_rougeL_for_task827_copa_cause_effect_classification": 64.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 71.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 38.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 52.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 57.5524, + "eval_rougeL_for_task892_gap_coreference_resolution": 50.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 52.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 57.0, + "eval_rougeL_for_task957_e2e_data_to_text": 42.2454, + "eval_rougeL_for_task970_sherliic_textual_entailment": 67.0, + "eval_rougeL_for_textual_entailment": 44.375, + "eval_rougeL_for_title_generation": 34.3449, + "eval_rougeL_for_word_analogy": 37.9583, + "eval_runtime": 799.0049, + "eval_samples_per_second": 14.906, + "eval_steps_per_second": 0.932, + "step": 2500 + }, + { + "epoch": 0.66, + "learning_rate": 5e-05, + "loss": 0.9888, + "step": 3000 + }, + { + "epoch": 0.66, + "eval_exact_match": 31.738, + "eval_exact_match_for_answerability_classification": 56.9231, + "eval_exact_match_for_cause_effect_classification": 44.1429, + "eval_exact_match_for_coreference_resolution": 40.2857, + "eval_exact_match_for_data_to_text": 8.5956, + "eval_exact_match_for_dialogue_act_recognition": 39.2857, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 38.6, + "eval_exact_match_for_overlap_extraction": 12.5, + "eval_exact_match_for_question_rewriting": 2.6364, + "eval_exact_match_for_task020_mctaco_answerability_classification": 56.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 45.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 6.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 3.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 14.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 25.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 62.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 28.0, + "eval_exact_match_for_task1153_bard_word_analogy": 32.0, + "eval_exact_match_for_task1154_bard_word_analogy": 17.0, + "eval_exact_match_for_task1155_bard_word_analogy": 72.0, + "eval_exact_match_for_task1156_bard_word_analogy": 35.0, + "eval_exact_match_for_task1157_bard_word_analogy": 62.0, + "eval_exact_match_for_task1158_bard_word_analogy": 30.0, + "eval_exact_match_for_task1159_bard_word_analogy": 28.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 30.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 72.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 42.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 57.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 43.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 35.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 59.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 62.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 35.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 8.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 49.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 91.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 13.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 3.0, + "eval_exact_match_for_task219_rocstories_title_generation": 1.0, + "eval_exact_match_for_task220_rocstories_title_generation": 98.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 52.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 93.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 55.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 47.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 62.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 55.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 63.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 35.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 8.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 2.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 58.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 7.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 21.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 21.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 57.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 38.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 47.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 80.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 18.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 3.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 53.0, + "eval_exact_match_for_task743_eurlex_title_generation": 3.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 63.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 64.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 73.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 48.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 39.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 73.0, + "eval_exact_match_for_textual_entailment": 44.6667, + "eval_exact_match_for_title_generation": 10.2578, + "eval_exact_match_for_word_analogy": 38.0, + "eval_f1": 48.9548, + "eval_f1_for_answerability_classification": 59.4872, + "eval_f1_for_cause_effect_classification": 63.9887, + "eval_f1_for_coreference_resolution": 45.6807, + "eval_f1_for_data_to_text": 50.3664, + "eval_f1_for_dialogue_act_recognition": 41.9286, + "eval_f1_for_grammar_error_correction": 56.8084, + "eval_f1_for_keyword_tagging": 55.4252, + "eval_f1_for_overlap_extraction": 34.8553, + "eval_f1_for_question_rewriting": 70.1678, + "eval_f1_for_task020_mctaco_answerability_classification": 56.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 48.1667, + "eval_f1_for_task034_winogrande_question_rewriting": 92.8607, + "eval_f1_for_task035_winogrande_question_rewriting": 90.1651, + "eval_f1_for_task036_qasc_keyword_tagging": 60.8545, + "eval_f1_for_task039_qasc_overlap_extraction": 35.3333, + "eval_f1_for_task050_multirc_answerability_classification": 62.0, + "eval_f1_for_task102_commongen_data_to_text": 49.6681, + "eval_f1_for_task1152_bard_word_analogy": 28.0, + "eval_f1_for_task1153_bard_word_analogy": 32.0, + "eval_f1_for_task1154_bard_word_analogy": 17.0, + "eval_f1_for_task1155_bard_word_analogy": 72.0, + "eval_f1_for_task1156_bard_word_analogy": 35.6667, + "eval_f1_for_task1157_bard_word_analogy": 62.0, + "eval_f1_for_task1158_bard_word_analogy": 30.0, + "eval_f1_for_task1159_bard_word_analogy": 28.0, + "eval_f1_for_task1161_coda_19_title_generation": 35.1249, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.0242, + "eval_f1_for_task121_atomic_question_rewriting": 43.9856, + "eval_f1_for_task133_winowhy_coreference_resolution": 30.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 18.064, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.8114, + "eval_f1_for_task1356_xlsum_title_generation": 18.1511, + "eval_f1_for_task1358_xlsum_title_generation": 34.3594, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 32.0, + "eval_f1_for_task1387_anli_textual_entailment": 32.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 72.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_f1_for_task1407_dart_data_to_text": 35.4214, + "eval_f1_for_task1409_dart_data_to_text": 47.2279, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.6481, + "eval_f1_for_task1439_doqa_answerability_classification": 42.0, + "eval_f1_for_task1442_doqa_answerability_classification": 57.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 43.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_f1_for_task1540_peer_read_title_generation": 38.4891, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.9687, + "eval_f1_for_task1562_zest_question_rewriting": 49.876, + "eval_f1_for_task1586_scifact_title_generation": 36.0761, + "eval_f1_for_task1598_nyc_data_to_text": 47.7643, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 35.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.1528, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 59.0, + "eval_f1_for_task1631_open_pi_data_to_text": 96.1344, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_f1_for_task1659_billsum_title_generation": 35.1681, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 68.6667, + "eval_f1_for_task1728_web_nlg_data_to_text": 60.1936, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 49.0, + "eval_f1_for_task200_multinli_textual_entailment": 91.0, + "eval_f1_for_task201_multinli_textual_entailment": 13.0, + "eval_f1_for_task202_multinli_textual_entailment": 3.0, + "eval_f1_for_task219_rocstories_title_generation": 9.813, + "eval_f1_for_task220_rocstories_title_generation": 98.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 52.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 93.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 66.0667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 34.3774, + "eval_f1_for_task288_gigaword_title_generation": 28.3827, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 2.8333, + "eval_f1_for_task329_gap_coreference_resolution": 47.0, + "eval_f1_for_task330_gap_coreference_resolution": 70.2635, + "eval_f1_for_task349_squad2.0_answerability_classification": 55.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 81.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.1609, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 39.5, + "eval_f1_for_task402_grailqa_question_rewriting": 81.3582, + "eval_f1_for_task418_persent_title_generation": 25.3217, + "eval_f1_for_task442_com_qa_question_rewriting": 71.3179, + "eval_f1_for_task500_scruples_title_generation": 26.7568, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 36.9788, + "eval_f1_for_task520_aquamuse_answerability_classification": 58.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 42.7561, + "eval_f1_for_task602_wikitext_title_generation": 11.3703, + "eval_f1_for_task613_liar_keyword_tagging": 22.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 41.0932, + "eval_f1_for_task619_ohsumed_title_generation": 44.9898, + "eval_f1_for_task620_ohsumed_keyword_tagging": 43.5, + "eval_f1_for_task623_ohsumed_keyword_tagging": 57.0, + "eval_f1_for_task640_e_snli_textual_entailment": 38.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 47.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.4381, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 22.6667, + "eval_f1_for_task670_ambigqa_question_rewriting": 82.6777, + "eval_f1_for_task671_ambigqa_question_rewriting": 61.6165, + "eval_f1_for_task677_ollie_data_to_text": 26.331, + "eval_f1_for_task738_perspectrum_textual_entailment": 53.0, + "eval_f1_for_task743_eurlex_title_generation": 26.4016, + "eval_f1_for_task760_msr_sqa_data_to_text": 0.2231, + "eval_f1_for_task769_qed_title_generation": 86.8497, + "eval_f1_for_task827_copa_cause_effect_classification": 64.0, + "eval_f1_for_task828_copa_cause_effect_classification": 73.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 56.3667, + "eval_f1_for_task892_gap_coreference_resolution": 49.0, + "eval_f1_for_task893_gap_coreference_resolution": 39.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_f1_for_task957_e2e_data_to_text": 53.2278, + "eval_f1_for_task970_sherliic_textual_entailment": 73.0, + "eval_f1_for_textual_entailment": 44.6667, + "eval_f1_for_title_generation": 36.5041, + "eval_f1_for_word_analogy": 38.0833, + "eval_gen_len": 8.8583, + "eval_global_step": 3000, + "eval_loss": 1.3285871744155884, + "eval_rouge1": 50.6862, + "eval_rouge1_for_answerability_classification": 59.4872, + "eval_rouge1_for_cause_effect_classification": 64.3258, + "eval_rouge1_for_coreference_resolution": 46.3146, + "eval_rouge1_for_data_to_text": 53.4978, + "eval_rouge1_for_dialogue_act_recognition": 46.1531, + "eval_rouge1_for_grammar_error_correction": 61.9856, + "eval_rouge1_for_keyword_tagging": 60.0425, + "eval_rouge1_for_overlap_extraction": 36.3353, + "eval_rouge1_for_question_rewriting": 71.8032, + "eval_rouge1_for_task020_mctaco_answerability_classification": 56.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 48.0667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.8981, + "eval_rouge1_for_task035_winogrande_question_rewriting": 90.7101, + "eval_rouge1_for_task036_qasc_keyword_tagging": 64.9413, + "eval_rouge1_for_task039_qasc_overlap_extraction": 37.1667, + "eval_rouge1_for_task050_multirc_answerability_classification": 62.0, + "eval_rouge1_for_task102_commongen_data_to_text": 65.9055, + "eval_rouge1_for_task1152_bard_word_analogy": 28.0, + "eval_rouge1_for_task1153_bard_word_analogy": 32.0, + "eval_rouge1_for_task1154_bard_word_analogy": 17.0, + "eval_rouge1_for_task1155_bard_word_analogy": 72.0, + "eval_rouge1_for_task1156_bard_word_analogy": 35.6667, + "eval_rouge1_for_task1157_bard_word_analogy": 62.0, + "eval_rouge1_for_task1158_bard_word_analogy": 30.0, + "eval_rouge1_for_task1159_bard_word_analogy": 30.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 39.1864, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.2736, + "eval_rouge1_for_task121_atomic_question_rewriting": 46.622, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 30.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 18.4298, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.8765, + "eval_rouge1_for_task1356_xlsum_title_generation": 21.1125, + "eval_rouge1_for_task1358_xlsum_title_generation": 38.5153, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 72.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 22.5714, + "eval_rouge1_for_task1407_dart_data_to_text": 36.2666, + "eval_rouge1_for_task1409_dart_data_to_text": 48.3875, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.8335, + "eval_rouge1_for_task1439_doqa_answerability_classification": 42.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 57.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 43.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 42.156, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.1378, + "eval_rouge1_for_task1562_zest_question_rewriting": 52.7764, + "eval_rouge1_for_task1586_scifact_title_generation": 39.6206, + "eval_rouge1_for_task1598_nyc_data_to_text": 49.9868, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.4154, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 59.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 96.4436, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rouge1_for_task1659_billsum_title_generation": 37.6255, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 68.6667, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 61.7275, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 49.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 91.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 13.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 3.0, + "eval_rouge1_for_task219_rocstories_title_generation": 13.9105, + "eval_rouge1_for_task220_rocstories_title_generation": 98.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 52.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 93.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 67.0667, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 35.5039, + "eval_rouge1_for_task288_gigaword_title_generation": 31.341, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 3.5, + "eval_rouge1_for_task329_gap_coreference_resolution": 47.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 70.219, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 55.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 81.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.0252, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 46.5, + "eval_rouge1_for_task402_grailqa_question_rewriting": 84.0753, + "eval_rouge1_for_task418_persent_title_generation": 28.6984, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.714, + "eval_rouge1_for_task500_scruples_title_generation": 27.8703, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.3969, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 58.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 44.3438, + "eval_rouge1_for_task602_wikitext_title_generation": 12.2471, + "eval_rouge1_for_task613_liar_keyword_tagging": 35.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 43.5888, + "eval_rouge1_for_task619_ohsumed_title_generation": 48.5389, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 49.3333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 57.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 38.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 47.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 93.9381, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 23.1667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 83.4133, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.0606, + "eval_rouge1_for_task677_ollie_data_to_text": 28.3587, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 53.0, + "eval_rouge1_for_task743_eurlex_title_generation": 27.2491, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 0.2456, + "eval_rouge1_for_task769_qed_title_generation": 86.7749, + "eval_rouge1_for_task827_copa_cause_effect_classification": 64.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 73.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 56.219, + "eval_rouge1_for_task892_gap_coreference_resolution": 49.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 39.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_rouge1_for_task957_e2e_data_to_text": 54.7518, + "eval_rouge1_for_task970_sherliic_textual_entailment": 73.0, + "eval_rouge1_for_textual_entailment": 46.4722, + "eval_rouge1_for_title_generation": 38.7364, + "eval_rouge1_for_word_analogy": 38.3333, + "eval_rougeL": 49.2431, + "eval_rougeL_for_answerability_classification": 59.4872, + "eval_rougeL_for_cause_effect_classification": 63.6945, + "eval_rougeL_for_coreference_resolution": 46.3146, + "eval_rougeL_for_data_to_text": 46.19, + "eval_rougeL_for_dialogue_act_recognition": 46.0245, + "eval_rougeL_for_grammar_error_correction": 61.3193, + "eval_rougeL_for_keyword_tagging": 59.3227, + "eval_rougeL_for_overlap_extraction": 35.5889, + "eval_rougeL_for_question_rewriting": 67.9492, + "eval_rougeL_for_task020_mctaco_answerability_classification": 56.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 48.0667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.6262, + "eval_rougeL_for_task035_winogrande_question_rewriting": 90.504, + "eval_rougeL_for_task036_qasc_keyword_tagging": 62.7587, + "eval_rougeL_for_task039_qasc_overlap_extraction": 37.1667, + "eval_rougeL_for_task050_multirc_answerability_classification": 62.0, + "eval_rougeL_for_task102_commongen_data_to_text": 52.7795, + "eval_rougeL_for_task1152_bard_word_analogy": 28.0, + "eval_rougeL_for_task1153_bard_word_analogy": 32.0, + "eval_rougeL_for_task1154_bard_word_analogy": 17.0, + "eval_rougeL_for_task1155_bard_word_analogy": 72.0, + "eval_rougeL_for_task1156_bard_word_analogy": 35.6667, + "eval_rougeL_for_task1157_bard_word_analogy": 62.0, + "eval_rougeL_for_task1158_bard_word_analogy": 30.0, + "eval_rougeL_for_task1159_bard_word_analogy": 30.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 33.1169, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.3158, + "eval_rougeL_for_task121_atomic_question_rewriting": 39.5414, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 30.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 17.7223, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.0991, + "eval_rougeL_for_task1356_xlsum_title_generation": 17.9434, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.5036, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 72.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 21.6714, + "eval_rougeL_for_task1407_dart_data_to_text": 31.0225, + "eval_rougeL_for_task1409_dart_data_to_text": 41.7594, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.3656, + "eval_rougeL_for_task1439_doqa_answerability_classification": 42.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 57.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 43.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 37.4863, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.2729, + "eval_rougeL_for_task1562_zest_question_rewriting": 44.9692, + "eval_rougeL_for_task1586_scifact_title_generation": 33.1965, + "eval_rougeL_for_task1598_nyc_data_to_text": 40.2002, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.2927, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 59.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 94.4283, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rougeL_for_task1659_billsum_title_generation": 30.4983, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 68.6667, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 55.0338, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 49.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 91.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 13.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 3.0, + "eval_rougeL_for_task219_rocstories_title_generation": 13.9105, + "eval_rougeL_for_task220_rocstories_title_generation": 98.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 53.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 52.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 93.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 67.0667, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 34.0111, + "eval_rougeL_for_task288_gigaword_title_generation": 27.35, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 3.5, + "eval_rougeL_for_task329_gap_coreference_resolution": 47.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 70.219, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 55.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 81.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 29.5869, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 46.5, + "eval_rougeL_for_task402_grailqa_question_rewriting": 69.8832, + "eval_rougeL_for_task418_persent_title_generation": 25.1063, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.3602, + "eval_rougeL_for_task500_scruples_title_generation": 26.7557, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 36.9618, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 58.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 41.3349, + "eval_rougeL_for_task602_wikitext_title_generation": 12.009, + "eval_rougeL_for_task613_liar_keyword_tagging": 35.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 40.6078, + "eval_rougeL_for_task619_ohsumed_title_generation": 39.9675, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 47.9167, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 57.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 38.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 47.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 93.9381, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 23.1667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 82.4272, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 60.4226, + "eval_rougeL_for_task677_ollie_data_to_text": 23.6716, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 53.0, + "eval_rougeL_for_task743_eurlex_title_generation": 24.9683, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 0.2304, + "eval_rougeL_for_task769_qed_title_generation": 86.7749, + "eval_rougeL_for_task827_copa_cause_effect_classification": 64.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 73.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 45.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 56.219, + "eval_rougeL_for_task892_gap_coreference_resolution": 49.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 39.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 71.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 67.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 62.0, + "eval_rougeL_for_task957_e2e_data_to_text": 42.5741, + "eval_rougeL_for_task970_sherliic_textual_entailment": 73.0, + "eval_rougeL_for_textual_entailment": 46.4722, + "eval_rougeL_for_title_generation": 35.5204, + "eval_rougeL_for_word_analogy": 38.3333, + "eval_runtime": 815.5509, + "eval_samples_per_second": 14.604, + "eval_steps_per_second": 0.913, + "step": 3000 + }, + { + "epoch": 0.76, + "learning_rate": 5e-05, + "loss": 0.9616, + "step": 3500 + }, + { + "epoch": 0.76, + "eval_exact_match": 32.0319, + "eval_exact_match_for_answerability_classification": 53.3846, + "eval_exact_match_for_cause_effect_classification": 49.5714, + "eval_exact_match_for_coreference_resolution": 42.2143, + "eval_exact_match_for_data_to_text": 7.6271, + "eval_exact_match_for_dialogue_act_recognition": 40.5714, + "eval_exact_match_for_grammar_error_correction": 8.0, + "eval_exact_match_for_keyword_tagging": 44.0, + "eval_exact_match_for_overlap_extraction": 14.0, + "eval_exact_match_for_question_rewriting": 3.3636, + "eval_exact_match_for_task020_mctaco_answerability_classification": 48.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 51.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 8.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 2.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 27.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 28.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 61.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 32.0, + "eval_exact_match_for_task1153_bard_word_analogy": 33.0, + "eval_exact_match_for_task1154_bard_word_analogy": 23.0, + "eval_exact_match_for_task1155_bard_word_analogy": 68.0, + "eval_exact_match_for_task1156_bard_word_analogy": 48.0, + "eval_exact_match_for_task1157_bard_word_analogy": 65.0, + "eval_exact_match_for_task1158_bard_word_analogy": 36.0, + "eval_exact_match_for_task1159_bard_word_analogy": 28.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 33.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 5.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 52.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 29.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 30.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 30.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 64.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 86.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 33.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 55.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 46.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 2.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 63.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 55.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 36.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 35.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 81.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 17.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 3.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 98.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 51.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 82.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 64.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 4.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 55.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 64.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 53.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 74.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 57.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 52.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 36.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 8.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 6.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 21.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 22.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 65.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 30.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 25.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 36.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 85.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 2.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 68.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 79.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 73.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 44.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 37.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 38.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 48.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 73.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 74.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 66.0, + "eval_exact_match_for_textual_entailment": 42.3333, + "eval_exact_match_for_title_generation": 10.426, + "eval_exact_match_for_word_analogy": 41.625, + "eval_f1": 48.7626, + "eval_f1_for_answerability_classification": 55.9487, + "eval_f1_for_cause_effect_classification": 67.1505, + "eval_f1_for_coreference_resolution": 47.3874, + "eval_f1_for_data_to_text": 50.8858, + "eval_f1_for_dialogue_act_recognition": 42.4286, + "eval_f1_for_grammar_error_correction": 57.8068, + "eval_f1_for_keyword_tagging": 58.2997, + "eval_f1_for_overlap_extraction": 32.6229, + "eval_f1_for_question_rewriting": 70.2887, + "eval_f1_for_task020_mctaco_answerability_classification": 48.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 54.0, + "eval_f1_for_task034_winogrande_question_rewriting": 92.3898, + "eval_f1_for_task035_winogrande_question_rewriting": 90.8309, + "eval_f1_for_task036_qasc_keyword_tagging": 63.7843, + "eval_f1_for_task039_qasc_overlap_extraction": 36.0, + "eval_f1_for_task050_multirc_answerability_classification": 61.0, + "eval_f1_for_task102_commongen_data_to_text": 53.9688, + "eval_f1_for_task1152_bard_word_analogy": 32.0, + "eval_f1_for_task1153_bard_word_analogy": 33.0, + "eval_f1_for_task1154_bard_word_analogy": 23.0, + "eval_f1_for_task1155_bard_word_analogy": 68.0, + "eval_f1_for_task1156_bard_word_analogy": 48.6667, + "eval_f1_for_task1157_bard_word_analogy": 65.0, + "eval_f1_for_task1158_bard_word_analogy": 36.0, + "eval_f1_for_task1159_bard_word_analogy": 28.0, + "eval_f1_for_task1161_coda_19_title_generation": 34.3079, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.3631, + "eval_f1_for_task121_atomic_question_rewriting": 45.4875, + "eval_f1_for_task133_winowhy_coreference_resolution": 33.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 16.532, + "eval_f1_for_task1344_rte_textual_entailment": 52.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.1792, + "eval_f1_for_task1356_xlsum_title_generation": 18.6386, + "eval_f1_for_task1358_xlsum_title_generation": 33.4567, + "eval_f1_for_task1385_anli_textual_entailment": 29.0, + "eval_f1_for_task1386_anli_textual_entailment": 30.0, + "eval_f1_for_task1387_anli_textual_entailment": 30.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 64.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 86.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_f1_for_task1407_dart_data_to_text": 34.0973, + "eval_f1_for_task1409_dart_data_to_text": 48.6795, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.9268, + "eval_f1_for_task1439_doqa_answerability_classification": 33.0, + "eval_f1_for_task1442_doqa_answerability_classification": 55.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 46.0, + "eval_f1_for_task1540_peer_read_title_generation": 37.3842, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.6868, + "eval_f1_for_task1562_zest_question_rewriting": 49.0477, + "eval_f1_for_task1586_scifact_title_generation": 37.2561, + "eval_f1_for_task1598_nyc_data_to_text": 49.8758, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.5842, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 63.0, + "eval_f1_for_task1631_open_pi_data_to_text": 94.2192, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 33.3752, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 70.7333, + "eval_f1_for_task1728_web_nlg_data_to_text": 57.9721, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 35.0, + "eval_f1_for_task200_multinli_textual_entailment": 81.0, + "eval_f1_for_task201_multinli_textual_entailment": 17.0, + "eval_f1_for_task202_multinli_textual_entailment": 3.0, + "eval_f1_for_task219_rocstories_title_generation": 9.9302, + "eval_f1_for_task220_rocstories_title_generation": 98.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 51.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 82.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 73.3, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 29.2458, + "eval_f1_for_task288_gigaword_title_generation": 28.5528, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 11.2667, + "eval_f1_for_task329_gap_coreference_resolution": 55.0, + "eval_f1_for_task330_gap_coreference_resolution": 72.3905, + "eval_f1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 87.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 85.6667, + "eval_f1_for_task392_cod3s_cause_effect_classification": 84.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 31.7963, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 37.3333, + "eval_f1_for_task402_grailqa_question_rewriting": 79.0211, + "eval_f1_for_task418_persent_title_generation": 25.7892, + "eval_f1_for_task442_com_qa_question_rewriting": 71.0438, + "eval_f1_for_task500_scruples_title_generation": 21.8582, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 35.9939, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 39.1414, + "eval_f1_for_task602_wikitext_title_generation": 8.2101, + "eval_f1_for_task613_liar_keyword_tagging": 21.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 30.5903, + "eval_f1_for_task619_ohsumed_title_generation": 44.8735, + "eval_f1_for_task620_ohsumed_keyword_tagging": 47.4333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 65.0, + "eval_f1_for_task640_e_snli_textual_entailment": 30.0, + "eval_f1_for_task641_e_snli_textual_entailment": 25.0, + "eval_f1_for_task642_e_snli_textual_entailment": 36.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.6143, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 2.6667, + "eval_f1_for_task670_ambigqa_question_rewriting": 82.3578, + "eval_f1_for_task671_ambigqa_question_rewriting": 62.8706, + "eval_f1_for_task677_ollie_data_to_text": 27.1868, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 26.4155, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.2525, + "eval_f1_for_task769_qed_title_generation": 85.898, + "eval_f1_for_task827_copa_cause_effect_classification": 79.0, + "eval_f1_for_task828_copa_cause_effect_classification": 73.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 44.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_f1_for_task890_gwsd_textual_entailment": 37.0, + "eval_f1_for_task891_gap_coreference_resolution": 45.7333, + "eval_f1_for_task892_gap_coreference_resolution": 46.0, + "eval_f1_for_task893_gap_coreference_resolution": 48.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 73.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 74.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_f1_for_task957_e2e_data_to_text": 53.4717, + "eval_f1_for_task970_sherliic_textual_entailment": 66.0, + "eval_f1_for_textual_entailment": 42.3333, + "eval_f1_for_title_generation": 35.5549, + "eval_f1_for_word_analogy": 41.7083, + "eval_gen_len": 8.7143, + "eval_global_step": 3500, + "eval_loss": 1.3259482383728027, + "eval_rouge1": 50.4851, + "eval_rouge1_for_answerability_classification": 55.9487, + "eval_rouge1_for_cause_effect_classification": 67.4397, + "eval_rouge1_for_coreference_resolution": 47.9871, + "eval_rouge1_for_data_to_text": 53.5827, + "eval_rouge1_for_dialogue_act_recognition": 47.3102, + "eval_rouge1_for_grammar_error_correction": 62.8533, + "eval_rouge1_for_keyword_tagging": 62.8317, + "eval_rouge1_for_overlap_extraction": 35.5442, + "eval_rouge1_for_question_rewriting": 71.8837, + "eval_rouge1_for_task020_mctaco_answerability_classification": 48.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 53.9, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.4299, + "eval_rouge1_for_task035_winogrande_question_rewriting": 91.4139, + "eval_rouge1_for_task036_qasc_keyword_tagging": 68.9108, + "eval_rouge1_for_task039_qasc_overlap_extraction": 41.0, + "eval_rouge1_for_task050_multirc_answerability_classification": 61.0, + "eval_rouge1_for_task102_commongen_data_to_text": 68.002, + "eval_rouge1_for_task1152_bard_word_analogy": 32.0, + "eval_rouge1_for_task1153_bard_word_analogy": 33.0, + "eval_rouge1_for_task1154_bard_word_analogy": 23.0, + "eval_rouge1_for_task1155_bard_word_analogy": 68.0, + "eval_rouge1_for_task1156_bard_word_analogy": 48.6667, + "eval_rouge1_for_task1157_bard_word_analogy": 65.0, + "eval_rouge1_for_task1158_bard_word_analogy": 36.0, + "eval_rouge1_for_task1159_bard_word_analogy": 28.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 37.3213, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.6208, + "eval_rouge1_for_task121_atomic_question_rewriting": 47.6023, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 33.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 17.3679, + "eval_rouge1_for_task1344_rte_textual_entailment": 52.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.2416, + "eval_rouge1_for_task1356_xlsum_title_generation": 21.437, + "eval_rouge1_for_task1358_xlsum_title_generation": 37.9627, + "eval_rouge1_for_task1385_anli_textual_entailment": 29.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 30.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 30.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 64.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 86.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 23.8381, + "eval_rouge1_for_task1407_dart_data_to_text": 34.1792, + "eval_rouge1_for_task1409_dart_data_to_text": 49.6648, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.9394, + "eval_rouge1_for_task1439_doqa_answerability_classification": 33.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 55.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 40.5008, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.7671, + "eval_rouge1_for_task1562_zest_question_rewriting": 52.6249, + "eval_rouge1_for_task1586_scifact_title_generation": 40.8985, + "eval_rouge1_for_task1598_nyc_data_to_text": 52.3602, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.8876, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 63.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 94.3039, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 35.2048, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 70.7333, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 59.7207, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 35.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 81.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 17.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 3.0, + "eval_rouge1_for_task219_rocstories_title_generation": 13.673, + "eval_rouge1_for_task220_rocstories_title_generation": 98.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 51.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 82.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 74.3, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 30.0883, + "eval_rouge1_for_task288_gigaword_title_generation": 31.5762, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 11.8333, + "eval_rouge1_for_task329_gap_coreference_resolution": 55.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 72.319, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 87.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 85.6667, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 31.7424, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 44.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 81.2345, + "eval_rouge1_for_task418_persent_title_generation": 28.5052, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.44, + "eval_rouge1_for_task500_scruples_title_generation": 23.17, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 36.409, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 40.7304, + "eval_rouge1_for_task602_wikitext_title_generation": 9.2063, + "eval_rouge1_for_task613_liar_keyword_tagging": 35.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 32.669, + "eval_rouge1_for_task619_ohsumed_title_generation": 48.5497, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 51.1333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 65.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 30.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 25.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 36.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.1143, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 2.6667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 83.1481, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 64.0775, + "eval_rouge1_for_task677_ollie_data_to_text": 29.5619, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 27.2574, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.2919, + "eval_rouge1_for_task769_qed_title_generation": 85.4717, + "eval_rouge1_for_task827_copa_cause_effect_classification": 79.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 73.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 44.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 37.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 45.7333, + "eval_rouge1_for_task892_gap_coreference_resolution": 46.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 48.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 73.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 74.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_rouge1_for_task957_e2e_data_to_text": 53.9441, + "eval_rouge1_for_task970_sherliic_textual_entailment": 66.0, + "eval_rouge1_for_textual_entailment": 44.1944, + "eval_rouge1_for_title_generation": 37.6552, + "eval_rouge1_for_word_analogy": 41.7083, + "eval_rougeL": 48.9722, + "eval_rougeL_for_answerability_classification": 55.9487, + "eval_rougeL_for_cause_effect_classification": 66.8984, + "eval_rougeL_for_coreference_resolution": 47.9871, + "eval_rougeL_for_data_to_text": 45.995, + "eval_rougeL_for_dialogue_act_recognition": 47.1816, + "eval_rougeL_for_grammar_error_correction": 62.3051, + "eval_rougeL_for_keyword_tagging": 62.1965, + "eval_rougeL_for_overlap_extraction": 35.0874, + "eval_rougeL_for_question_rewriting": 67.9934, + "eval_rougeL_for_task020_mctaco_answerability_classification": 48.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 53.9, + "eval_rougeL_for_task034_winogrande_question_rewriting": 91.8719, + "eval_rougeL_for_task035_winogrande_question_rewriting": 91.1509, + "eval_rougeL_for_task036_qasc_keyword_tagging": 67.218, + "eval_rougeL_for_task039_qasc_overlap_extraction": 41.0, + "eval_rougeL_for_task050_multirc_answerability_classification": 61.0, + "eval_rougeL_for_task102_commongen_data_to_text": 54.0805, + "eval_rougeL_for_task1152_bard_word_analogy": 32.0, + "eval_rougeL_for_task1153_bard_word_analogy": 33.0, + "eval_rougeL_for_task1154_bard_word_analogy": 23.0, + "eval_rougeL_for_task1155_bard_word_analogy": 68.0, + "eval_rougeL_for_task1156_bard_word_analogy": 48.6667, + "eval_rougeL_for_task1157_bard_word_analogy": 65.0, + "eval_rougeL_for_task1158_bard_word_analogy": 36.0, + "eval_rougeL_for_task1159_bard_word_analogy": 28.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 32.4747, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.5052, + "eval_rougeL_for_task121_atomic_question_rewriting": 41.7842, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 33.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 16.5706, + "eval_rougeL_for_task1344_rte_textual_entailment": 52.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.5459, + "eval_rougeL_for_task1356_xlsum_title_generation": 18.8256, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.0185, + "eval_rougeL_for_task1385_anli_textual_entailment": 29.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 30.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 30.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 64.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 86.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 22.9381, + "eval_rougeL_for_task1407_dart_data_to_text": 28.9226, + "eval_rougeL_for_task1409_dart_data_to_text": 42.5276, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.7079, + "eval_rougeL_for_task1439_doqa_answerability_classification": 33.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 55.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 45.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 36.0352, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.9022, + "eval_rougeL_for_task1562_zest_question_rewriting": 44.2567, + "eval_rougeL_for_task1586_scifact_title_generation": 33.2789, + "eval_rougeL_for_task1598_nyc_data_to_text": 42.1087, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 68.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.295, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 63.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 91.6552, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 28.8596, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 70.7333, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 53.5, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 35.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 81.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 17.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 3.0, + "eval_rougeL_for_task219_rocstories_title_generation": 13.673, + "eval_rougeL_for_task220_rocstories_title_generation": 98.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 51.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 82.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 74.3, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 29.1748, + "eval_rougeL_for_task288_gigaword_title_generation": 27.5927, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 11.8333, + "eval_rougeL_for_task329_gap_coreference_resolution": 55.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 72.319, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 87.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 85.6667, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 30.9168, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 44.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 66.4862, + "eval_rougeL_for_task418_persent_title_generation": 24.6498, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.3918, + "eval_rougeL_for_task500_scruples_title_generation": 21.5557, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 36.0741, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 38.8781, + "eval_rougeL_for_task602_wikitext_title_generation": 9.2063, + "eval_rougeL_for_task613_liar_keyword_tagging": 35.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 29.7054, + "eval_rougeL_for_task619_ohsumed_title_generation": 39.6971, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 49.65, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 65.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 30.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 25.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 36.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.1143, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 2.6667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 82.1422, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 61.4973, + "eval_rougeL_for_task677_ollie_data_to_text": 24.3949, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 25.2021, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.1076, + "eval_rougeL_for_task769_qed_title_generation": 85.4717, + "eval_rougeL_for_task827_copa_cause_effect_classification": 79.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 73.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 44.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 36.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 37.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 45.7333, + "eval_rougeL_for_task892_gap_coreference_resolution": 46.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 48.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 73.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 74.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.9213, + "eval_rougeL_for_task970_sherliic_textual_entailment": 66.0, + "eval_rougeL_for_textual_entailment": 43.7917, + "eval_rougeL_for_title_generation": 34.5623, + "eval_rougeL_for_word_analogy": 41.7083, + "eval_runtime": 819.7363, + "eval_samples_per_second": 14.529, + "eval_steps_per_second": 0.909, + "step": 3500 + }, + { + "epoch": 0.87, + "learning_rate": 5e-05, + "loss": 0.924, + "step": 4000 + }, + { + "epoch": 0.87, + "eval_exact_match": 32.3006, + "eval_exact_match_for_answerability_classification": 56.8462, + "eval_exact_match_for_cause_effect_classification": 50.8571, + "eval_exact_match_for_coreference_resolution": 40.5714, + "eval_exact_match_for_data_to_text": 9.201, + "eval_exact_match_for_dialogue_act_recognition": 42.4286, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 45.4, + "eval_exact_match_for_overlap_extraction": 17.0, + "eval_exact_match_for_question_rewriting": 4.3636, + "eval_exact_match_for_task020_mctaco_answerability_classification": 54.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 54.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 9.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 20.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 34.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 67.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 32.0, + "eval_exact_match_for_task1153_bard_word_analogy": 40.0, + "eval_exact_match_for_task1154_bard_word_analogy": 25.0, + "eval_exact_match_for_task1155_bard_word_analogy": 73.0, + "eval_exact_match_for_task1156_bard_word_analogy": 39.0, + "eval_exact_match_for_task1157_bard_word_analogy": 64.0, + "eval_exact_match_for_task1158_bard_word_analogy": 38.0, + "eval_exact_match_for_task1159_bard_word_analogy": 32.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 10.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 0.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 4.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 60.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 15.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 19.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 15.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 36.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 74.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 44.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 57.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 31.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 14.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 68.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 41.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 7.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 48.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 91.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 12.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 1.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 96.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 41.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 43.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 95.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 53.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 59.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 53.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 68.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 53.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 68.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 70.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 65.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 35.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 8.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 2.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 64.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 7.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 22.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 13.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 86.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 25.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 7.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 39.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 86.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 4.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 52.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 67.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 76.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 71.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 58.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 27.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 37.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 54.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 55.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 63.0, + "eval_exact_match_for_textual_entailment": 39.8333, + "eval_exact_match_for_title_generation": 10.5381, + "eval_exact_match_for_word_analogy": 42.875, + "eval_f1": 48.6628, + "eval_f1_for_answerability_classification": 58.9487, + "eval_f1_for_cause_effect_classification": 65.9783, + "eval_f1_for_coreference_resolution": 45.0517, + "eval_f1_for_data_to_text": 51.1915, + "eval_f1_for_dialogue_act_recognition": 44.7143, + "eval_f1_for_grammar_error_correction": 56.5771, + "eval_f1_for_keyword_tagging": 59.1652, + "eval_f1_for_overlap_extraction": 40.2203, + "eval_f1_for_question_rewriting": 69.5843, + "eval_f1_for_task020_mctaco_answerability_classification": 54.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 56.6667, + "eval_f1_for_task034_winogrande_question_rewriting": 92.4469, + "eval_f1_for_task035_winogrande_question_rewriting": 88.6732, + "eval_f1_for_task036_qasc_keyword_tagging": 57.1117, + "eval_f1_for_task039_qasc_overlap_extraction": 47.7333, + "eval_f1_for_task050_multirc_answerability_classification": 67.0, + "eval_f1_for_task102_commongen_data_to_text": 54.3955, + "eval_f1_for_task1152_bard_word_analogy": 32.0, + "eval_f1_for_task1153_bard_word_analogy": 40.0, + "eval_f1_for_task1154_bard_word_analogy": 25.0, + "eval_f1_for_task1155_bard_word_analogy": 73.0, + "eval_f1_for_task1156_bard_word_analogy": 39.6667, + "eval_f1_for_task1157_bard_word_analogy": 64.0, + "eval_f1_for_task1158_bard_word_analogy": 38.0, + "eval_f1_for_task1159_bard_word_analogy": 32.0, + "eval_f1_for_task1161_coda_19_title_generation": 32.2533, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.2153, + "eval_f1_for_task121_atomic_question_rewriting": 42.8244, + "eval_f1_for_task133_winowhy_coreference_resolution": 0.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 16.8991, + "eval_f1_for_task1344_rte_textual_entailment": 60.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.1278, + "eval_f1_for_task1356_xlsum_title_generation": 22.047, + "eval_f1_for_task1358_xlsum_title_generation": 33.7611, + "eval_f1_for_task1385_anli_textual_entailment": 15.0, + "eval_f1_for_task1386_anli_textual_entailment": 19.0, + "eval_f1_for_task1387_anli_textual_entailment": 15.0, + "eval_f1_for_task1388_cb_textual_entailment": 36.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 74.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_f1_for_task1407_dart_data_to_text": 36.0363, + "eval_f1_for_task1409_dart_data_to_text": 48.1426, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.6636, + "eval_f1_for_task1439_doqa_answerability_classification": 44.0, + "eval_f1_for_task1442_doqa_answerability_classification": 57.0, + "eval_f1_for_task1516_imppres_textual_entailment": 31.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 36.5794, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.4905, + "eval_f1_for_task1562_zest_question_rewriting": 49.854, + "eval_f1_for_task1586_scifact_title_generation": 34.4545, + "eval_f1_for_task1598_nyc_data_to_text": 50.5286, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 81.8586, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_f1_for_task1631_open_pi_data_to_text": 96.4553, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_f1_for_task1659_billsum_title_generation": 32.8496, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 74.9524, + "eval_f1_for_task1728_web_nlg_data_to_text": 57.8692, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 48.0, + "eval_f1_for_task200_multinli_textual_entailment": 91.0, + "eval_f1_for_task201_multinli_textual_entailment": 12.0, + "eval_f1_for_task202_multinli_textual_entailment": 1.0, + "eval_f1_for_task219_rocstories_title_generation": 15.1976, + "eval_f1_for_task220_rocstories_title_generation": 96.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, + "eval_f1_for_task232_iirc_answerability_classification": 41.0, + "eval_f1_for_task233_iirc_answerability_classification": 43.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 95.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 62.3, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 32.7073, + "eval_f1_for_task288_gigaword_title_generation": 30.8025, + "eval_f1_for_task290_tellmewhy_answerability_classification": 86.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 1.6667, + "eval_f1_for_task329_gap_coreference_resolution": 53.0, + "eval_f1_for_task330_gap_coreference_resolution": 75.5714, + "eval_f1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 84.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 90.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 88.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 29.6901, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 38.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 78.17, + "eval_f1_for_task418_persent_title_generation": 29.0123, + "eval_f1_for_task442_com_qa_question_rewriting": 71.0299, + "eval_f1_for_task500_scruples_title_generation": 25.6271, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 38.4471, + "eval_f1_for_task520_aquamuse_answerability_classification": 64.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 38.608, + "eval_f1_for_task602_wikitext_title_generation": 8.2101, + "eval_f1_for_task613_liar_keyword_tagging": 22.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 32.8248, + "eval_f1_for_task619_ohsumed_title_generation": 43.9199, + "eval_f1_for_task620_ohsumed_keyword_tagging": 37.3333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 86.0, + "eval_f1_for_task640_e_snli_textual_entailment": 25.0, + "eval_f1_for_task641_e_snli_textual_entailment": 7.0, + "eval_f1_for_task642_e_snli_textual_entailment": 39.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 92.7143, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 4.5, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.2148, + "eval_f1_for_task671_ambigqa_question_rewriting": 61.0122, + "eval_f1_for_task677_ollie_data_to_text": 25.9049, + "eval_f1_for_task738_perspectrum_textual_entailment": 52.0, + "eval_f1_for_task743_eurlex_title_generation": 26.7082, + "eval_f1_for_task760_msr_sqa_data_to_text": 0.8343, + "eval_f1_for_task769_qed_title_generation": 86.4097, + "eval_f1_for_task827_copa_cause_effect_classification": 76.0, + "eval_f1_for_task828_copa_cause_effect_classification": 71.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 58.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 27.0, + "eval_f1_for_task890_gwsd_textual_entailment": 37.0, + "eval_f1_for_task891_gap_coreference_resolution": 58.9, + "eval_f1_for_task892_gap_coreference_resolution": 50.0, + "eval_f1_for_task893_gap_coreference_resolution": 55.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_f1_for_task957_e2e_data_to_text": 53.2923, + "eval_f1_for_task970_sherliic_textual_entailment": 63.0, + "eval_f1_for_textual_entailment": 39.8333, + "eval_f1_for_title_generation": 36.2373, + "eval_f1_for_word_analogy": 42.9583, + "eval_gen_len": 8.7158, + "eval_global_step": 4000, + "eval_loss": 1.3395925760269165, + "eval_rouge1": 50.4486, + "eval_rouge1_for_answerability_classification": 58.9487, + "eval_rouge1_for_cause_effect_classification": 66.1518, + "eval_rouge1_for_coreference_resolution": 45.7371, + "eval_rouge1_for_data_to_text": 53.6991, + "eval_rouge1_for_dialogue_act_recognition": 49.3435, + "eval_rouge1_for_grammar_error_correction": 61.752, + "eval_rouge1_for_keyword_tagging": 65.1605, + "eval_rouge1_for_overlap_extraction": 41.6705, + "eval_rouge1_for_question_rewriting": 71.181, + "eval_rouge1_for_task020_mctaco_answerability_classification": 54.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 56.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.5202, + "eval_rouge1_for_task035_winogrande_question_rewriting": 89.2628, + "eval_rouge1_for_task036_qasc_keyword_tagging": 66.1881, + "eval_rouge1_for_task039_qasc_overlap_extraction": 49.5667, + "eval_rouge1_for_task050_multirc_answerability_classification": 67.0, + "eval_rouge1_for_task102_commongen_data_to_text": 66.4778, + "eval_rouge1_for_task1152_bard_word_analogy": 32.0, + "eval_rouge1_for_task1153_bard_word_analogy": 40.0, + "eval_rouge1_for_task1154_bard_word_analogy": 25.0, + "eval_rouge1_for_task1155_bard_word_analogy": 73.0, + "eval_rouge1_for_task1156_bard_word_analogy": 41.0, + "eval_rouge1_for_task1157_bard_word_analogy": 64.0, + "eval_rouge1_for_task1158_bard_word_analogy": 38.0, + "eval_rouge1_for_task1159_bard_word_analogy": 32.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 36.1265, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.4276, + "eval_rouge1_for_task121_atomic_question_rewriting": 45.4061, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 0.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 17.3126, + "eval_rouge1_for_task1344_rte_textual_entailment": 60.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.1497, + "eval_rouge1_for_task1356_xlsum_title_generation": 26.5127, + "eval_rouge1_for_task1358_xlsum_title_generation": 38.2585, + "eval_rouge1_for_task1385_anli_textual_entailment": 15.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 19.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 15.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 36.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 74.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 23.4048, + "eval_rouge1_for_task1407_dart_data_to_text": 36.7075, + "eval_rouge1_for_task1409_dart_data_to_text": 48.8674, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.9052, + "eval_rouge1_for_task1439_doqa_answerability_classification": 44.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 57.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 31.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 40.7678, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.5987, + "eval_rouge1_for_task1562_zest_question_rewriting": 52.4779, + "eval_rouge1_for_task1586_scifact_title_generation": 38.1117, + "eval_rouge1_for_task1598_nyc_data_to_text": 52.8554, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 82.1289, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 96.7529, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1659_billsum_title_generation": 35.1857, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 74.9524, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 59.069, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 48.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 91.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 12.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 1.0, + "eval_rouge1_for_task219_rocstories_title_generation": 19.4611, + "eval_rouge1_for_task220_rocstories_title_generation": 96.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 41.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 43.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 95.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 62.9667, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 33.7743, + "eval_rouge1_for_task288_gigaword_title_generation": 34.2609, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 86.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 1.6667, + "eval_rouge1_for_task329_gap_coreference_resolution": 53.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 75.5, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 84.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 90.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 88.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 29.6925, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 47.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 80.7386, + "eval_rouge1_for_task418_persent_title_generation": 31.8763, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.4139, + "eval_rouge1_for_task500_scruples_title_generation": 26.56, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 39.1003, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 64.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 40.8837, + "eval_rouge1_for_task602_wikitext_title_generation": 9.2063, + "eval_rouge1_for_task613_liar_keyword_tagging": 36.5, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 34.0367, + "eval_rouge1_for_task619_ohsumed_title_generation": 46.4211, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 43.9, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 86.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 25.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 7.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 39.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 93.2143, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 4.5, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.3479, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 62.1177, + "eval_rouge1_for_task677_ollie_data_to_text": 28.0092, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 52.0, + "eval_rouge1_for_task743_eurlex_title_generation": 27.4518, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 0.8644, + "eval_rouge1_for_task769_qed_title_generation": 86.4299, + "eval_rouge1_for_task827_copa_cause_effect_classification": 76.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 71.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 58.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 39.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 37.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 58.9, + "eval_rouge1_for_task892_gap_coreference_resolution": 50.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 55.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_rouge1_for_task957_e2e_data_to_text": 54.5904, + "eval_rouge1_for_task970_sherliic_textual_entailment": 63.0, + "eval_rouge1_for_textual_entailment": 41.6944, + "eval_rouge1_for_title_generation": 38.5905, + "eval_rouge1_for_word_analogy": 43.125, + "eval_rougeL": 49.089, + "eval_rougeL_for_answerability_classification": 58.9487, + "eval_rougeL_for_cause_effect_classification": 65.5279, + "eval_rougeL_for_coreference_resolution": 45.7371, + "eval_rougeL_for_data_to_text": 46.7015, + "eval_rougeL_for_dialogue_act_recognition": 49.215, + "eval_rougeL_for_grammar_error_correction": 61.0541, + "eval_rougeL_for_keyword_tagging": 64.5102, + "eval_rougeL_for_overlap_extraction": 40.8832, + "eval_rougeL_for_question_rewriting": 67.8433, + "eval_rougeL_for_task020_mctaco_answerability_classification": 54.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 56.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.1103, + "eval_rougeL_for_task035_winogrande_question_rewriting": 88.4105, + "eval_rougeL_for_task036_qasc_keyword_tagging": 64.0869, + "eval_rougeL_for_task039_qasc_overlap_extraction": 49.5667, + "eval_rougeL_for_task050_multirc_answerability_classification": 67.0, + "eval_rougeL_for_task102_commongen_data_to_text": 56.4583, + "eval_rougeL_for_task1152_bard_word_analogy": 32.0, + "eval_rougeL_for_task1153_bard_word_analogy": 40.0, + "eval_rougeL_for_task1154_bard_word_analogy": 25.0, + "eval_rougeL_for_task1155_bard_word_analogy": 73.0, + "eval_rougeL_for_task1156_bard_word_analogy": 41.0, + "eval_rougeL_for_task1157_bard_word_analogy": 64.0, + "eval_rougeL_for_task1158_bard_word_analogy": 38.0, + "eval_rougeL_for_task1159_bard_word_analogy": 32.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 30.7814, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.4072, + "eval_rougeL_for_task121_atomic_question_rewriting": 39.5793, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 0.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 16.7443, + "eval_rougeL_for_task1344_rte_textual_entailment": 60.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.1494, + "eval_rougeL_for_task1356_xlsum_title_generation": 22.8261, + "eval_rougeL_for_task1358_xlsum_title_generation": 32.1612, + "eval_rougeL_for_task1385_anli_textual_entailment": 15.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 19.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 15.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 36.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 74.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 22.5048, + "eval_rougeL_for_task1407_dart_data_to_text": 30.4648, + "eval_rougeL_for_task1409_dart_data_to_text": 43.3756, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.3745, + "eval_rougeL_for_task1439_doqa_answerability_classification": 44.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 57.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 31.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 56.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 35.4703, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.7338, + "eval_rougeL_for_task1562_zest_question_rewriting": 47.1564, + "eval_rougeL_for_task1586_scifact_title_generation": 31.9314, + "eval_rougeL_for_task1598_nyc_data_to_text": 41.1304, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 80.7934, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 96.0934, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1659_billsum_title_generation": 29.0921, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 74.9524, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 52.6747, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 48.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 91.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 12.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 1.0, + "eval_rougeL_for_task219_rocstories_title_generation": 19.4611, + "eval_rougeL_for_task220_rocstories_title_generation": 96.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 54.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 41.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 43.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 95.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 62.9667, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 32.1996, + "eval_rougeL_for_task288_gigaword_title_generation": 30.0341, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 86.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 1.6667, + "eval_rougeL_for_task329_gap_coreference_resolution": 53.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 75.5, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 53.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 84.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 90.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 88.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.0076, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 47.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 68.9469, + "eval_rougeL_for_task418_persent_title_generation": 26.925, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.3657, + "eval_rougeL_for_task500_scruples_title_generation": 25.2842, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 38.7614, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 64.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 38.9538, + "eval_rougeL_for_task602_wikitext_title_generation": 8.9683, + "eval_rougeL_for_task613_liar_keyword_tagging": 36.5, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 31.3543, + "eval_rougeL_for_task619_ohsumed_title_generation": 38.9652, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 43.15, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 86.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 25.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 7.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 39.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 92.8143, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 4.5, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.589, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 59.7678, + "eval_rougeL_for_task677_ollie_data_to_text": 23.2281, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 52.0, + "eval_rougeL_for_task743_eurlex_title_generation": 25.1649, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 0.8131, + "eval_rougeL_for_task769_qed_title_generation": 86.4299, + "eval_rougeL_for_task827_copa_cause_effect_classification": 76.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 71.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 58.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 39.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 37.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 58.9, + "eval_rougeL_for_task892_gap_coreference_resolution": 50.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 55.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 65.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_rougeL_for_task957_e2e_data_to_text": 42.1179, + "eval_rougeL_for_task970_sherliic_textual_entailment": 63.0, + "eval_rougeL_for_textual_entailment": 41.6944, + "eval_rougeL_for_title_generation": 35.4551, + "eval_rougeL_for_word_analogy": 43.125, + "eval_runtime": 824.1536, + "eval_samples_per_second": 14.451, + "eval_steps_per_second": 0.904, + "step": 4000 + }, + { + "epoch": 0.98, + "learning_rate": 5e-05, + "loss": 0.9446, + "step": 4500 + }, + { + "epoch": 0.98, + "eval_exact_match": 32.9471, + "eval_exact_match_for_answerability_classification": 57.2308, + "eval_exact_match_for_cause_effect_classification": 46.8571, + "eval_exact_match_for_coreference_resolution": 43.8571, + "eval_exact_match_for_data_to_text": 7.9903, + "eval_exact_match_for_dialogue_act_recognition": 40.5714, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 44.6, + "eval_exact_match_for_overlap_extraction": 15.5, + "eval_exact_match_for_question_rewriting": 3.1818, + "eval_exact_match_for_task020_mctaco_answerability_classification": 55.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 58.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 3.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 40.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 31.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 33.0, + "eval_exact_match_for_task1153_bard_word_analogy": 43.0, + "eval_exact_match_for_task1154_bard_word_analogy": 30.0, + "eval_exact_match_for_task1155_bard_word_analogy": 89.0, + "eval_exact_match_for_task1156_bard_word_analogy": 50.0, + "eval_exact_match_for_task1157_bard_word_analogy": 67.0, + "eval_exact_match_for_task1158_bard_word_analogy": 35.0, + "eval_exact_match_for_task1159_bard_word_analogy": 32.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 7.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 30.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 3.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 58.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 26.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 22.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 22.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 39.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 66.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 90.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 40.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 59.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 32.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 41.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 3.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 12.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 59.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 60.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 46.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 51.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 92.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 10.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 2.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 98.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 55.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 58.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 94.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 66.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 49.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 2.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 38.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 62.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 59.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 73.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 55.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 52.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 39.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 7.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 60.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 10.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 25.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 15.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 55.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 38.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 88.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 7.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 49.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 66.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 62.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 69.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 34.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 47.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 57.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 66.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 71.0, + "eval_exact_match_for_textual_entailment": 42.375, + "eval_exact_match_for_title_generation": 10.5381, + "eval_exact_match_for_word_analogy": 47.375, + "eval_f1": 49.9412, + "eval_f1_for_answerability_classification": 59.8462, + "eval_f1_for_cause_effect_classification": 65.7962, + "eval_f1_for_coreference_resolution": 50.2158, + "eval_f1_for_data_to_text": 51.135, + "eval_f1_for_dialogue_act_recognition": 42.5, + "eval_f1_for_grammar_error_correction": 57.1389, + "eval_f1_for_keyword_tagging": 57.4218, + "eval_f1_for_overlap_extraction": 35.4038, + "eval_f1_for_question_rewriting": 70.2686, + "eval_f1_for_task020_mctaco_answerability_classification": 55.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 62.5, + "eval_f1_for_task034_winogrande_question_rewriting": 92.5214, + "eval_f1_for_task035_winogrande_question_rewriting": 89.6529, + "eval_f1_for_task036_qasc_keyword_tagging": 74.7138, + "eval_f1_for_task039_qasc_overlap_extraction": 42.1667, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 51.4662, + "eval_f1_for_task1152_bard_word_analogy": 33.0, + "eval_f1_for_task1153_bard_word_analogy": 43.0, + "eval_f1_for_task1154_bard_word_analogy": 30.0, + "eval_f1_for_task1155_bard_word_analogy": 89.0, + "eval_f1_for_task1156_bard_word_analogy": 50.0, + "eval_f1_for_task1157_bard_word_analogy": 67.0, + "eval_f1_for_task1158_bard_word_analogy": 35.0, + "eval_f1_for_task1159_bard_word_analogy": 32.0, + "eval_f1_for_task1161_coda_19_title_generation": 34.7192, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.5337, + "eval_f1_for_task121_atomic_question_rewriting": 45.6578, + "eval_f1_for_task133_winowhy_coreference_resolution": 30.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 15.098, + "eval_f1_for_task1344_rte_textual_entailment": 58.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.9777, + "eval_f1_for_task1356_xlsum_title_generation": 19.8139, + "eval_f1_for_task1358_xlsum_title_generation": 35.5179, + "eval_f1_for_task1385_anli_textual_entailment": 26.0, + "eval_f1_for_task1386_anli_textual_entailment": 22.0, + "eval_f1_for_task1387_anli_textual_entailment": 22.0, + "eval_f1_for_task1388_cb_textual_entailment": 39.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 66.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 90.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 34.1878, + "eval_f1_for_task1409_dart_data_to_text": 46.8304, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 30.1155, + "eval_f1_for_task1439_doqa_answerability_classification": 40.0, + "eval_f1_for_task1442_doqa_answerability_classification": 59.0, + "eval_f1_for_task1516_imppres_textual_entailment": 32.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 41.0, + "eval_f1_for_task1540_peer_read_title_generation": 38.8989, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.1622, + "eval_f1_for_task1562_zest_question_rewriting": 49.5003, + "eval_f1_for_task1586_scifact_title_generation": 34.0489, + "eval_f1_for_task1598_nyc_data_to_text": 49.5476, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 81.4929, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_f1_for_task1631_open_pi_data_to_text": 95.8106, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 60.0, + "eval_f1_for_task1659_billsum_title_generation": 33.0166, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 79.6, + "eval_f1_for_task1728_web_nlg_data_to_text": 59.5834, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 51.0, + "eval_f1_for_task200_multinli_textual_entailment": 92.0, + "eval_f1_for_task201_multinli_textual_entailment": 10.0, + "eval_f1_for_task202_multinli_textual_entailment": 2.0, + "eval_f1_for_task219_rocstories_title_generation": 9.0167, + "eval_f1_for_task220_rocstories_title_generation": 98.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 55.0, + "eval_f1_for_task232_iirc_answerability_classification": 58.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 94.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 76.9, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 28.641, + "eval_f1_for_task288_gigaword_title_generation": 32.8041, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 7.8333, + "eval_f1_for_task329_gap_coreference_resolution": 38.0, + "eval_f1_for_task330_gap_coreference_resolution": 71.5714, + "eval_f1_for_task349_squad2.0_answerability_classification": 59.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 86.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 85.0, + "eval_f1_for_task392_cod3s_cause_effect_classification": 84.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 30.49, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 43.5, + "eval_f1_for_task402_grailqa_question_rewriting": 80.6993, + "eval_f1_for_task418_persent_title_generation": 26.651, + "eval_f1_for_task442_com_qa_question_rewriting": 71.0346, + "eval_f1_for_task500_scruples_title_generation": 24.2693, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 36.9663, + "eval_f1_for_task520_aquamuse_answerability_classification": 60.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 42.2298, + "eval_f1_for_task602_wikitext_title_generation": 7.836, + "eval_f1_for_task613_liar_keyword_tagging": 25.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 40.0837, + "eval_f1_for_task619_ohsumed_title_generation": 45.1469, + "eval_f1_for_task620_ohsumed_keyword_tagging": 36.4, + "eval_f1_for_task623_ohsumed_keyword_tagging": 55.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 31.0, + "eval_f1_for_task642_e_snli_textual_entailment": 38.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 95.3286, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 18.55, + "eval_f1_for_task670_ambigqa_question_rewriting": 80.891, + "eval_f1_for_task671_ambigqa_question_rewriting": 61.9932, + "eval_f1_for_task677_ollie_data_to_text": 32.0334, + "eval_f1_for_task738_perspectrum_textual_entailment": 49.0, + "eval_f1_for_task743_eurlex_title_generation": 25.4772, + "eval_f1_for_task760_msr_sqa_data_to_text": 0.1286, + "eval_f1_for_task769_qed_title_generation": 87.1261, + "eval_f1_for_task827_copa_cause_effect_classification": 62.0, + "eval_f1_for_task828_copa_cause_effect_classification": 69.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_f1_for_task890_gwsd_textual_entailment": 34.0, + "eval_f1_for_task891_gap_coreference_resolution": 53.5667, + "eval_f1_for_task892_gap_coreference_resolution": 47.0, + "eval_f1_for_task893_gap_coreference_resolution": 57.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 66.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_f1_for_task957_e2e_data_to_text": 52.8825, + "eval_f1_for_task970_sherliic_textual_entailment": 71.0, + "eval_f1_for_textual_entailment": 42.375, + "eval_f1_for_title_generation": 36.1762, + "eval_f1_for_word_analogy": 47.375, + "eval_gen_len": 8.3693, + "eval_global_step": 4500, + "eval_loss": 1.3463222980499268, + "eval_rouge1": 51.6785, + "eval_rouge1_for_answerability_classification": 59.8462, + "eval_rouge1_for_cause_effect_classification": 66.2898, + "eval_rouge1_for_coreference_resolution": 50.8119, + "eval_rouge1_for_data_to_text": 54.0703, + "eval_rouge1_for_dialogue_act_recognition": 47.2054, + "eval_rouge1_for_grammar_error_correction": 62.069, + "eval_rouge1_for_keyword_tagging": 62.3616, + "eval_rouge1_for_overlap_extraction": 36.7984, + "eval_rouge1_for_question_rewriting": 71.8614, + "eval_rouge1_for_task020_mctaco_answerability_classification": 55.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 62.5, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.5604, + "eval_rouge1_for_task035_winogrande_question_rewriting": 90.2134, + "eval_rouge1_for_task036_qasc_keyword_tagging": 79.3797, + "eval_rouge1_for_task039_qasc_overlap_extraction": 44.0, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 66.1698, + "eval_rouge1_for_task1152_bard_word_analogy": 33.0, + "eval_rouge1_for_task1153_bard_word_analogy": 43.0, + "eval_rouge1_for_task1154_bard_word_analogy": 30.0, + "eval_rouge1_for_task1155_bard_word_analogy": 89.0, + "eval_rouge1_for_task1156_bard_word_analogy": 50.0, + "eval_rouge1_for_task1157_bard_word_analogy": 67.0, + "eval_rouge1_for_task1158_bard_word_analogy": 35.0, + "eval_rouge1_for_task1159_bard_word_analogy": 32.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 37.8867, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.8638, + "eval_rouge1_for_task121_atomic_question_rewriting": 47.9813, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 30.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 16.008, + "eval_rouge1_for_task1344_rte_textual_entailment": 58.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.2085, + "eval_rouge1_for_task1356_xlsum_title_generation": 24.1363, + "eval_rouge1_for_task1358_xlsum_title_generation": 39.9072, + "eval_rouge1_for_task1385_anli_textual_entailment": 26.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 22.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 22.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 39.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 66.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 90.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 22.2714, + "eval_rouge1_for_task1407_dart_data_to_text": 34.7105, + "eval_rouge1_for_task1409_dart_data_to_text": 47.2564, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.9198, + "eval_rouge1_for_task1439_doqa_answerability_classification": 40.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 59.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 32.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 41.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 41.7436, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.2182, + "eval_rouge1_for_task1562_zest_question_rewriting": 52.7392, + "eval_rouge1_for_task1586_scifact_title_generation": 38.407, + "eval_rouge1_for_task1598_nyc_data_to_text": 51.8675, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 76.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 81.7238, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 96.141, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 60.0, + "eval_rouge1_for_task1659_billsum_title_generation": 35.0887, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 79.6, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 60.8526, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 51.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 92.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 10.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 2.0, + "eval_rouge1_for_task219_rocstories_title_generation": 12.5127, + "eval_rouge1_for_task220_rocstories_title_generation": 98.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 55.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 58.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 94.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 77.15, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 29.5967, + "eval_rouge1_for_task288_gigaword_title_generation": 36.1333, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 10.9667, + "eval_rouge1_for_task329_gap_coreference_resolution": 38.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 71.5, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 59.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 86.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 85.0, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 84.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.4662, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 48.5, + "eval_rouge1_for_task402_grailqa_question_rewriting": 82.9119, + "eval_rouge1_for_task418_persent_title_generation": 29.9217, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.3197, + "eval_rouge1_for_task500_scruples_title_generation": 25.2463, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 37.363, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 60.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 43.4997, + "eval_rouge1_for_task602_wikitext_title_generation": 8.8322, + "eval_rouge1_for_task613_liar_keyword_tagging": 41.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 43.5626, + "eval_rouge1_for_task619_ohsumed_title_generation": 48.4035, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 40.6, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 55.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 38.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.8286, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 18.35, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 81.8029, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.1511, + "eval_rouge1_for_task677_ollie_data_to_text": 34.9654, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 49.0, + "eval_rouge1_for_task743_eurlex_title_generation": 26.2629, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 0.1589, + "eval_rouge1_for_task769_qed_title_generation": 87.1603, + "eval_rouge1_for_task827_copa_cause_effect_classification": 62.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 69.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 38.6667, + "eval_rouge1_for_task890_gwsd_textual_entailment": 34.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 53.8, + "eval_rouge1_for_task892_gap_coreference_resolution": 47.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 57.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 66.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rouge1_for_task957_e2e_data_to_text": 54.6159, + "eval_rouge1_for_task970_sherliic_textual_entailment": 71.0, + "eval_rouge1_for_textual_entailment": 44.1806, + "eval_rouge1_for_title_generation": 38.4025, + "eval_rouge1_for_word_analogy": 47.375, + "eval_rougeL": 50.3038, + "eval_rougeL_for_answerability_classification": 59.8462, + "eval_rougeL_for_cause_effect_classification": 65.71, + "eval_rougeL_for_coreference_resolution": 50.8119, + "eval_rougeL_for_data_to_text": 46.925, + "eval_rougeL_for_dialogue_act_recognition": 47.0769, + "eval_rougeL_for_grammar_error_correction": 61.4579, + "eval_rougeL_for_keyword_tagging": 61.9574, + "eval_rougeL_for_overlap_extraction": 36.5482, + "eval_rougeL_for_question_rewriting": 68.2844, + "eval_rougeL_for_task020_mctaco_answerability_classification": 55.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 62.5, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.2884, + "eval_rougeL_for_task035_winogrande_question_rewriting": 89.6702, + "eval_rougeL_for_task036_qasc_keyword_tagging": 78.8416, + "eval_rougeL_for_task039_qasc_overlap_extraction": 44.0, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 54.1639, + "eval_rougeL_for_task1152_bard_word_analogy": 33.0, + "eval_rougeL_for_task1153_bard_word_analogy": 43.0, + "eval_rougeL_for_task1154_bard_word_analogy": 30.0, + "eval_rougeL_for_task1155_bard_word_analogy": 89.0, + "eval_rougeL_for_task1156_bard_word_analogy": 50.0, + "eval_rougeL_for_task1157_bard_word_analogy": 67.0, + "eval_rougeL_for_task1158_bard_word_analogy": 35.0, + "eval_rougeL_for_task1159_bard_word_analogy": 32.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 33.145, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.1565, + "eval_rougeL_for_task121_atomic_question_rewriting": 41.546, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 30.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 15.3215, + "eval_rougeL_for_task1344_rte_textual_entailment": 58.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.3171, + "eval_rougeL_for_task1356_xlsum_title_generation": 21.0322, + "eval_rougeL_for_task1358_xlsum_title_generation": 33.6496, + "eval_rougeL_for_task1385_anli_textual_entailment": 26.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 22.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 22.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 39.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 66.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 90.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 21.3714, + "eval_rougeL_for_task1407_dart_data_to_text": 29.9855, + "eval_rougeL_for_task1409_dart_data_to_text": 41.161, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.5625, + "eval_rougeL_for_task1439_doqa_answerability_classification": 40.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 59.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 32.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 38.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 51.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 41.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 38.0901, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.3532, + "eval_rougeL_for_task1562_zest_question_rewriting": 46.4086, + "eval_rougeL_for_task1586_scifact_title_generation": 31.0979, + "eval_rougeL_for_task1598_nyc_data_to_text": 41.8315, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 74.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 80.4717, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 95.1243, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 60.0, + "eval_rougeL_for_task1659_billsum_title_generation": 28.9601, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 79.6, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 54.9261, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 51.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 92.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 10.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 2.0, + "eval_rougeL_for_task219_rocstories_title_generation": 12.5127, + "eval_rougeL_for_task220_rocstories_title_generation": 98.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 55.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 58.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 94.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 77.15, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 29.0964, + "eval_rougeL_for_task288_gigaword_title_generation": 31.1698, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 10.9667, + "eval_rougeL_for_task329_gap_coreference_resolution": 38.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 71.5, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 59.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 86.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 85.0, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 84.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.9802, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 48.5, + "eval_rougeL_for_task402_grailqa_question_rewriting": 69.5725, + "eval_rougeL_for_task418_persent_title_generation": 25.9512, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.2715, + "eval_rougeL_for_task500_scruples_title_generation": 23.8366, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 36.7772, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 60.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 41.7196, + "eval_rougeL_for_task602_wikitext_title_generation": 8.5941, + "eval_rougeL_for_task613_liar_keyword_tagging": 41.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 40.9901, + "eval_rougeL_for_task619_ohsumed_title_generation": 40.7433, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 39.1167, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 55.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 38.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.8286, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 18.35, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 80.8184, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 60.607, + "eval_rougeL_for_task677_ollie_data_to_text": 28.5123, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 49.0, + "eval_rougeL_for_task743_eurlex_title_generation": 24.3992, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 0.1589, + "eval_rougeL_for_task769_qed_title_generation": 87.1603, + "eval_rougeL_for_task827_copa_cause_effect_classification": 62.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 69.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 53.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 38.6667, + "eval_rougeL_for_task890_gwsd_textual_entailment": 34.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 53.8, + "eval_rougeL_for_task892_gap_coreference_resolution": 47.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 57.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 64.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 66.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 63.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.8544, + "eval_rougeL_for_task970_sherliic_textual_entailment": 71.0, + "eval_rougeL_for_textual_entailment": 44.0833, + "eval_rougeL_for_title_generation": 35.3579, + "eval_rougeL_for_word_analogy": 47.375, + "eval_runtime": 780.4718, + "eval_samples_per_second": 15.26, + "eval_steps_per_second": 0.955, + "step": 4500 + }, + { + "epoch": 1.09, + "learning_rate": 5e-05, + "loss": 0.7759, + "step": 5000 + }, + { + "epoch": 1.09, + "eval_exact_match": 31.9395, + "eval_exact_match_for_answerability_classification": 57.7692, + "eval_exact_match_for_cause_effect_classification": 43.5714, + "eval_exact_match_for_coreference_resolution": 43.1429, + "eval_exact_match_for_data_to_text": 8.8378, + "eval_exact_match_for_dialogue_act_recognition": 35.4286, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 45.8, + "eval_exact_match_for_overlap_extraction": 19.0, + "eval_exact_match_for_question_rewriting": 2.2727, + "eval_exact_match_for_task020_mctaco_answerability_classification": 51.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 53.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 6.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 21.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 38.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 69.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 27.0, + "eval_exact_match_for_task1153_bard_word_analogy": 35.0, + "eval_exact_match_for_task1154_bard_word_analogy": 26.0, + "eval_exact_match_for_task1155_bard_word_analogy": 75.0, + "eval_exact_match_for_task1156_bard_word_analogy": 47.0, + "eval_exact_match_for_task1157_bard_word_analogy": 66.0, + "eval_exact_match_for_task1158_bard_word_analogy": 43.0, + "eval_exact_match_for_task1159_bard_word_analogy": 30.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task121_atomic_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 25.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 5.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 72.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 24.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 24.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 23.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 38.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 77.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 58.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 33.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 36.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 52.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 15.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 2.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 32.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 63.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 65.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 60.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 41.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 6.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 42.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 87.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 12.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 3.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 98.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 92.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 62.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 49.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 5.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 39.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 66.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 57.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 56.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 53.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 55.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 40.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 3.0, + "eval_exact_match_for_task418_persent_title_generation": 1.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 55.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 8.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 21.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 14.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 82.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 30.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 10.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 91.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 1.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 2.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 43.0, + "eval_exact_match_for_task743_eurlex_title_generation": 2.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 74.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 54.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 66.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 63.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 37.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 51.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 61.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_exact_match_for_task957_e2e_data_to_text": 1.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 73.0, + "eval_exact_match_for_textual_entailment": 40.4167, + "eval_exact_match_for_title_generation": 11.0426, + "eval_exact_match_for_word_analogy": 43.625, + "eval_f1": 48.9892, + "eval_f1_for_answerability_classification": 60.3846, + "eval_f1_for_cause_effect_classification": 62.0067, + "eval_f1_for_coreference_resolution": 49.2667, + "eval_f1_for_data_to_text": 50.9876, + "eval_f1_for_dialogue_act_recognition": 38.5714, + "eval_f1_for_grammar_error_correction": 56.8748, + "eval_f1_for_keyword_tagging": 60.9542, + "eval_f1_for_overlap_extraction": 42.2249, + "eval_f1_for_question_rewriting": 69.9737, + "eval_f1_for_task020_mctaco_answerability_classification": 51.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 57.0, + "eval_f1_for_task034_winogrande_question_rewriting": 90.5841, + "eval_f1_for_task035_winogrande_question_rewriting": 90.0224, + "eval_f1_for_task036_qasc_keyword_tagging": 67.5567, + "eval_f1_for_task039_qasc_overlap_extraction": 51.1667, + "eval_f1_for_task050_multirc_answerability_classification": 69.0, + "eval_f1_for_task102_commongen_data_to_text": 51.4398, + "eval_f1_for_task1152_bard_word_analogy": 27.0, + "eval_f1_for_task1153_bard_word_analogy": 35.6667, + "eval_f1_for_task1154_bard_word_analogy": 26.0, + "eval_f1_for_task1155_bard_word_analogy": 75.0, + "eval_f1_for_task1156_bard_word_analogy": 47.0, + "eval_f1_for_task1157_bard_word_analogy": 66.0, + "eval_f1_for_task1158_bard_word_analogy": 43.0, + "eval_f1_for_task1159_bard_word_analogy": 30.0, + "eval_f1_for_task1161_coda_19_title_generation": 34.5717, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 78.4842, + "eval_f1_for_task121_atomic_question_rewriting": 49.7587, + "eval_f1_for_task133_winowhy_coreference_resolution": 25.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 17.8101, + "eval_f1_for_task1344_rte_textual_entailment": 72.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.1188, + "eval_f1_for_task1356_xlsum_title_generation": 19.663, + "eval_f1_for_task1358_xlsum_title_generation": 33.5356, + "eval_f1_for_task1385_anli_textual_entailment": 24.0, + "eval_f1_for_task1386_anli_textual_entailment": 24.0, + "eval_f1_for_task1387_anli_textual_entailment": 23.0, + "eval_f1_for_task1388_cb_textual_entailment": 38.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 77.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_f1_for_task1407_dart_data_to_text": 33.7275, + "eval_f1_for_task1409_dart_data_to_text": 46.2857, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.6405, + "eval_f1_for_task1439_doqa_answerability_classification": 48.0, + "eval_f1_for_task1442_doqa_answerability_classification": 58.0, + "eval_f1_for_task1516_imppres_textual_entailment": 33.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 36.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 52.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 15.0, + "eval_f1_for_task1540_peer_read_title_generation": 38.6478, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 84.1092, + "eval_f1_for_task1562_zest_question_rewriting": 51.4667, + "eval_f1_for_task1586_scifact_title_generation": 34.3793, + "eval_f1_for_task1598_nyc_data_to_text": 51.7594, + "eval_f1_for_task1612_sick_textual_entailment": 32.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 79.1049, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 63.0, + "eval_f1_for_task1631_open_pi_data_to_text": 96.7338, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 60.0, + "eval_f1_for_task1659_billsum_title_generation": 32.8059, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 77.6286, + "eval_f1_for_task1728_web_nlg_data_to_text": 60.4289, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 42.0, + "eval_f1_for_task200_multinli_textual_entailment": 87.0, + "eval_f1_for_task201_multinli_textual_entailment": 12.0, + "eval_f1_for_task202_multinli_textual_entailment": 3.0, + "eval_f1_for_task219_rocstories_title_generation": 12.4714, + "eval_f1_for_task220_rocstories_title_generation": 98.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 49.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 92.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 72.7, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 33.2832, + "eval_f1_for_task288_gigaword_title_generation": 33.5208, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 13.6667, + "eval_f1_for_task329_gap_coreference_resolution": 39.0, + "eval_f1_for_task330_gap_coreference_resolution": 75.7048, + "eval_f1_for_task349_squad2.0_answerability_classification": 57.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 78.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 84.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 85.0, + "eval_f1_for_task393_cod3s_cause_effect_classification": 30.1865, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 42.5, + "eval_f1_for_task402_grailqa_question_rewriting": 77.0094, + "eval_f1_for_task418_persent_title_generation": 25.8343, + "eval_f1_for_task442_com_qa_question_rewriting": 71.0346, + "eval_f1_for_task500_scruples_title_generation": 18.1529, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 36.4359, + "eval_f1_for_task520_aquamuse_answerability_classification": 55.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 39.2626, + "eval_f1_for_task602_wikitext_title_generation": 7.8404, + "eval_f1_for_task613_liar_keyword_tagging": 21.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 37.527, + "eval_f1_for_task619_ohsumed_title_generation": 45.919, + "eval_f1_for_task620_ohsumed_keyword_tagging": 37.4333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 82.0, + "eval_f1_for_task640_e_snli_textual_entailment": 30.0, + "eval_f1_for_task641_e_snli_textual_entailment": 10.0, + "eval_f1_for_task642_e_snli_textual_entailment": 33.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 96.1143, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 5.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.7871, + "eval_f1_for_task671_ambigqa_question_rewriting": 62.3392, + "eval_f1_for_task677_ollie_data_to_text": 28.0637, + "eval_f1_for_task738_perspectrum_textual_entailment": 43.0, + "eval_f1_for_task743_eurlex_title_generation": 25.1148, + "eval_f1_for_task760_msr_sqa_data_to_text": 0.5688, + "eval_f1_for_task769_qed_title_generation": 89.6218, + "eval_f1_for_task827_copa_cause_effect_classification": 54.0, + "eval_f1_for_task828_copa_cause_effect_classification": 66.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 63.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_f1_for_task890_gwsd_textual_entailment": 37.0, + "eval_f1_for_task891_gap_coreference_resolution": 60.5333, + "eval_f1_for_task892_gap_coreference_resolution": 46.0, + "eval_f1_for_task893_gap_coreference_resolution": 61.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_f1_for_task957_e2e_data_to_text": 52.5707, + "eval_f1_for_task970_sherliic_textual_entailment": 73.0, + "eval_f1_for_textual_entailment": 40.4167, + "eval_f1_for_title_generation": 36.0052, + "eval_f1_for_word_analogy": 43.7083, + "eval_gen_len": 8.6388, + "eval_global_step": 5000, + "eval_loss": 1.3755230903625488, + "eval_rouge1": 50.7425, + "eval_rouge1_for_answerability_classification": 60.3846, + "eval_rouge1_for_cause_effect_classification": 62.3751, + "eval_rouge1_for_coreference_resolution": 50.2241, + "eval_rouge1_for_data_to_text": 53.7679, + "eval_rouge1_for_dialogue_act_recognition": 42.5864, + "eval_rouge1_for_grammar_error_correction": 62.1212, + "eval_rouge1_for_keyword_tagging": 66.5929, + "eval_rouge1_for_overlap_extraction": 43.436, + "eval_rouge1_for_question_rewriting": 71.5376, + "eval_rouge1_for_task020_mctaco_answerability_classification": 51.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 57.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 90.6574, + "eval_rouge1_for_task035_winogrande_question_rewriting": 90.5403, + "eval_rouge1_for_task036_qasc_keyword_tagging": 73.317, + "eval_rouge1_for_task039_qasc_overlap_extraction": 53.0, + "eval_rouge1_for_task050_multirc_answerability_classification": 69.0, + "eval_rouge1_for_task102_commongen_data_to_text": 64.2016, + "eval_rouge1_for_task1152_bard_word_analogy": 27.0, + "eval_rouge1_for_task1153_bard_word_analogy": 35.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 26.0, + "eval_rouge1_for_task1155_bard_word_analogy": 75.0, + "eval_rouge1_for_task1156_bard_word_analogy": 47.0, + "eval_rouge1_for_task1157_bard_word_analogy": 66.0, + "eval_rouge1_for_task1158_bard_word_analogy": 43.0, + "eval_rouge1_for_task1159_bard_word_analogy": 30.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 37.5628, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 78.8256, + "eval_rouge1_for_task121_atomic_question_rewriting": 51.9229, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 25.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 18.5119, + "eval_rouge1_for_task1344_rte_textual_entailment": 72.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.2069, + "eval_rouge1_for_task1356_xlsum_title_generation": 23.5772, + "eval_rouge1_for_task1358_xlsum_title_generation": 37.1423, + "eval_rouge1_for_task1385_anli_textual_entailment": 24.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 24.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 23.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 38.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 77.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 21.1048, + "eval_rouge1_for_task1407_dart_data_to_text": 34.4209, + "eval_rouge1_for_task1409_dart_data_to_text": 47.4739, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.9064, + "eval_rouge1_for_task1439_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 58.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 33.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 36.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 52.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 15.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 41.946, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.3361, + "eval_rouge1_for_task1562_zest_question_rewriting": 54.7573, + "eval_rouge1_for_task1586_scifact_title_generation": 38.9286, + "eval_rouge1_for_task1598_nyc_data_to_text": 53.8385, + "eval_rouge1_for_task1612_sick_textual_entailment": 32.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.4801, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 63.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 96.865, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 60.0, + "eval_rouge1_for_task1659_billsum_title_generation": 35.1175, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 77.6286, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 62.0606, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 42.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 87.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 12.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 3.0, + "eval_rouge1_for_task219_rocstories_title_generation": 16.781, + "eval_rouge1_for_task220_rocstories_title_generation": 98.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 92.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 72.8667, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 33.872, + "eval_rouge1_for_task288_gigaword_title_generation": 36.4119, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 16.9, + "eval_rouge1_for_task329_gap_coreference_resolution": 39.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 75.5238, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 57.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 78.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 84.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 85.0, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 30.1867, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 52.5, + "eval_rouge1_for_task402_grailqa_question_rewriting": 79.3196, + "eval_rouge1_for_task418_persent_title_generation": 28.5323, + "eval_rouge1_for_task442_com_qa_question_rewriting": 74.3197, + "eval_rouge1_for_task500_scruples_title_generation": 19.4922, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 36.7773, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 55.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 40.7363, + "eval_rouge1_for_task602_wikitext_title_generation": 8.5119, + "eval_rouge1_for_task613_liar_keyword_tagging": 36.6667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 40.1059, + "eval_rouge1_for_task619_ohsumed_title_generation": 48.5614, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 44.3667, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 82.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 30.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 10.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 96.6143, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 5.0, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.7568, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 63.1265, + "eval_rouge1_for_task677_ollie_data_to_text": 30.8061, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 43.0, + "eval_rouge1_for_task743_eurlex_title_generation": 26.2619, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 0.6586, + "eval_rouge1_for_task769_qed_title_generation": 89.6218, + "eval_rouge1_for_task827_copa_cause_effect_classification": 54.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 66.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 63.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 37.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 60.719, + "eval_rouge1_for_task892_gap_coreference_resolution": 46.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 61.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rouge1_for_task957_e2e_data_to_text": 54.2852, + "eval_rouge1_for_task970_sherliic_textual_entailment": 73.0, + "eval_rouge1_for_textual_entailment": 42.2778, + "eval_rouge1_for_title_generation": 38.179, + "eval_rouge1_for_word_analogy": 43.7083, + "eval_rougeL": 49.3174, + "eval_rougeL_for_answerability_classification": 60.3846, + "eval_rougeL_for_cause_effect_classification": 61.823, + "eval_rougeL_for_coreference_resolution": 50.2241, + "eval_rougeL_for_data_to_text": 46.4898, + "eval_rougeL_for_dialogue_act_recognition": 42.4578, + "eval_rougeL_for_grammar_error_correction": 61.5169, + "eval_rougeL_for_keyword_tagging": 65.708, + "eval_rougeL_for_overlap_extraction": 43.1151, + "eval_rougeL_for_question_rewriting": 68.0454, + "eval_rougeL_for_task020_mctaco_answerability_classification": 51.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 57.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 90.0795, + "eval_rougeL_for_task035_winogrande_question_rewriting": 89.7263, + "eval_rougeL_for_task036_qasc_keyword_tagging": 69.9757, + "eval_rougeL_for_task039_qasc_overlap_extraction": 53.0, + "eval_rougeL_for_task050_multirc_answerability_classification": 69.0, + "eval_rougeL_for_task102_commongen_data_to_text": 53.1373, + "eval_rougeL_for_task1152_bard_word_analogy": 27.0, + "eval_rougeL_for_task1153_bard_word_analogy": 35.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 26.0, + "eval_rougeL_for_task1155_bard_word_analogy": 75.0, + "eval_rougeL_for_task1156_bard_word_analogy": 47.0, + "eval_rougeL_for_task1157_bard_word_analogy": 66.0, + "eval_rougeL_for_task1158_bard_word_analogy": 43.0, + "eval_rougeL_for_task1159_bard_word_analogy": 30.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 31.6958, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 77.8678, + "eval_rougeL_for_task121_atomic_question_rewriting": 46.8739, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 25.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 17.8575, + "eval_rougeL_for_task1344_rte_textual_entailment": 72.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.3311, + "eval_rougeL_for_task1356_xlsum_title_generation": 19.9874, + "eval_rougeL_for_task1358_xlsum_title_generation": 31.8439, + "eval_rougeL_for_task1385_anli_textual_entailment": 24.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 24.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 23.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 38.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 63.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 77.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 20.2048, + "eval_rougeL_for_task1407_dart_data_to_text": 28.6166, + "eval_rougeL_for_task1409_dart_data_to_text": 41.604, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 36.5626, + "eval_rougeL_for_task1439_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 58.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 33.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 36.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 52.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 15.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 37.6318, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.4712, + "eval_rougeL_for_task1562_zest_question_rewriting": 48.6889, + "eval_rougeL_for_task1586_scifact_title_generation": 32.9059, + "eval_rougeL_for_task1598_nyc_data_to_text": 41.8809, + "eval_rougeL_for_task1612_sick_textual_entailment": 32.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 71.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 78.6066, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 63.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 96.6278, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 60.0, + "eval_rougeL_for_task1659_billsum_title_generation": 28.9963, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 77.6286, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 55.3197, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 42.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 87.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 12.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 3.0, + "eval_rougeL_for_task219_rocstories_title_generation": 16.781, + "eval_rougeL_for_task220_rocstories_title_generation": 98.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 92.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 72.8667, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 33.2302, + "eval_rougeL_for_task288_gigaword_title_generation": 31.6294, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.0, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 16.9, + "eval_rougeL_for_task329_gap_coreference_resolution": 39.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 75.5238, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 57.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 78.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 84.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 85.0, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 28.8311, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 52.5, + "eval_rougeL_for_task402_grailqa_question_rewriting": 65.6463, + "eval_rougeL_for_task418_persent_title_generation": 24.8964, + "eval_rougeL_for_task442_com_qa_question_rewriting": 70.2715, + "eval_rougeL_for_task500_scruples_title_generation": 18.6302, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 36.4048, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 55.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 38.8075, + "eval_rougeL_for_task602_wikitext_title_generation": 8.2738, + "eval_rougeL_for_task613_liar_keyword_tagging": 36.6667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 37.5966, + "eval_rougeL_for_task619_ohsumed_title_generation": 40.6354, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 43.2833, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 82.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 30.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 10.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 96.6143, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 5.0, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.5948, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 60.8124, + "eval_rougeL_for_task677_ollie_data_to_text": 25.0134, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 43.0, + "eval_rougeL_for_task743_eurlex_title_generation": 24.352, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 0.6478, + "eval_rougeL_for_task769_qed_title_generation": 89.6218, + "eval_rougeL_for_task827_copa_cause_effect_classification": 54.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 66.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 63.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 37.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 60.719, + "eval_rougeL_for_task892_gap_coreference_resolution": 46.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 61.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 59.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 56.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.6378, + "eval_rougeL_for_task970_sherliic_textual_entailment": 73.0, + "eval_rougeL_for_textual_entailment": 42.0, + "eval_rougeL_for_title_generation": 35.1809, + "eval_rougeL_for_word_analogy": 43.7083, + "eval_runtime": 802.0916, + "eval_samples_per_second": 14.849, + "eval_steps_per_second": 0.929, + "step": 5000 + }, + { + "epoch": 1.09, + "step": 5000, + "total_flos": 3.547375273718579e+17, + "train_loss": 1.031546841430664, + "train_runtime": 33373.8945, + "train_samples_per_second": 2.397, + "train_steps_per_second": 0.15 + } + ], + "max_steps": 5000, + "num_train_epochs": 2, + "total_flos": 3.547375273718579e+17, + "trial_name": null, + "trial_params": null +}